aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_ipv4.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--net/ipv4/tcp_ipv4.c726
1 files changed, 186 insertions, 540 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 54139fa514e..77cccda1ad0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -75,6 +75,7 @@
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>
+#include <net/busy_poll.h>
#include <linux/inet.h>
#include <linux/ipv6.h>
@@ -172,11 +173,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
IPPROTO_TCP,
- orig_sport, orig_dport, sk, true);
+ orig_sport, orig_dport, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
if (err == -ENETUNREACH)
- IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
return err;
}
@@ -274,13 +275,6 @@ static void tcp_v4_mtu_reduced(struct sock *sk)
struct inet_sock *inet = inet_sk(sk);
u32 mtu = tcp_sk(sk)->mtu_info;
- /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
- * send out by Linux are always <576bytes so they should go through
- * unfragmented).
- */
- if (sk->sk_state == TCP_LISTEN)
- return;
-
dst = inet_csk_update_pmtu(sk, mtu);
if (!dst)
return;
@@ -294,6 +288,7 @@ static void tcp_v4_mtu_reduced(struct sock *sk)
mtu = dst_mtu(dst);
if (inet->pmtudisc != IP_PMTUDISC_DONT &&
+ ip_sk_accept_pmtu(sk) &&
inet_csk(sk)->icsk_pmtu_cookie > mtu) {
tcp_sync_mss(sk, mtu);
@@ -341,8 +336,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
const int code = icmp_hdr(icmp_skb)->code;
struct sock *sk;
struct sk_buff *skb;
- struct request_sock *req;
- __u32 seq;
+ struct request_sock *fastopen;
+ __u32 seq, snd_una;
__u32 remaining;
int err;
struct net *net = dev_net(icmp_skb->dev);
@@ -369,11 +364,10 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
* We do take care of PMTU discovery (RFC1191) special case :
* we can receive locally generated ICMP messages while socket is held.
*/
- if (sock_owned_by_user(sk) &&
- type != ICMP_DEST_UNREACH &&
- code != ICMP_FRAG_NEEDED)
- NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
-
+ if (sock_owned_by_user(sk)) {
+ if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
+ NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
+ }
if (sk->sk_state == TCP_CLOSE)
goto out;
@@ -384,12 +378,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
icsk = inet_csk(sk);
tp = tcp_sk(sk);
- req = tp->fastopen_rsk;
seq = ntohl(th->seq);
+ /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
+ fastopen = tp->fastopen_rsk;
+ snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
if (sk->sk_state != TCP_LISTEN &&
- !between(seq, tp->snd_una, tp->snd_nxt) &&
- (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
- /* For a Fast Open socket, allow seq to be snt_isn. */
+ !between(seq, snd_una, tp->snd_nxt)) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -409,6 +403,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
goto out;
if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
+ /* We are not interested in TCP_LISTEN and open_requests
+ * (SYN-ACKs send out by Linux are always <576bytes so
+ * they should go through unfragmented).
+ */
+ if (sk->sk_state == TCP_LISTEN)
+ goto out;
+
tp->mtu_info = info;
if (!sock_owned_by_user(sk)) {
tcp_v4_mtu_reduced(sk);
@@ -425,16 +426,14 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
break;
if (seq != tp->snd_una || !icsk->icsk_retransmits ||
- !icsk->icsk_backoff)
+ !icsk->icsk_backoff || fastopen)
break;
- /* XXX (TFO) - revisit the following logic for TFO */
-
if (sock_owned_by_user(sk))
break;
icsk->icsk_backoff--;
- inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
+ inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
tcp_bound_rto(sk);
@@ -461,14 +460,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
goto out;
}
- /* XXX (TFO) - if it's a TFO socket and has been accepted, rather
- * than following the TCP_SYN_RECV case and closing the socket,
- * we ignore the ICMP error and keep trying like a fully established
- * socket. Is this the right thing to do?
- */
- if (req && req->sk == NULL)
- goto out;
-
switch (sk->sk_state) {
struct request_sock *req, **prev;
case TCP_LISTEN:
@@ -497,13 +488,17 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
* errors returned from accept().
*/
inet_csk_reqsk_queue_drop(sk, req, prev);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
goto out;
case TCP_SYN_SENT:
- case TCP_SYN_RECV: /* Cannot happen.
- It can f.e. if SYNs crossed,
- or Fast Open.
- */
+ case TCP_SYN_RECV:
+ /* Only in fast or simultaneous open. If a fast open socket is
+ * is already accepted it is treated as a connected one below.
+ */
+ if (fastopen && fastopen->sk == NULL)
+ break;
+
if (!sock_owned_by_user(sk)) {
sk->sk_err = err;
@@ -545,8 +540,7 @@ out:
sock_put(sk);
}
-static void __tcp_v4_send_check(struct sk_buff *skb,
- __be32 saddr, __be32 daddr)
+void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
struct tcphdr *th = tcp_hdr(skb);
@@ -571,23 +565,6 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_v4_send_check);
-int tcp_v4_gso_send_check(struct sk_buff *skb)
-{
- const struct iphdr *iph;
- struct tcphdr *th;
-
- if (!pskb_may_pull(skb, sizeof(*th)))
- return -EINVAL;
-
- iph = ip_hdr(skb);
- th = tcp_hdr(skb);
-
- th->check = 0;
- skb->ip_summed = CHECKSUM_PARTIAL;
- __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
- return 0;
-}
-
/*
* This routine will send an RST to the other tcp.
*
@@ -657,7 +634,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
* no RST generated if md5 hash doesn't match.
*/
sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
- &tcp_hashinfo, ip_hdr(skb)->daddr,
+ &tcp_hashinfo, ip_hdr(skb)->saddr,
+ th->source, ip_hdr(skb)->daddr,
ntohs(th->source), inet_iif(skb));
/* don't send rst if it can't find key */
if (!sk1)
@@ -725,7 +703,7 @@ release_sk1:
*/
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
- u32 win, u32 ts, int oif,
+ u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key,
int reply_flags, u8 tos)
{
@@ -746,12 +724,12 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
arg.iov[0].iov_base = (unsigned char *)&rep;
arg.iov[0].iov_len = sizeof(rep.th);
- if (ts) {
+ if (tsecr) {
rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_TIMESTAMP << 8) |
TCPOLEN_TIMESTAMP);
- rep.opt[1] = htonl(tcp_time_stamp);
- rep.opt[2] = htonl(ts);
+ rep.opt[1] = htonl(tsval);
+ rep.opt[2] = htonl(tsecr);
arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
}
@@ -766,7 +744,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
#ifdef CONFIG_TCP_MD5SIG
if (key) {
- int offset = (ts) ? 3 : 0;
+ int offset = (tsecr) ? 3 : 0;
rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
@@ -801,6 +779,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
+ tcp_time_stamp + tcptw->tw_ts_offset,
tcptw->tw_ts_recent,
tw->tw_bound_dev_if,
tcp_twsk_md5_key(tcptw),
@@ -820,6 +799,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
+ tcp_time_stamp,
req->ts_recent,
0,
tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
@@ -835,27 +815,26 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
*/
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
- struct request_values *rvp,
u16 queue_mapping,
- bool nocache)
+ struct tcp_fastopen_cookie *foc)
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct flowi4 fl4;
int err = -1;
- struct sk_buff * skb;
+ struct sk_buff *skb;
/* First, grab a route. */
if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
return -1;
- skb = tcp_make_synack(sk, dst, req, rvp, NULL);
+ skb = tcp_make_synack(sk, dst, req, foc);
if (skb) {
- __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
+ __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
skb_set_queue_mapping(skb, queue_mapping);
- err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
- ireq->rmt_addr,
+ err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+ ireq->ir_rmt_addr,
ireq->opt);
err = net_xmit_eval(err);
if (!tcp_rsk(req)->snt_synack && !err)
@@ -865,13 +844,14 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
return err;
}
-static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
- struct request_values *rvp)
+static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
{
- int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
+ int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);
- if (!res)
+ if (!res) {
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ }
return res;
}
@@ -894,8 +874,6 @@ bool tcp_syn_flood_action(struct sock *sk,
bool want_cookie = false;
struct listen_sock *lopt;
-
-
#ifdef CONFIG_SYN_COOKIES
if (sysctl_tcp_syncookies) {
msg = "Sending cookies";
@@ -906,7 +884,7 @@ bool tcp_syn_flood_action(struct sock *sk,
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
- if (!lopt->synflood_warned) {
+ if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
lopt->synflood_warned = 1;
pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
proto, ntohs(tcp_hdr(skb)->dest), msg);
@@ -951,7 +929,6 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
- struct hlist_node *pos;
unsigned int size = sizeof(struct in_addr);
struct tcp_md5sig_info *md5sig;
@@ -965,7 +942,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
if (family == AF_INET6)
size = sizeof(struct in6_addr);
#endif
- hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
+ hlist_for_each_entry_rcu(key, &md5sig->head, node) {
if (key->family != family)
continue;
if (!memcmp(&key->addr, addr, size))
@@ -990,7 +967,7 @@ static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
{
union tcp_md5_addr *addr;
- addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
+ addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
return tcp_md5_do_lookup(sk, addr, AF_INET);
}
@@ -1003,7 +980,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_info *md5sig;
- key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
+ key = tcp_md5_do_lookup(sk, addr, family);
if (key) {
/* Pre-existing entry - just update that one. */
memcpy(key->key, newkey, newkeylen);
@@ -1026,7 +1003,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
key = sock_kmalloc(sk, sizeof(*key), gfp);
if (!key)
return -ENOMEM;
- if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
+ if (!tcp_alloc_md5sig_pool()) {
sock_kfree_s(sk, key, sizeof(*key));
return -ENOMEM;
}
@@ -1044,20 +1021,14 @@ EXPORT_SYMBOL(tcp_md5_do_add);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
- struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
- struct tcp_md5sig_info *md5sig;
- key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
+ key = tcp_md5_do_lookup(sk, addr, family);
if (!key)
return -ENOENT;
hlist_del_rcu(&key->node);
atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
kfree_rcu(key, rcu);
- md5sig = rcu_dereference_protected(tp->md5sig_info,
- sock_owned_by_user(sk));
- if (hlist_empty(&md5sig->head))
- tcp_free_md5sig_pool();
return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);
@@ -1066,14 +1037,12 @@ static void tcp_clear_md5_list(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
- struct hlist_node *pos, *n;
+ struct hlist_node *n;
struct tcp_md5sig_info *md5sig;
md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
- if (!hlist_empty(&md5sig->head))
- tcp_free_md5sig_pool();
- hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
+ hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
hlist_del_rcu(&key->node);
atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
kfree_rcu(key, rcu);
@@ -1175,8 +1144,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
saddr = inet_sk(sk)->inet_saddr;
daddr = inet_sk(sk)->inet_daddr;
} else if (req) {
- saddr = inet_rsk(req)->loc_addr;
- daddr = inet_rsk(req)->rmt_addr;
+ saddr = inet_rsk(req)->ir_loc_addr;
+ daddr = inet_rsk(req)->ir_rmt_addr;
} else {
const struct iphdr *iph = ip_hdr(skb);
saddr = iph->saddr;
@@ -1285,189 +1254,9 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
};
#endif
-static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req,
- struct tcp_fastopen_cookie *foc,
- struct tcp_fastopen_cookie *valid_foc)
-{
- bool skip_cookie = false;
- struct fastopen_queue *fastopenq;
-
- if (likely(!fastopen_cookie_present(foc))) {
- /* See include/net/tcp.h for the meaning of these knobs */
- if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
- ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
- (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
- skip_cookie = true; /* no cookie to validate */
- else
- return false;
- }
- fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
- /* A FO option is present; bump the counter. */
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
-
- /* Make sure the listener has enabled fastopen, and we don't
- * exceed the max # of pending TFO requests allowed before trying
- * to validating the cookie in order to avoid burning CPU cycles
- * unnecessarily.
- *
- * XXX (TFO) - The implication of checking the max_qlen before
- * processing a cookie request is that clients can't differentiate
- * between qlen overflow causing Fast Open to be disabled
- * temporarily vs a server not supporting Fast Open at all.
- */
- if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
- fastopenq == NULL || fastopenq->max_qlen == 0)
- return false;
-
- if (fastopenq->qlen >= fastopenq->max_qlen) {
- struct request_sock *req1;
- spin_lock(&fastopenq->lock);
- req1 = fastopenq->rskq_rst_head;
- if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
- spin_unlock(&fastopenq->lock);
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
- /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
- foc->len = -1;
- return false;
- }
- fastopenq->rskq_rst_head = req1->dl_next;
- fastopenq->qlen--;
- spin_unlock(&fastopenq->lock);
- reqsk_free(req1);
- }
- if (skip_cookie) {
- tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- return true;
- }
- if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
- if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
- tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
- if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
- memcmp(&foc->val[0], &valid_foc->val[0],
- TCP_FASTOPEN_COOKIE_SIZE) != 0)
- return false;
- valid_foc->len = -1;
- }
- /* Acknowledge the data received from the peer. */
- tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- return true;
- } else if (foc->len == 0) { /* Client requesting a cookie */
- tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPFASTOPENCOOKIEREQD);
- } else {
- /* Client sent a cookie with wrong size. Treat it
- * the same as invalid and return a valid one.
- */
- tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
- }
- return false;
-}
-
-static int tcp_v4_conn_req_fastopen(struct sock *sk,
- struct sk_buff *skb,
- struct sk_buff *skb_synack,
- struct request_sock *req,
- struct request_values *rvp)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
- const struct inet_request_sock *ireq = inet_rsk(req);
- struct sock *child;
- int err;
-
- req->num_retrans = 0;
- req->num_timeout = 0;
- req->sk = NULL;
-
- child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
- if (child == NULL) {
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
- kfree_skb(skb_synack);
- return -1;
- }
- err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
- ireq->rmt_addr, ireq->opt);
- err = net_xmit_eval(err);
- if (!err)
- tcp_rsk(req)->snt_synack = tcp_time_stamp;
- /* XXX (TFO) - is it ok to ignore error and continue? */
-
- spin_lock(&queue->fastopenq->lock);
- queue->fastopenq->qlen++;
- spin_unlock(&queue->fastopenq->lock);
-
- /* Initialize the child socket. Have to fix some values to take
- * into account the child is a Fast Open socket and is created
- * only out of the bits carried in the SYN packet.
- */
- tp = tcp_sk(child);
-
- tp->fastopen_rsk = req;
- /* Do a hold on the listner sk so that if the listener is being
- * closed, the child that has been accepted can live on and still
- * access listen_lock.
- */
- sock_hold(sk);
- tcp_rsk(req)->listener = sk;
-
- /* RFC1323: The window in SYN & SYN/ACK segments is never
- * scaled. So correct it appropriately.
- */
- tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
-
- /* Activate the retrans timer so that SYNACK can be retransmitted.
- * The request socket is not added to the SYN table of the parent
- * because it's been added to the accept queue directly.
- */
- inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
- TCP_TIMEOUT_INIT, TCP_RTO_MAX);
-
- /* Add the child socket directly into the accept queue */
- inet_csk_reqsk_queue_add(sk, req, child);
-
- /* Now finish processing the fastopen child socket. */
- inet_csk(child)->icsk_af_ops->rebuild_header(child);
- tcp_init_congestion_control(child);
- tcp_mtup_init(child);
- tcp_init_buffer_space(child);
- tcp_init_metrics(child);
-
- /* Queue the data carried in the SYN packet. We need to first
- * bump skb's refcnt because the caller will attempt to free it.
- *
- * XXX (TFO) - we honor a zero-payload TFO request for now.
- * (Any reason not to?)
- */
- if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
- /* Don't queue the skb if there is no payload in SYN.
- * XXX (TFO) - How about SYN+FIN?
- */
- tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- } else {
- skb = skb_get(skb);
- skb_dst_drop(skb);
- __skb_pull(skb, tcp_hdr(skb)->doff * 4);
- skb_set_owner_r(skb, child);
- __skb_queue_tail(&child->sk_receive_queue, skb);
- tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- tp->syn_data_acked = 1;
- }
- sk->sk_data_ready(sk, 0);
- bh_unlock_sock(child);
- sock_put(child);
- WARN_ON(req->sk == NULL);
- return 0;
-}
-
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
- struct tcp_extend_values tmp_ext;
struct tcp_options_received tmp_opt;
- const u8 *hash_location;
struct request_sock *req;
struct inet_request_sock *ireq;
struct tcp_sock *tp = tcp_sk(sk);
@@ -1475,12 +1264,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
__be32 saddr = ip_hdr(skb)->saddr;
__be32 daddr = ip_hdr(skb)->daddr;
__u32 isn = TCP_SKB_CB(skb)->when;
- bool want_cookie = false;
+ bool want_cookie = false, fastopen;
struct flowi4 fl4;
struct tcp_fastopen_cookie foc = { .len = -1 };
- struct tcp_fastopen_cookie valid_foc = { .len = -1 };
- struct sk_buff *skb_synack;
- int do_fastopen;
+ int err;
/* Never answer to SYNs send to broadcast or multicast */
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1490,7 +1277,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
* limitations, they conserve resources and peer is
* evidently real one.
*/
- if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
+ if ((sysctl_tcp_syncookies == 2 ||
+ inet_csk_reqsk_queue_is_full(sk)) && !isn) {
want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
if (!want_cookie)
goto drop;
@@ -1501,8 +1289,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
* clogging syn queue with openreqs with exponentially increasing
* timeout.
*/
- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+ if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
goto drop;
+ }
req = inet_reqsk_alloc(&tcp_request_sock_ops);
if (!req)
@@ -1515,42 +1305,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, &hash_location, 0,
- want_cookie ? NULL : &foc);
-
- if (tmp_opt.cookie_plus > 0 &&
- tmp_opt.saw_tstamp &&
- !tp->rx_opt.cookie_out_never &&
- (sysctl_tcp_cookie_size > 0 ||
- (tp->cookie_values != NULL &&
- tp->cookie_values->cookie_desired > 0))) {
- u8 *c;
- u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
- int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
-
- if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
- goto drop_and_release;
-
- /* Secret recipe starts with IP addresses */
- *mess++ ^= (__force u32)daddr;
- *mess++ ^= (__force u32)saddr;
-
- /* plus variable length Initiator Cookie */
- c = (u8 *)mess;
- while (l-- > 0)
- *c++ ^= *hash_location++;
-
- want_cookie = false; /* not our kind of cookie */
- tmp_ext.cookie_out_never = 0; /* false */
- tmp_ext.cookie_plus = tmp_opt.cookie_plus;
- } else if (!tp->rx_opt.cookie_in_always) {
- /* redundant indications, but ensure initialization. */
- tmp_ext.cookie_out_never = 1; /* true */
- tmp_ext.cookie_plus = 0;
- } else {
- goto drop_and_release;
- }
- tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
+ tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
@@ -1559,16 +1314,17 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_openreq_init(req, &tmp_opt, skb);
ireq = inet_rsk(req);
- ireq->loc_addr = daddr;
- ireq->rmt_addr = saddr;
+ ireq->ir_loc_addr = daddr;
+ ireq->ir_rmt_addr = saddr;
ireq->no_srccheck = inet_sk(sk)->transparent;
ireq->opt = tcp_v4_save_options(skb);
+ ireq->ir_mark = inet_request_mark(sk, skb);
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
if (!want_cookie || tmp_opt.tstamp_ok)
- TCP_ECN_create_request(req, skb);
+ TCP_ECN_create_request(req, skb, sock_net(sk));
if (want_cookie) {
isn = cookie_v4_init_sequence(sk, skb, &req->mss);
@@ -1611,54 +1367,24 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
isn = tcp_v4_init_sequence(skb);
}
- tcp_rsk(req)->snt_isn = isn;
-
- if (dst == NULL) {
- dst = inet_csk_route_req(sk, &fl4, req);
- if (dst == NULL)
- goto drop_and_free;
- }
- do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
-
- /* We don't call tcp_v4_send_synack() directly because we need
- * to make sure a child socket can be created successfully before
- * sending back synack!
- *
- * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
- * (or better yet, call tcp_send_synack() in the child context
- * directly, but will have to fix bunch of other code first)
- * after syn_recv_sock() except one will need to first fix the
- * latter to remove its dependency on the current implementation
- * of tcp_v4_send_synack()->tcp_select_initial_window().
- */
- skb_synack = tcp_make_synack(sk, dst, req,
- (struct request_values *)&tmp_ext,
- fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
-
- if (skb_synack) {
- __tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
- skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
- } else
+ if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
goto drop_and_free;
- if (likely(!do_fastopen)) {
- int err;
- err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
- ireq->rmt_addr, ireq->opt);
- err = net_xmit_eval(err);
+ tcp_rsk(req)->snt_isn = isn;
+ tcp_rsk(req)->snt_synack = tcp_time_stamp;
+ tcp_openreq_init_rwin(req, sk, dst);
+ fastopen = !want_cookie &&
+ tcp_try_fastopen(sk, skb, req, &foc, dst);
+ err = tcp_v4_send_synack(sk, dst, req,
+ skb_get_queue_mapping(skb), &foc);
+ if (!fastopen) {
if (err || want_cookie)
goto drop_and_free;
tcp_rsk(req)->snt_synack = tcp_time_stamp;
tcp_rsk(req)->listener = NULL;
- /* Add the request_sock to the SYN table */
inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
- if (fastopen_cookie_present(&foc) && foc.len != 0)
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
- } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req,
- (struct request_values *)&tmp_ext))
- goto drop_and_free;
+ }
return 0;
@@ -1667,6 +1393,7 @@ drop_and_release:
drop_and_free:
reqsk_free(req);
drop:
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);
@@ -1702,9 +1429,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp = tcp_sk(newsk);
newinet = inet_sk(newsk);
ireq = inet_rsk(req);
- newinet->inet_daddr = ireq->rmt_addr;
- newinet->inet_rcv_saddr = ireq->loc_addr;
- newinet->inet_saddr = ireq->loc_addr;
+ newinet->inet_daddr = ireq->ir_rmt_addr;
+ newinet->inet_rcv_saddr = ireq->ir_loc_addr;
+ newinet->inet_saddr = ireq->ir_loc_addr;
inet_opt = ireq->opt;
rcu_assign_pointer(newinet->inet_opt, inet_opt);
ireq->opt = NULL;
@@ -1725,7 +1452,6 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
}
sk_setup_caps(newsk, dst);
- tcp_mtup_init(newsk);
tcp_sync_mss(newsk, dst_mtu(dst));
newtp->advmss = dst_metric_advmss(dst);
if (tcp_sk(sk)->rx_opt.user_mss &&
@@ -1733,8 +1459,6 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
tcp_initialize_rcv_mss(newsk);
- tcp_synack_rtt_meas(newsk, req);
- newtp->total_retrans = req->num_retrans;
#ifdef CONFIG_TCP_MD5SIG
/* Copy over the MD5 key from the original socket */
@@ -1804,28 +1528,6 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
return sk;
}
-static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
-{
- const struct iphdr *iph = ip_hdr(skb);
-
- if (skb->ip_summed == CHECKSUM_COMPLETE) {
- if (!tcp_v4_check(skb->len, iph->saddr,
- iph->daddr, skb->csum)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- return 0;
- }
- }
-
- skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
- skb->len, IPPROTO_TCP, 0);
-
- if (skb->len <= 76) {
- return __skb_checksum_complete(skb);
- }
- return 0;
-}
-
-
/* The socket must have it's spinlock held when we get
* here.
*
@@ -1859,10 +1561,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
sk->sk_rx_dst = NULL;
}
}
- if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
- rsk = sk;
- goto reset;
- }
+ tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
return 0;
}
@@ -1903,6 +1602,7 @@ discard:
return 0;
csum_err:
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
@@ -1945,6 +1645,51 @@ void tcp_v4_early_demux(struct sk_buff *skb)
}
}
+/* Packet is added to VJ-style prequeue for processing in process
+ * context, if a reader task is waiting. Apparently, this exciting
+ * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
+ * failed somewhere. Latency? Burstiness? Well, at least now we will
+ * see, why it failed. 8)8) --ANK
+ *
+ */
+bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (sysctl_tcp_low_latency || !tp->ucopy.task)
+ return false;
+
+ if (skb->len <= tcp_hdrlen(skb) &&
+ skb_queue_len(&tp->ucopy.prequeue) == 0)
+ return false;
+
+ skb_dst_force(skb);
+ __skb_queue_tail(&tp->ucopy.prequeue, skb);
+ tp->ucopy.memory += skb->truesize;
+ if (tp->ucopy.memory > sk->sk_rcvbuf) {
+ struct sk_buff *skb1;
+
+ BUG_ON(sock_owned_by_user(sk));
+
+ while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
+ sk_backlog_rcv(sk, skb1);
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPPREQUEUEDROPPED);
+ }
+
+ tp->ucopy.memory = 0;
+ } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
+ wake_up_interruptible_sync_poll(sk_sleep(sk),
+ POLLIN | POLLRDNORM | POLLRDBAND);
+ if (!inet_csk_ack_scheduled(sk))
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+ (3 * tcp_rto_min(sk)) / 4,
+ TCP_RTO_MAX);
+ }
+ return true;
+}
+EXPORT_SYMBOL(tcp_prequeue);
+
/*
* From tcp_input.c
*/
@@ -1977,8 +1722,9 @@ int tcp_v4_rcv(struct sk_buff *skb)
* Packet length and doff are validated by header prediction,
* provided case of th->doff==0 is eliminated.
* So, we defer the checks. */
- if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
- goto bad_packet;
+
+ if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
+ goto csum_error;
th = tcp_hdr(skb);
iph = ip_hdr(skb);
@@ -2010,6 +1756,7 @@ process:
if (sk_filter(sk, skb))
goto discard_and_relse;
+ sk_mark_napi_id(sk, skb);
skb->dev = NULL;
bh_lock_sock_nested(sk);
@@ -2044,6 +1791,8 @@ no_tcp_socket:
goto discard_it;
if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
+csum_error:
+ TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
bad_packet:
TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
} else {
@@ -2065,15 +1814,19 @@ do_time_wait:
goto discard_it;
}
- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
- TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
+ if (skb->len < (th->doff << 2)) {
inet_twsk_put(inet_twsk(sk));
- goto discard_it;
+ goto bad_packet;
+ }
+ if (tcp_checksum_complete(skb)) {
+ inet_twsk_put(inet_twsk(sk));
+ goto csum_error;
}
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
&tcp_hashinfo,
+ iph->saddr, th->source,
iph->daddr, th->dest,
inet_iif(skb));
if (sk2) {
@@ -2191,12 +1944,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
if (inet_csk(sk)->icsk_bind_hash)
inet_put_port(sk);
- /* TCP Cookie Transactions */
- if (tp->cookie_values != NULL) {
- kref_put(&tp->cookie_values->kref,
- tcp_cookie_values_release);
- tp->cookie_values = NULL;
- }
BUG_ON(tp->fastopen_rsk != NULL);
/* If socket is aborted during connect operation */
@@ -2210,18 +1957,6 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */
-static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
-{
- return hlist_nulls_empty(head) ? NULL :
- list_entry(head->first, struct inet_timewait_sock, tw_node);
-}
-
-static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
-{
- return !is_a_nulls(tw->tw_node.next) ?
- hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
-}
-
/*
* Get next listener socket follow cur. If cur is NULL, get first socket
* starting from bucket given in st->bucket; when st->bucket is zero the
@@ -2325,10 +2060,9 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
return rc;
}
-static inline bool empty_bucket(struct tcp_iter_state *st)
+static inline bool empty_bucket(const struct tcp_iter_state *st)
{
- return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
- hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
+ return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
}
/*
@@ -2345,7 +2079,6 @@ static void *established_get_first(struct seq_file *seq)
for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
struct sock *sk;
struct hlist_nulls_node *node;
- struct inet_timewait_sock *tw;
spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
/* Lockless fast path for the common case of empty buckets */
@@ -2361,18 +2094,7 @@ static void *established_get_first(struct seq_file *seq)
rc = sk;
goto out;
}
- st->state = TCP_SEQ_STATE_TIME_WAIT;
- inet_twsk_for_each(tw, node,
- &tcp_hashinfo.ehash[st->bucket].twchain) {
- if (tw->tw_family != st->family ||
- !net_eq(twsk_net(tw), net)) {
- continue;
- }
- rc = tw;
- goto out;
- }
spin_unlock_bh(lock);
- st->state = TCP_SEQ_STATE_ESTABLISHED;
}
out:
return rc;
@@ -2381,7 +2103,6 @@ out:
static void *established_get_next(struct seq_file *seq, void *cur)
{
struct sock *sk = cur;
- struct inet_timewait_sock *tw;
struct hlist_nulls_node *node;
struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
@@ -2389,45 +2110,16 @@ static void *established_get_next(struct seq_file *seq, void *cur)
++st->num;
++st->offset;
- if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
- tw = cur;
- tw = tw_next(tw);
-get_tw:
- while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
- tw = tw_next(tw);
- }
- if (tw) {
- cur = tw;
- goto out;
- }
- spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
- st->state = TCP_SEQ_STATE_ESTABLISHED;
-
- /* Look for next non empty bucket */
- st->offset = 0;
- while (++st->bucket <= tcp_hashinfo.ehash_mask &&
- empty_bucket(st))
- ;
- if (st->bucket > tcp_hashinfo.ehash_mask)
- return NULL;
-
- spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
- sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
- } else
- sk = sk_nulls_next(sk);
+ sk = sk_nulls_next(sk);
sk_nulls_for_each_from(sk, node) {
if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
- goto found;
+ return sk;
}
- st->state = TCP_SEQ_STATE_TIME_WAIT;
- tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
- goto get_tw;
-found:
- cur = sk;
-out:
- return cur;
+ spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+ ++st->bucket;
+ return established_get_first(seq);
}
static void *established_get_idx(struct seq_file *seq, loff_t pos)
@@ -2480,10 +2172,9 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
if (rc)
break;
st->bucket = 0;
+ st->state = TCP_SEQ_STATE_ESTABLISHED;
/* Fallthrough */
case TCP_SEQ_STATE_ESTABLISHED:
- case TCP_SEQ_STATE_TIME_WAIT:
- st->state = TCP_SEQ_STATE_ESTABLISHED;
if (st->bucket > tcp_hashinfo.ehash_mask)
break;
rc = established_get_first(seq);
@@ -2540,7 +2231,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
break;
case TCP_SEQ_STATE_ESTABLISHED:
- case TCP_SEQ_STATE_TIME_WAIT:
rc = established_get_next(seq, v);
break;
}
@@ -2564,7 +2254,6 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
if (v != SEQ_START_TOKEN)
spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
break;
- case TCP_SEQ_STATE_TIME_WAIT:
case TCP_SEQ_STATE_ESTABLISHED:
if (v)
spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
@@ -2574,7 +2263,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
int tcp_seq_open(struct inode *inode, struct file *file)
{
- struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
+ struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
struct tcp_iter_state *s;
int err;
@@ -2609,23 +2298,23 @@ EXPORT_SYMBOL(tcp_proc_register);
void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
- proc_net_remove(net, afinfo->name);
+ remove_proc_entry(afinfo->name, net->proc_net);
}
EXPORT_SYMBOL(tcp_proc_unregister);
static void get_openreq4(const struct sock *sk, const struct request_sock *req,
- struct seq_file *f, int i, kuid_t uid, int *len)
+ struct seq_file *f, int i, kuid_t uid)
{
const struct inet_request_sock *ireq = inet_rsk(req);
long delta = req->expires - jiffies;
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
+ " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
i,
- ireq->loc_addr,
+ ireq->ir_loc_addr,
ntohs(inet_sk(sk)->inet_sport),
- ireq->rmt_addr,
- ntohs(ireq->rmt_port),
+ ireq->ir_rmt_addr,
+ ntohs(ireq->ir_rmt_port),
TCP_SYN_RECV,
0, 0, /* could print option size, but that is af dependent. */
1, /* timers active (only the expire timer) */
@@ -2635,11 +2324,10 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
0, /* non standard timer */
0, /* open_requests have no inode */
atomic_read(&sk->sk_refcnt),
- req,
- len);
+ req);
}
-static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
+static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
{
int timer_active;
unsigned long timer_expires;
@@ -2653,7 +2341,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
__u16 srcp = ntohs(inet->inet_sport);
int rx_queue;
- if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
timer_expires = icsk->icsk_timeout;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
@@ -2676,7 +2366,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
- "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
+ "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
i, src, srcp, dest, destp, sk->sk_state,
tp->write_seq - tp->snd_una,
rx_queue,
@@ -2693,16 +2383,15 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
tp->snd_cwnd,
sk->sk_state == TCP_LISTEN ?
(fastopenq ? fastopenq->max_qlen : 0) :
- (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh),
- len);
+ (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
}
static void get_timewait4_sock(const struct inet_timewait_sock *tw,
- struct seq_file *f, int i, int *len)
+ struct seq_file *f, int i)
{
__be32 dest, src;
__u16 destp, srcp;
- long delta = tw->tw_ttd - jiffies;
+ s32 delta = tw->tw_ttd - inet_tw_time_stamp();
dest = tw->tw_daddr;
src = tw->tw_rcv_saddr;
@@ -2710,10 +2399,10 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw,
srcp = ntohs(tw->tw_sport);
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
+ " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
- atomic_read(&tw->tw_refcnt), tw, len);
+ atomic_read(&tw->tw_refcnt), tw);
}
#define TMPSZ 150
@@ -2721,11 +2410,11 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw,
static int tcp4_seq_show(struct seq_file *seq, void *v)
{
struct tcp_iter_state *st;
- int len;
+ struct sock *sk = v;
+ seq_setwidth(seq, TMPSZ - 1);
if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "%-*s\n", TMPSZ - 1,
- " sl local_address rem_address st tx_queue "
+ seq_puts(seq, " sl local_address rem_address st tx_queue "
"rx_queue tr tm->when retrnsmt uid timeout "
"inode");
goto out;
@@ -2735,17 +2424,17 @@ static int tcp4_seq_show(struct seq_file *seq, void *v)
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
case TCP_SEQ_STATE_ESTABLISHED:
- get_tcp4_sock(v, seq, st->num, &len);
+ if (sk->sk_state == TCP_TIME_WAIT)
+ get_timewait4_sock(v, seq, st->num);
+ else
+ get_tcp4_sock(v, seq, st->num);
break;
case TCP_SEQ_STATE_OPENREQ:
- get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
- break;
- case TCP_SEQ_STATE_TIME_WAIT:
- get_timewait4_sock(v, seq, st->num, &len);
+ get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid);
break;
}
- seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
out:
+ seq_pad(seq, '\n');
return 0;
}
@@ -2792,52 +2481,6 @@ void tcp4_proc_exit(void)
}
#endif /* CONFIG_PROC_FS */
-struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
-{
- const struct iphdr *iph = skb_gro_network_header(skb);
- __wsum wsum;
- __sum16 sum;
-
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
- skb->csum)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- break;
- }
-flush:
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
-
- case CHECKSUM_NONE:
- wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
- skb_gro_len(skb), IPPROTO_TCP, 0);
- sum = csum_fold(skb_checksum(skb,
- skb_gro_offset(skb),
- skb_gro_len(skb),
- wsum));
- if (sum)
- goto flush;
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- break;
- }
-
- return tcp_gro_receive(head, skb);
-}
-
-int tcp4_gro_complete(struct sk_buff *skb)
-{
- const struct iphdr *iph = ip_hdr(skb);
- struct tcphdr *th = tcp_hdr(skb);
-
- th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
- iph->saddr, iph->daddr, 0);
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
-
- return tcp_gro_complete(skb);
-}
-
struct proto tcp_prot = {
.name = "TCP",
.owner = THIS_MODULE,
@@ -2861,10 +2504,12 @@ struct proto tcp_prot = {
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
+ .stream_memory_free = tcp_stream_memory_free,
.sockets_allocated = &tcp_sockets_allocated,
.orphan_count = &tcp_orphan_count,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
+ .sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem,
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
@@ -2888,6 +2533,7 @@ EXPORT_SYMBOL(tcp_prot);
static int __net_init tcp_sk_init(struct net *net)
{
+ net->ipv4.sysctl_tcp_ecn = 2;
return 0;
}