aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/rtnetlink.h3
-rw-r--r--include/linux/tcp.h1
-rw-r--r--include/net/dst.h6
-rw-r--r--include/net/flow.h5
-rw-r--r--include/net/inet_connection_sock.h1
-rw-r--r--include/net/inet_sock.h2
-rw-r--r--include/net/inetpeer.h8
-rw-r--r--include/net/netns/ipv4.h3
-rw-r--r--include/net/route.h61
-rw-r--r--include/net/tcp.h9
-rw-r--r--net/core/rtnetlink.c4
-rw-r--r--net/decnet/dn_route.c13
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/icmp.c3
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inetpeer.c4
-rw-r--r--net/ipv4/route.c349
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_input.c188
-rw-r--r--net/ipv4/tcp_ipv4.c46
-rw-r--r--net/ipv4/tcp_metrics.c697
-rw-r--r--net/ipv4/tcp_minisocks.c62
-rw-r--r--net/ipv4/xfrm4_policy.c8
-rw-r--r--net/ipv6/icmp.c4
-rw-r--r--net/ipv6/ip6_output.c10
-rw-r--r--net/ipv6/ndisc.c8
-rw-r--r--net/ipv6/route.c16
-rw-r--r--net/ipv6/tcp_ipv6.c49
29 files changed, 837 insertions, 731 deletions
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index ea60b085410..db71c4ad862 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -619,8 +619,7 @@ extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid,
extern void rtnl_set_sk_err(struct net *net, u32 group, int error);
extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
- u32 id, u32 ts, u32 tsage, long expires,
- u32 error);
+ u32 id, long expires, u32 error);
extern void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change);
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 7d3bcedc062..2de9cf46f9f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -506,7 +506,6 @@ struct tcp_timewait_sock {
u32 tw_rcv_wnd;
u32 tw_ts_recent;
long tw_ts_recent_stamp;
- struct inet_peer *tw_peer;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *tw_md5_key;
#endif
diff --git a/include/net/dst.h b/include/net/dst.h
index b2634e44661..51610468c63 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -209,12 +209,6 @@ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metr
return msecs_to_jiffies(dst_metric(dst, metric));
}
-static inline void set_dst_metric_rtt(struct dst_entry *dst, int metric,
- unsigned long rtt)
-{
- dst_metric_set(dst, metric, jiffies_to_msecs(rtt));
-}
-
static inline u32
dst_allfrag(const struct dst_entry *dst)
{
diff --git a/include/net/flow.h b/include/net/flow.h
index bd524f59856..ce9cb7656b4 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -20,9 +20,8 @@ struct flowi_common {
__u8 flowic_proto;
__u8 flowic_flags;
#define FLOWI_FLAG_ANYSRC 0x01
-#define FLOWI_FLAG_PRECOW_METRICS 0x02
-#define FLOWI_FLAG_CAN_SLEEP 0x04
-#define FLOWI_FLAG_RT_NOCACHE 0x08
+#define FLOWI_FLAG_CAN_SLEEP 0x02
+#define FLOWI_FLAG_RT_NOCACHE 0x04
__u32 flowic_secid;
};
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index af3c743a40e..291e7cee14e 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -43,7 +43,6 @@ struct inet_connection_sock_af_ops {
struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst);
- struct inet_peer *(*get_peer)(struct sock *sk);
u16 net_header_len;
u16 net_frag_header_len;
u16 sockaddr_len;
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index ae17e1352d7..924d7b98ab6 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -245,8 +245,6 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl)
flags |= FLOWI_FLAG_ANYSRC;
- if (sk->sk_protocol == IPPROTO_TCP)
- flags |= FLOWI_FLAG_PRECOW_METRICS;
return flags;
}
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index c27c8f10ebd..53f464d7cdd 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -36,25 +36,19 @@ struct inet_peer {
u32 metrics[RTAX_MAX];
u32 rate_tokens; /* rate limiting for ICMP */
unsigned long rate_last;
- unsigned long pmtu_expires;
- u32 pmtu_orig;
- u32 pmtu_learned;
- struct inetpeer_addr_base redirect_learned;
union {
struct list_head gc_list;
struct rcu_head gc_rcu;
};
/*
* Once inet_peer is queued for deletion (refcnt == -1), following fields
- * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
+ * are not available: rid, ip_id_count
* We can share memory with rcu_head to help keep inet_peer small.
*/
union {
struct {
atomic_t rid; /* Frag reception counter */
atomic_t ip_id_count; /* IP ID for the next packet */
- __u32 tcp_ts;
- __u32 tcp_ts_stamp;
};
struct rcu_head rcu;
struct inet_peer *gc_next;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 599e48fa97c..2e089a99d60 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -7,6 +7,7 @@
#include <net/inet_frag.h>
+struct tcpm_hash_bucket;
struct ctl_table_header;
struct ipv4_devconf;
struct fib_rules_ops;
@@ -39,6 +40,8 @@ struct netns_ipv4 {
struct sock **icmp_sk;
struct sock *tcp_sock;
struct inet_peer_base *peers;
+ struct tcpm_hash_bucket *tcp_metrics_hash;
+ unsigned int tcp_metrics_hash_mask;
struct netns_frags frags;
#ifdef CONFIG_NETFILTER
struct xt_table *iptable_filter;
diff --git a/include/net/route.h b/include/net/route.h
index 211e2665139..52362368af0 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -40,7 +40,6 @@
#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
struct fib_nh;
-struct inet_peer;
struct fib_info;
struct rtable {
struct dst_entry dst;
@@ -65,45 +64,10 @@ struct rtable {
__be32 rt_gateway;
/* Miscellaneous cached information */
- u32 rt_peer_genid;
- unsigned long _peer; /* long-living peer info */
+ u32 rt_pmtu;
struct fib_info *fi; /* for client ref to shared metrics */
};
-static inline struct inet_peer *rt_peer_ptr(struct rtable *rt)
-{
- return inetpeer_ptr(rt->_peer);
-}
-
-static inline bool rt_has_peer(struct rtable *rt)
-{
- return inetpeer_ptr_is_peer(rt->_peer);
-}
-
-static inline void __rt_set_peer(struct rtable *rt, struct inet_peer *peer)
-{
- __inetpeer_ptr_set_peer(&rt->_peer, peer);
-}
-
-static inline bool rt_set_peer(struct rtable *rt, struct inet_peer *peer)
-{
- return inetpeer_ptr_set_peer(&rt->_peer, peer);
-}
-
-static inline void rt_init_peer(struct rtable *rt, struct inet_peer_base *base)
-{
- inetpeer_init_ptr(&rt->_peer, base);
-}
-
-static inline void rt_transfer_peer(struct rtable *rt, struct rtable *ort)
-{
- rt->_peer = ort->_peer;
- if (rt_has_peer(ort)) {
- struct inet_peer *peer = rt_peer_ptr(ort);
- atomic_inc(&peer->refcnt);
- }
-}
-
static inline bool rt_is_input_route(const struct rtable *rt)
{
return rt->rt_route_iif != 0;
@@ -278,8 +242,6 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32
if (inet_sk(sk)->transparent)
flow_flags |= FLOWI_FLAG_ANYSRC;
- if (protocol == IPPROTO_TCP)
- flow_flags |= FLOWI_FLAG_PRECOW_METRICS;
if (can_sleep)
flow_flags |= FLOWI_FLAG_CAN_SLEEP;
@@ -328,27 +290,6 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable
return rt;
}
-extern void rt_bind_peer(struct rtable *rt, __be32 daddr, int create);
-
-static inline struct inet_peer *__rt_get_peer(struct rtable *rt, __be32 daddr, int create)
-{
- if (rt_has_peer(rt))
- return rt_peer_ptr(rt);
-
- rt_bind_peer(rt, daddr, create);
- return (rt_has_peer(rt) ? rt_peer_ptr(rt) : NULL);
-}
-
-static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr)
-{
- return __rt_get_peer(rt, daddr, 0);
-}
-
-static inline struct inet_peer *rt_get_peer_create(struct rtable *rt, __be32 daddr)
-{
- return __rt_get_peer(rt, daddr, 1);
-}
-
static inline int inet_iif(const struct sk_buff *skb)
{
return skb_rtable(skb)->rt_iif;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 53fb7d81417..3618fefae04 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -388,6 +388,13 @@ extern void tcp_enter_frto(struct sock *sk);
extern void tcp_enter_loss(struct sock *sk, int how);
extern void tcp_clear_retrans(struct tcp_sock *tp);
extern void tcp_update_metrics(struct sock *sk);
+extern void tcp_init_metrics(struct sock *sk);
+extern void tcp_metrics_init(void);
+extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check);
+extern bool tcp_remember_stamp(struct sock *sk);
+extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw);
+extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst);
+extern void tcp_disable_fack(struct tcp_sock *tp);
extern void tcp_close(struct sock *sk, long timeout);
extern void tcp_init_sock(struct sock *sk);
extern unsigned int tcp_poll(struct file * file, struct socket *sock,
@@ -556,6 +563,8 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
return (tp->srtt >> 3) + tp->rttvar;
}
+extern void tcp_set_rto(struct sock *sk);
+
static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
{
tp->pred_flags = htonl((tp->tcp_header_len << 26) |
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2b325c340b4..64127eee786 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -615,7 +615,7 @@ nla_put_failure:
EXPORT_SYMBOL(rtnetlink_put_metrics);
int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
- u32 ts, u32 tsage, long expires, u32 error)
+ long expires, u32 error)
{
struct rta_cacheinfo ci = {
.rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse),
@@ -623,8 +623,6 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
.rta_clntref = atomic_read(&(dst->__refcnt)),
.rta_error = error,
.rta_id = id,
- .rta_ts = ts,
- .rta_tsage = tsage,
};
if (expires)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 6e74b3f110b..b5594cc73ee 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1590,7 +1590,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
goto errout;
expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
- if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires,
+ if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires,
rt->dst.error) < 0)
goto errout;
@@ -1812,12 +1812,11 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v)
char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN];
seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n",
- rt->dst.dev ? rt->dst.dev->name : "*",
- dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1),
- dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2),
- atomic_read(&rt->dst.__refcnt),
- rt->dst.__use,
- (int) dst_metric(&rt->dst, RTAX_RTT));
+ rt->dst.dev ? rt->dst.dev->name : "*",
+ dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1),
+ dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2),
+ atomic_read(&rt->dst.__refcnt),
+ rt->dst.__use, 0);
return 0;
}
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index ff75d3bbcd6..5a23e8b3710 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -7,7 +7,7 @@ obj-y := route.o inetpeer.o protocol.o \
ip_output.o ip_sockglue.o inet_hashtables.o \
inet_timewait_sock.o inet_connection_sock.o \
tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
- tcp_minisocks.o tcp_cong.o \
+ tcp_minisocks.o tcp_cong.o tcp_metrics.o \
datagram.o raw.o udp.o udplite.o \
arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o \
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ae301c897a1..d71bfbdc0bf 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -794,6 +794,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
val = nla_get_u32(nla);
if (type == RTAX_ADVMSS && val > 65535 - 40)
val = 65535 - 40;
+ if (type == RTAX_MTU && val > 65535 - 15)
+ val = 65535 - 15;
fi->fib_metrics[type - 1] = val;
}
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4bce5a2830a..4a049449305 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -254,9 +254,10 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
/* Limit if icmp type is enabled in ratemask. */
if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
- struct inet_peer *peer = rt_get_peer_create(rt, fl4->daddr);
+ struct inet_peer *peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1);
rc = inet_peer_xrlim_allow(peer,
net->ipv4.sysctl_icmp_ratelimit);
+ inet_putpeer(peer);
}
out:
return rc;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 034ddbe42ad..76825be3b64 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -375,7 +375,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
const struct inet_request_sock *ireq = inet_rsk(req);
struct ip_options_rcu *opt = inet_rsk(req)->opt;
struct net *net = sock_net(sk);
- int flags = inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS;
+ int flags = inet_sk_flowi_flags(sk);
if (nocache)
flags |= FLOWI_FLAG_RT_NOCACHE;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index da90a8cab61..e1e0a4e8fd3 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -508,13 +508,9 @@ relookup:
(daddr->family == AF_INET) ?
secure_ip_id(daddr->addr.a4) :
secure_ipv6_id(daddr->addr.a6));
- p->tcp_ts_stamp = 0;
p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
p->rate_tokens = 0;
p->rate_last = 0;
- p->pmtu_expires = 0;
- p->pmtu_orig = 0;
- memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
INIT_LIST_HEAD(&p->gc_list);
/* Link the node. */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 72e88c20802..95bfa1ba5b2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -158,34 +158,8 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
- struct rtable *rt = (struct rtable *) dst;
- struct inet_peer *peer;
- u32 *p = NULL;
-
- peer = rt_get_peer_create(rt, rt->rt_dst);
- if (peer) {
- u32 *old_p = __DST_METRICS_PTR(old);
- unsigned long prev, new;
-
- p = peer->metrics;
- if (inet_metrics_new(peer))
- memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
-
- new = (unsigned long) p;
- prev = cmpxchg(&dst->_metrics, old, new);
-
- if (prev != old) {
- p = __DST_METRICS_PTR(prev);
- if (prev & DST_METRICS_READ_ONLY)
- p = NULL;
- } else {
- if (rt->fi) {
- fib_info_put(rt->fi);
- rt->fi = NULL;
- }
- }
- }
- return p;
+ WARN_ON(1);
+ return NULL;
}
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
@@ -423,18 +397,16 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
int len;
seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
- "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
- r->dst.dev ? r->dst.dev->name : "*",
- (__force u32)r->rt_dst,
- (__force u32)r->rt_gateway,
- r->rt_flags, atomic_read(&r->dst.__refcnt),
- r->dst.__use, 0, (__force u32)r->rt_src,
- dst_metric_advmss(&r->dst) + 40,
- dst_metric(&r->dst, RTAX_WINDOW),
- (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
- dst_metric(&r->dst, RTAX_RTTVAR)),
- r->rt_key_tos,
- -1, 0, 0, &len);
+ "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
+ r->dst.dev ? r->dst.dev->name : "*",
+ (__force u32)r->rt_dst,
+ (__force u32)r->rt_gateway,
+ r->rt_flags, atomic_read(&r->dst.__refcnt),
+ r->dst.__use, 0, (__force u32)r->rt_src,
+ dst_metric_advmss(&r->dst) + 40,
+ dst_metric(&r->dst, RTAX_WINDOW), 0,
+ r->rt_key_tos,
+ -1, 0, 0, &len);
seq_printf(seq, "%*s\n", 127 - len, "");
}
@@ -671,7 +643,7 @@ static inline int rt_fast_clean(struct rtable *rth)
static inline int rt_valuable(struct rtable *rth)
{
return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
- (rt_has_peer(rth) && rt_peer_ptr(rth)->pmtu_expires);
+ rth->dst.expires;
}
static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -917,7 +889,6 @@ static void rt_cache_invalidate(struct net *net)
get_random_bytes(&shuffle, sizeof(shuffle));
atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
- inetpeer_invalidate_family(AF_INET);
}
/*
@@ -1244,31 +1215,6 @@ skip_hashing:
return rt;
}
-static atomic_t __rt_peer_genid = ATOMIC_INIT(0);
-
-static u32 rt_peer_genid(void)
-{
- return atomic_read(&__rt_peer_genid);
-}
-
-void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
-{
- struct inet_peer_base *base;
- struct inet_peer *peer;
-
- base = inetpeer_base_ptr(rt->_peer);
- if (!base)
- return;
-
- peer = inet_getpeer_v4(base, daddr, create);
- if (peer) {
- if (!rt_set_peer(rt, peer))
- inet_putpeer(peer);
- else
- rt->rt_peer_genid = rt_peer_genid();
- }
-}
-
/*
* Peer allocation may fail only in serious out-of-memory conditions. However
* we still can generate some output.
@@ -1291,20 +1237,15 @@ static void ip_select_fb_ident(struct iphdr *iph)
void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
{
- struct rtable *rt = (struct rtable *) dst;
-
- if (rt && !(rt->dst.flags & DST_NOPEER)) {
- struct inet_peer *peer = rt_get_peer_create(rt, rt->rt_dst);
+ struct net *net = dev_net(dst->dev);
+ struct inet_peer *peer;
- /* If peer is attached to destination, it is never detached,
- so that we need not to grab a lock to dereference it.
- */
- if (peer) {
- iph->id = htons(inet_getid(peer, more));
- return;
- }
- } else if (!rt)
- pr_debug("rt_bind_peer(0) @%p\n", __builtin_return_address(0));
+ peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
+ if (peer) {
+ iph->id = htons(inet_getid(peer, more));
+ inet_putpeer(peer);
+ return;
+ }
ip_select_fb_ident(iph);
}
@@ -1330,30 +1271,6 @@ static void rt_del(unsigned int hash, struct rtable *rt)
spin_unlock_bh(rt_hash_lock_addr(hash));
}
-static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
-{
- struct rtable *rt = (struct rtable *) dst;
- __be32 orig_gw = rt->rt_gateway;
- struct neighbour *n;
-
- dst_confirm(&rt->dst);
-
- rt->rt_gateway = peer->redirect_learned.a4;
-
- n = ipv4_neigh_lookup(&rt->dst, NULL, &rt->rt_gateway);
- if (!n) {
- rt->rt_gateway = orig_gw;
- return;
- }
- if (!(n->nud_state & NUD_VALID)) {
- neigh_event_send(n, NULL);
- } else {
- rt->rt_flags |= RTCF_REDIRECTED;
- call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
- }
- neigh_release(n);
-}
-
/* called in rcu_read_lock() section */
void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
__be32 saddr, struct net_device *dev)
@@ -1362,7 +1279,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
struct in_device *in_dev = __in_dev_get_rcu(dev);
__be32 skeys[2] = { saddr, 0 };
int ikeys[2] = { dev->ifindex, 0 };
- struct inet_peer *peer;
struct net *net;
if (!in_dev)
@@ -1395,6 +1311,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
rthp = &rt_hash_table[hash].chain;
while ((rt = rcu_dereference(*rthp)) != NULL) {
+ struct neighbour *n;
+
rthp = &rt->dst.rt_next;
if (rt->rt_key_dst != daddr ||
@@ -1408,13 +1326,16 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
rt->rt_gateway != old_gw)
continue;
- peer = rt_get_peer_create(rt, rt->rt_dst);
- if (peer) {
- if (peer->redirect_learned.a4 != new_gw) {
- peer->redirect_learned.a4 = new_gw;
- atomic_inc(&__rt_peer_genid);
+ n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
+ if (n) {
+ if (!(n->nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
+ } else {
+ rt->rt_gateway = new_gw;
+ rt->rt_flags |= RTCF_REDIRECTED;
+ call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
}
- check_peer_redir(&rt->dst, peer);
+ neigh_release(n);
}
}
}
@@ -1432,23 +1353,6 @@ reject_redirect:
;
}
-static bool peer_pmtu_expired(struct inet_peer *peer)
-{
- unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
-
- return orig &&
- time_after_eq(jiffies, orig) &&
- cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
-}
-
-static bool peer_pmtu_cleaned(struct inet_peer *peer)
-{
- unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
-
- return orig &&
- cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
-}
-
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
struct rtable *rt = (struct rtable *)dst;
@@ -1458,16 +1362,13 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
if (dst->obsolete > 0) {
ip_rt_put(rt);
ret = NULL;
- } else if (rt->rt_flags & RTCF_REDIRECTED) {
+ } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
+ rt->dst.expires) {
unsigned int hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
rt->rt_oif,
rt_genid(dev_net(dst->dev)));
rt_del(hash, rt);
ret = NULL;
- } else if (rt_has_peer(rt)) {
- struct inet_peer *peer = rt_peer_ptr(rt);
- if (peer_pmtu_expired(peer))
- dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
}
}
return ret;
@@ -1494,6 +1395,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
struct rtable *rt = skb_rtable(skb);
struct in_device *in_dev;
struct inet_peer *peer;
+ struct net *net;
int log_martians;
rcu_read_lock();
@@ -1505,7 +1407,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
log_martians = IN_DEV_LOG_MARTIANS(in_dev);
rcu_read_unlock();
- peer = rt_get_peer_create(rt, rt->rt_dst);
+ net = dev_net(rt->dst.dev);
+ peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
if (!peer) {
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
return;
@@ -1522,7 +1425,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
*/
if (peer->rate_tokens >= ip_rt_redirect_number) {
peer->rate_last = jiffies;
- return;
+ goto out_put_peer;
}
/* Check for load limit; set rate_last to the latest sent
@@ -1543,6 +1446,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
&rt->rt_dst, &rt->rt_gateway);
#endif
}
+out_put_peer:
+ inet_putpeer(peer);
}
static int ip_error(struct sk_buff *skb)
@@ -1585,7 +1490,7 @@ static int ip_error(struct sk_buff *skb)
break;
}
- peer = rt_get_peer_create(rt, rt->rt_dst);
+ peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
send = true;
if (peer) {
@@ -1598,6 +1503,7 @@ static int ip_error(struct sk_buff *skb)
peer->rate_tokens -= ip_rt_error_cost;
else
send = false;
+ inet_putpeer(peer);
}
if (send)
icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
@@ -1606,50 +1512,17 @@ out: kfree_skb(skb);
return 0;
}
-static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
-{
- unsigned long expires = ACCESS_ONCE(peer->pmtu_expires);
-
- if (!expires)
- return;
- if (time_before(jiffies, expires)) {
- u32 orig_dst_mtu = dst_mtu(dst);
- if (peer->pmtu_learned < orig_dst_mtu) {
- if (!peer->pmtu_orig)
- peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU);
- dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned);
- }
- } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires)
- dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
-}
-
static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
struct rtable *rt = (struct rtable *) dst;
- struct inet_peer *peer;
dst_confirm(dst);
- peer = rt_get_peer_create(rt, rt->rt_dst);
- if (peer) {
- unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires);
-
- if (mtu < ip_rt_min_pmtu)
- mtu = ip_rt_min_pmtu;
- if (!pmtu_expires || mtu < peer->pmtu_learned) {
-
- pmtu_expires = jiffies + ip_rt_mtu_expires;
- if (!pmtu_expires)
- pmtu_expires = 1UL;
-
- peer->pmtu_learned = mtu;
- peer->pmtu_expires = pmtu_expires;
+ if (mtu < ip_rt_min_pmtu)
+ mtu = ip_rt_min_pmtu;
- atomic_inc(&__rt_peer_genid);
- rt->rt_peer_genid = rt_peer_genid();
- }
- check_peer_pmtu(dst, peer);
- }
+ rt->rt_pmtu = mtu;
+ dst_set_expires(&rt->dst, ip_rt_mtu_expires);
}
void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
@@ -1660,7 +1533,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
struct rtable *rt;
flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
- protocol, flow_flags | FLOWI_FLAG_PRECOW_METRICS,
+ protocol, flow_flags,
iph->daddr, iph->saddr, 0, 0);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
@@ -1681,30 +1554,12 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
-static void ipv4_validate_peer(struct rtable *rt)
-{
- if (rt->rt_peer_genid != rt_peer_genid()) {
- struct inet_peer *peer = rt_get_peer(rt, rt->rt_dst);
-
- if (peer) {
- check_peer_pmtu(&rt->dst, peer);
-
- if (peer->redirect_learned.a4 &&
- peer->redirect_learned.a4 != rt->rt_gateway)
- check_peer_redir(&rt->dst, peer);
- }
-
- rt->rt_peer_genid = rt_peer_genid();
- }
-}
-
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
struct rtable *rt = (struct rtable *) dst;
if (rt_is_expired(rt))
return NULL;
- ipv4_validate_peer(rt);
return dst;
}
@@ -1716,10 +1571,6 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
fib_info_put(rt->fi);
rt->fi = NULL;
}
- if (rt_has_peer(rt)) {
- struct inet_peer *peer = rt_peer_ptr(rt);
- inet_putpeer(peer);
- }
}
@@ -1730,11 +1581,8 @@ static void ipv4_link_failure(struct sk_buff *skb)
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
rt = skb_rtable(skb);
- if (rt && rt_has_peer(rt)) {
- struct inet_peer *peer = rt_peer_ptr(rt);
- if (peer_pmtu_cleaned(peer))
- dst_metric_set(&rt->dst, RTAX_MTU, peer->pmtu_orig);
- }
+ if (rt)
+ dst_set_expires(&rt->dst, 0);
}
static int ip_rt_bug(struct sk_buff *skb)
@@ -1814,7 +1662,13 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
const struct rtable *rt = (const