diff options
-rw-r--r-- | include/linux/rtnetlink.h | 3 | ||||
-rw-r--r-- | include/linux/tcp.h | 1 | ||||
-rw-r--r-- | include/net/dst.h | 6 | ||||
-rw-r--r-- | include/net/flow.h | 5 | ||||
-rw-r--r-- | include/net/inet_connection_sock.h | 1 | ||||
-rw-r--r-- | include/net/inet_sock.h | 2 | ||||
-rw-r--r-- | include/net/inetpeer.h | 8 | ||||
-rw-r--r-- | include/net/netns/ipv4.h | 3 | ||||
-rw-r--r-- | include/net/route.h | 61 | ||||
-rw-r--r-- | include/net/tcp.h | 9 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 4 | ||||
-rw-r--r-- | net/decnet/dn_route.c | 13 | ||||
-rw-r--r-- | net/ipv4/Makefile | 2 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 2 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 3 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 2 | ||||
-rw-r--r-- | net/ipv4/inetpeer.c | 4 | ||||
-rw-r--r-- | net/ipv4/route.c | 349 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 188 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 46 | ||||
-rw-r--r-- | net/ipv4/tcp_metrics.c | 697 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 62 | ||||
-rw-r--r-- | net/ipv4/xfrm4_policy.c | 8 | ||||
-rw-r--r-- | net/ipv6/icmp.c | 4 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 10 | ||||
-rw-r--r-- | net/ipv6/ndisc.c | 8 | ||||
-rw-r--r-- | net/ipv6/route.c | 16 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 49 |
29 files changed, 837 insertions, 731 deletions
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ea60b085410..db71c4ad862 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -619,8 +619,7 @@ extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, extern void rtnl_set_sk_err(struct net *net, u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, - u32 id, u32 ts, u32 tsage, long expires, - u32 error); + u32 id, long expires, u32 error); extern void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change); diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 7d3bcedc062..2de9cf46f9f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -506,7 +506,6 @@ struct tcp_timewait_sock { u32 tw_rcv_wnd; u32 tw_ts_recent; long tw_ts_recent_stamp; - struct inet_peer *tw_peer; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; #endif diff --git a/include/net/dst.h b/include/net/dst.h index b2634e44661..51610468c63 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -209,12 +209,6 @@ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metr return msecs_to_jiffies(dst_metric(dst, metric)); } -static inline void set_dst_metric_rtt(struct dst_entry *dst, int metric, - unsigned long rtt) -{ - dst_metric_set(dst, metric, jiffies_to_msecs(rtt)); -} - static inline u32 dst_allfrag(const struct dst_entry *dst) { diff --git a/include/net/flow.h b/include/net/flow.h index bd524f59856..ce9cb7656b4 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -20,9 +20,8 @@ struct flowi_common { __u8 flowic_proto; __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 -#define FLOWI_FLAG_PRECOW_METRICS 0x02 -#define FLOWI_FLAG_CAN_SLEEP 0x04 -#define FLOWI_FLAG_RT_NOCACHE 0x08 +#define FLOWI_FLAG_CAN_SLEEP 0x02 +#define FLOWI_FLAG_RT_NOCACHE 0x04 __u32 flowic_secid; }; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index af3c743a40e..291e7cee14e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -43,7 +43,6 @@ struct inet_connection_sock_af_ops { struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst); - struct inet_peer *(*get_peer)(struct sock *sk); u16 net_header_len; u16 net_frag_header_len; u16 sockaddr_len; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index ae17e1352d7..924d7b98ab6 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -245,8 +245,6 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk) if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl) flags |= FLOWI_FLAG_ANYSRC; - if (sk->sk_protocol == IPPROTO_TCP) - flags |= FLOWI_FLAG_PRECOW_METRICS; return flags; } diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index c27c8f10ebd..53f464d7cdd 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -36,25 +36,19 @@ struct inet_peer { u32 metrics[RTAX_MAX]; u32 rate_tokens; /* rate limiting for ICMP */ unsigned long rate_last; - unsigned long pmtu_expires; - u32 pmtu_orig; - u32 pmtu_learned; - struct inetpeer_addr_base redirect_learned; union { struct list_head gc_list; struct rcu_head gc_rcu; }; /* * Once inet_peer is queued for deletion (refcnt == -1), following fields - * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp + * are not available: rid, ip_id_count * We can share memory with rcu_head to help keep inet_peer small. */ union { struct { atomic_t rid; /* Frag reception counter */ atomic_t ip_id_count; /* IP ID for the next packet */ - __u32 tcp_ts; - __u32 tcp_ts_stamp; }; struct rcu_head rcu; struct inet_peer *gc_next; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 599e48fa97c..2e089a99d60 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -7,6 +7,7 @@ #include <net/inet_frag.h> +struct tcpm_hash_bucket; struct ctl_table_header; struct ipv4_devconf; struct fib_rules_ops; @@ -39,6 +40,8 @@ struct netns_ipv4 { struct sock **icmp_sk; struct sock *tcp_sock; struct inet_peer_base *peers; + struct tcpm_hash_bucket *tcp_metrics_hash; + unsigned int tcp_metrics_hash_mask; struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; diff --git a/include/net/route.h b/include/net/route.h index 211e2665139..52362368af0 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -40,7 +40,6 @@ #define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) struct fib_nh; -struct inet_peer; struct fib_info; struct rtable { struct dst_entry dst; @@ -65,45 +64,10 @@ struct rtable { __be32 rt_gateway; /* Miscellaneous cached information */ - u32 rt_peer_genid; - unsigned long _peer; /* long-living peer info */ + u32 rt_pmtu; struct fib_info *fi; /* for client ref to shared metrics */ }; -static inline struct inet_peer *rt_peer_ptr(struct rtable *rt) -{ - return inetpeer_ptr(rt->_peer); -} - -static inline bool rt_has_peer(struct rtable *rt) -{ - return inetpeer_ptr_is_peer(rt->_peer); -} - -static inline void __rt_set_peer(struct rtable *rt, struct inet_peer *peer) -{ - __inetpeer_ptr_set_peer(&rt->_peer, peer); -} - -static inline bool rt_set_peer(struct rtable *rt, struct inet_peer *peer) -{ - return inetpeer_ptr_set_peer(&rt->_peer, peer); -} - -static inline void rt_init_peer(struct rtable *rt, struct inet_peer_base *base) -{ - inetpeer_init_ptr(&rt->_peer, base); -} - -static inline void rt_transfer_peer(struct rtable *rt, struct rtable *ort) -{ - rt->_peer = ort->_peer; - if (rt_has_peer(ort)) { - struct inet_peer *peer = rt_peer_ptr(ort); - atomic_inc(&peer->refcnt); - } -} - static inline bool rt_is_input_route(const struct rtable *rt) { return rt->rt_route_iif != 0; @@ -278,8 +242,6 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; - if (protocol == IPPROTO_TCP) - flow_flags |= FLOWI_FLAG_PRECOW_METRICS; if (can_sleep) flow_flags |= FLOWI_FLAG_CAN_SLEEP; @@ -328,27 +290,6 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable return rt; } -extern void rt_bind_peer(struct rtable *rt, __be32 daddr, int create); - -static inline struct inet_peer *__rt_get_peer(struct rtable *rt, __be32 daddr, int create) -{ - if (rt_has_peer(rt)) - return rt_peer_ptr(rt); - - rt_bind_peer(rt, daddr, create); - return (rt_has_peer(rt) ? rt_peer_ptr(rt) : NULL); -} - -static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr) -{ - return __rt_get_peer(rt, daddr, 0); -} - -static inline struct inet_peer *rt_get_peer_create(struct rtable *rt, __be32 daddr) -{ - return __rt_get_peer(rt, daddr, 1); -} - static inline int inet_iif(const struct sk_buff *skb) { return skb_rtable(skb)->rt_iif; diff --git a/include/net/tcp.h b/include/net/tcp.h index 53fb7d81417..3618fefae04 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -388,6 +388,13 @@ extern void tcp_enter_frto(struct sock *sk); extern void tcp_enter_loss(struct sock *sk, int how); extern void tcp_clear_retrans(struct tcp_sock *tp); extern void tcp_update_metrics(struct sock *sk); +extern void tcp_init_metrics(struct sock *sk); +extern void tcp_metrics_init(void); +extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check); +extern bool tcp_remember_stamp(struct sock *sk); +extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); +extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); +extern void tcp_disable_fack(struct tcp_sock *tp); extern void tcp_close(struct sock *sk, long timeout); extern void tcp_init_sock(struct sock *sk); extern unsigned int tcp_poll(struct file * file, struct socket *sock, @@ -556,6 +563,8 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return (tp->srtt >> 3) + tp->rttvar; } +extern void tcp_set_rto(struct sock *sk); + static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { tp->pred_flags = htonl((tp->tcp_header_len << 26) | diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2b325c340b4..64127eee786 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -615,7 +615,7 @@ nla_put_failure: EXPORT_SYMBOL(rtnetlink_put_metrics); int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, - u32 ts, u32 tsage, long expires, u32 error) + long expires, u32 error) { struct rta_cacheinfo ci = { .rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse), @@ -623,8 +623,6 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, .rta_clntref = atomic_read(&(dst->__refcnt)), .rta_error = error, .rta_id = id, - .rta_ts = ts, - .rta_tsage = tsage, }; if (expires) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 6e74b3f110b..b5594cc73ee 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1590,7 +1590,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, goto errout; expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; - if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires, + if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) goto errout; @@ -1812,12 +1812,11 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v) char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN]; seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n", - rt->dst.dev ? rt->dst.dev->name : "*", - dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1), - dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2), - atomic_read(&rt->dst.__refcnt), - rt->dst.__use, - (int) dst_metric(&rt->dst, RTAX_RTT)); + rt->dst.dev ? rt->dst.dev->name : "*", + dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1), + dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2), + atomic_read(&rt->dst.__refcnt), + rt->dst.__use, 0); return 0; } diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ff75d3bbcd6..5a23e8b3710 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -7,7 +7,7 @@ obj-y := route.o inetpeer.o protocol.o \ ip_output.o ip_sockglue.o inet_hashtables.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ - tcp_minisocks.o tcp_cong.o \ + tcp_minisocks.o tcp_cong.o tcp_metrics.o \ datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ae301c897a1..d71bfbdc0bf 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -794,6 +794,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg) val = nla_get_u32(nla); if (type == RTAX_ADVMSS && val > 65535 - 40) val = 65535 - 40; + if (type == RTAX_MTU && val > 65535 - 15) + val = 65535 - 15; fi->fib_metrics[type - 1] = val; } } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4bce5a2830a..4a049449305 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -254,9 +254,10 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, /* Limit if icmp type is enabled in ratemask. */ if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { - struct inet_peer *peer = rt_get_peer_create(rt, fl4->daddr); + struct inet_peer *peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1); rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); + inet_putpeer(peer); } out: return rc; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 034ddbe42ad..76825be3b64 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -375,7 +375,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, const struct inet_request_sock *ireq = inet_rsk(req); struct ip_options_rcu *opt = inet_rsk(req)->opt; struct net *net = sock_net(sk); - int flags = inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS; + int flags = inet_sk_flowi_flags(sk); if (nocache) flags |= FLOWI_FLAG_RT_NOCACHE; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index da90a8cab61..e1e0a4e8fd3 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -508,13 +508,9 @@ relookup: (daddr->family == AF_INET) ? secure_ip_id(daddr->addr.a4) : secure_ipv6_id(daddr->addr.a6)); - p->tcp_ts_stamp = 0; p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; p->rate_last = 0; - p->pmtu_expires = 0; - p->pmtu_orig = 0; - memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); INIT_LIST_HEAD(&p->gc_list); /* Link the node. */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 72e88c20802..95bfa1ba5b2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -158,34 +158,8 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) { - struct rtable *rt = (struct rtable *) dst; - struct inet_peer *peer; - u32 *p = NULL; - - peer = rt_get_peer_create(rt, rt->rt_dst); - if (peer) { - u32 *old_p = __DST_METRICS_PTR(old); - unsigned long prev, new; - - p = peer->metrics; - if (inet_metrics_new(peer)) - memcpy(p, old_p, sizeof(u32) * RTAX_MAX); - - new = (unsigned long) p; - prev = cmpxchg(&dst->_metrics, old, new); - - if (prev != old) { - p = __DST_METRICS_PTR(prev); - if (prev & DST_METRICS_READ_ONLY) - p = NULL; - } else { - if (rt->fi) { - fib_info_put(rt->fi); - rt->fi = NULL; - } - } - } - return p; + WARN_ON(1); + return NULL; } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, @@ -423,18 +397,16 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) int len; seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" - "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", - r->dst.dev ? r->dst.dev->name : "*", - (__force u32)r->rt_dst, - (__force u32)r->rt_gateway, - r->rt_flags, atomic_read(&r->dst.__refcnt), - r->dst.__use, 0, (__force u32)r->rt_src, - dst_metric_advmss(&r->dst) + 40, - dst_metric(&r->dst, RTAX_WINDOW), - (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + - dst_metric(&r->dst, RTAX_RTTVAR)), - r->rt_key_tos, - -1, 0, 0, &len); + "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", + r->dst.dev ? r->dst.dev->name : "*", + (__force u32)r->rt_dst, + (__force u32)r->rt_gateway, + r->rt_flags, atomic_read(&r->dst.__refcnt), + r->dst.__use, 0, (__force u32)r->rt_src, + dst_metric_advmss(&r->dst) + 40, + dst_metric(&r->dst, RTAX_WINDOW), 0, + r->rt_key_tos, + -1, 0, 0, &len); seq_printf(seq, "%*s\n", 127 - len, ""); } @@ -671,7 +643,7 @@ static inline int rt_fast_clean(struct rtable *rth) static inline int rt_valuable(struct rtable *rth) { return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || - (rt_has_peer(rth) && rt_peer_ptr(rth)->pmtu_expires); + rth->dst.expires; } static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) @@ -917,7 +889,6 @@ static void rt_cache_invalidate(struct net *net) get_random_bytes(&shuffle, sizeof(shuffle)); atomic_add(shuffle + 1U, &net->ipv4.rt_genid); - inetpeer_invalidate_family(AF_INET); } /* @@ -1244,31 +1215,6 @@ skip_hashing: return rt; } -static atomic_t __rt_peer_genid = ATOMIC_INIT(0); - -static u32 rt_peer_genid(void) -{ - return atomic_read(&__rt_peer_genid); -} - -void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) -{ - struct inet_peer_base *base; - struct inet_peer *peer; - - base = inetpeer_base_ptr(rt->_peer); - if (!base) - return; - - peer = inet_getpeer_v4(base, daddr, create); - if (peer) { - if (!rt_set_peer(rt, peer)) - inet_putpeer(peer); - else - rt->rt_peer_genid = rt_peer_genid(); - } -} - /* * Peer allocation may fail only in serious out-of-memory conditions. However * we still can generate some output. @@ -1291,20 +1237,15 @@ static void ip_select_fb_ident(struct iphdr *iph) void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) { - struct rtable *rt = (struct rtable *) dst; - - if (rt && !(rt->dst.flags & DST_NOPEER)) { - struct inet_peer *peer = rt_get_peer_create(rt, rt->rt_dst); + struct net *net = dev_net(dst->dev); + struct inet_peer *peer; - /* If peer is attached to destination, it is never detached, - so that we need not to grab a lock to dereference it. - */ - if (peer) { - iph->id = htons(inet_getid(peer, more)); - return; - } - } else if (!rt) - pr_debug("rt_bind_peer(0) @%p\n", __builtin_return_address(0)); + peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1); + if (peer) { + iph->id = htons(inet_getid(peer, more)); + inet_putpeer(peer); + return; + } ip_select_fb_ident(iph); } @@ -1330,30 +1271,6 @@ static void rt_del(unsigned int hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) -{ - struct rtable *rt = (struct rtable *) dst; - __be32 orig_gw = rt->rt_gateway; - struct neighbour *n; - - dst_confirm(&rt->dst); - - rt->rt_gateway = peer->redirect_learned.a4; - - n = ipv4_neigh_lookup(&rt->dst, NULL, &rt->rt_gateway); - if (!n) { - rt->rt_gateway = orig_gw; - return; - } - if (!(n->nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); - } else { - rt->rt_flags |= RTCF_REDIRECTED; - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); - } - neigh_release(n); -} - /* called in rcu_read_lock() section */ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 saddr, struct net_device *dev) @@ -1362,7 +1279,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, struct in_device *in_dev = __in_dev_get_rcu(dev); __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; - struct inet_peer *peer; struct net *net; if (!in_dev) @@ -1395,6 +1311,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rthp = &rt_hash_table[hash].chain; while ((rt = rcu_dereference(*rthp)) != NULL) { + struct neighbour *n; + rthp = &rt->dst.rt_next; if (rt->rt_key_dst != daddr || @@ -1408,13 +1326,16 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->rt_gateway != old_gw) continue; - peer = rt_get_peer_create(rt, rt->rt_dst); - if (peer) { - if (peer->redirect_learned.a4 != new_gw) { - peer->redirect_learned.a4 = new_gw; - atomic_inc(&__rt_peer_genid); + n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); + if (n) { + if (!(n->nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + } else { + rt->rt_gateway = new_gw; + rt->rt_flags |= RTCF_REDIRECTED; + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); } - check_peer_redir(&rt->dst, peer); + neigh_release(n); } } } @@ -1432,23 +1353,6 @@ reject_redirect: ; } -static bool peer_pmtu_expired(struct inet_peer *peer) -{ - unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); - - return orig && - time_after_eq(jiffies, orig) && - cmpxchg(&peer->pmtu_expires, orig, 0) == orig; -} - -static bool peer_pmtu_cleaned(struct inet_peer *peer) -{ - unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); - - return orig && - cmpxchg(&peer->pmtu_expires, orig, 0) == orig; -} - static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; @@ -1458,16 +1362,13 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) if (dst->obsolete > 0) { ip_rt_put(rt); ret = NULL; - } else if (rt->rt_flags & RTCF_REDIRECTED) { + } else if ((rt->rt_flags & RTCF_REDIRECTED) || + rt->dst.expires) { unsigned int hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, rt->rt_oif, rt_genid(dev_net(dst->dev))); rt_del(hash, rt); ret = NULL; - } else if (rt_has_peer(rt)) { - struct inet_peer *peer = rt_peer_ptr(rt); - if (peer_pmtu_expired(peer)) - dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); } } return ret; @@ -1494,6 +1395,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) struct rtable *rt = skb_rtable(skb); struct in_device *in_dev; struct inet_peer *peer; + struct net *net; int log_martians; rcu_read_lock(); @@ -1505,7 +1407,8 @@ void ip_rt_send_redirect(struct sk_buff *skb) log_martians = IN_DEV_LOG_MARTIANS(in_dev); rcu_read_unlock(); - peer = rt_get_peer_create(rt, rt->rt_dst); + net = dev_net(rt->dst.dev); + peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1); if (!peer) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); return; @@ -1522,7 +1425,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) */ if (peer->rate_tokens >= ip_rt_redirect_number) { peer->rate_last = jiffies; - return; + goto out_put_peer; } /* Check for load limit; set rate_last to the latest sent @@ -1543,6 +1446,8 @@ void ip_rt_send_redirect(struct sk_buff *skb) &rt->rt_dst, &rt->rt_gateway); #endif } +out_put_peer: + inet_putpeer(peer); } static int ip_error(struct sk_buff *skb) @@ -1585,7 +1490,7 @@ static int ip_error(struct sk_buff *skb) break; } - peer = rt_get_peer_create(rt, rt->rt_dst); + peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1); send = true; if (peer) { @@ -1598,6 +1503,7 @@ static int ip_error(struct sk_buff *skb) peer->rate_tokens -= ip_rt_error_cost; else send = false; + inet_putpeer(peer); } if (send) icmp_send(skb, ICMP_DEST_UNREACH, code, 0); @@ -1606,50 +1512,17 @@ out: kfree_skb(skb); return 0; } -static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) -{ - unsigned long expires = ACCESS_ONCE(peer->pmtu_expires); - - if (!expires) - return; - if (time_before(jiffies, expires)) { - u32 orig_dst_mtu = dst_mtu(dst); - if (peer->pmtu_learned < orig_dst_mtu) { - if (!peer->pmtu_orig) - peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU); - dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned); - } - } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires) - dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); -} - static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { struct rtable *rt = (struct rtable *) dst; - struct inet_peer *peer; dst_confirm(dst); - peer = rt_get_peer_create(rt, rt->rt_dst); - if (peer) { - unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires); - - if (mtu < ip_rt_min_pmtu) - mtu = ip_rt_min_pmtu; - if (!pmtu_expires || mtu < peer->pmtu_learned) { - - pmtu_expires = jiffies + ip_rt_mtu_expires; - if (!pmtu_expires) - pmtu_expires = 1UL; - - peer->pmtu_learned = mtu; - peer->pmtu_expires = pmtu_expires; + if (mtu < ip_rt_min_pmtu) + mtu = ip_rt_min_pmtu; - atomic_inc(&__rt_peer_genid); - rt->rt_peer_genid = rt_peer_genid(); - } - check_peer_pmtu(dst, peer); - } + rt->rt_pmtu = mtu; + dst_set_expires(&rt->dst, ip_rt_mtu_expires); } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, @@ -1660,7 +1533,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, struct rtable *rt; flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, - protocol, flow_flags | FLOWI_FLAG_PRECOW_METRICS, + protocol, flow_flags, iph->daddr, iph->saddr, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1681,30 +1554,12 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) } EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); -static void ipv4_validate_peer(struct rtable *rt) -{ - if (rt->rt_peer_genid != rt_peer_genid()) { - struct inet_peer *peer = rt_get_peer(rt, rt->rt_dst); - - if (peer) { - check_peer_pmtu(&rt->dst, peer); - - if (peer->redirect_learned.a4 && - peer->redirect_learned.a4 != rt->rt_gateway) - check_peer_redir(&rt->dst, peer); - } - - rt->rt_peer_genid = rt_peer_genid(); - } -} - static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { struct rtable *rt = (struct rtable *) dst; if (rt_is_expired(rt)) return NULL; - ipv4_validate_peer(rt); return dst; } @@ -1716,10 +1571,6 @@ static void ipv4_dst_destroy(struct dst_entry *dst) fib_info_put(rt->fi); rt->fi = NULL; } - if (rt_has_peer(rt)) { - struct inet_peer *peer = rt_peer_ptr(rt); - inet_putpeer(peer); - } } @@ -1730,11 +1581,8 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); rt = skb_rtable(skb); - if (rt && rt_has_peer(rt)) { - struct inet_peer *peer = rt_peer_ptr(rt); - if (peer_pmtu_cleaned(peer)) - dst_metric_set(&rt->dst, RTAX_MTU, peer->pmtu_orig); - } + if (rt) + dst_set_expires(&rt->dst, 0); } static int ip_rt_bug(struct sk_buff *skb) @@ -1814,7 +1662,13 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) static unsigned int ipv4_mtu(const struct dst_entry *dst) { const struct rtable *rt = (const |