diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/bridge/br_netfilter.c | 6 | ||||
-rw-r--r-- | net/dccp/ipv4.c | 13 | ||||
-rw-r--r-- | net/dccp/ipv6.c | 37 | ||||
-rw-r--r-- | net/decnet/dn_route.c | 12 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 23 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 46 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 2 | ||||
-rw-r--r-- | net/ipv4/route.c | 267 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 13 | ||||
-rw-r--r-- | net/ipv4/xfrm4_policy.c | 10 | ||||
-rw-r--r-- | net/ipv6/inet6_connection_sock.c | 49 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 6 | ||||
-rw-r--r-- | net/ipv6/route.c | 21 | ||||
-rw-r--r-- | net/ipv6/sit.c | 2 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 39 | ||||
-rw-r--r-- | net/ipv6/xfrm6_policy.c | 10 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_xmit.c | 4 | ||||
-rw-r--r-- | net/sctp/associola.c | 4 | ||||
-rw-r--r-- | net/sctp/input.c | 6 | ||||
-rw-r--r-- | net/sctp/output.c | 2 | ||||
-rw-r--r-- | net/sctp/socket.c | 6 | ||||
-rw-r--r-- | net/sctp/transport.c | 14 |
23 files changed, 418 insertions, 176 deletions
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 81f76c402cf..68e8f364bbf 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -111,11 +111,13 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) -static void fake_update_pmtu(struct dst_entry *dst, u32 mtu) +static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { } -static void fake_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void fake_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 129ed8f7413..ab4f44c9bb2 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -161,17 +161,10 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, if (sk->sk_state == DCCP_LISTEN) return; - /* We don't check in the destentry if pmtu discovery is forbidden - * on this route. We just assume that no packet_to_big packets - * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the - * route, but I think that's acceptable. - */ - if ((dst = __sk_dst_check(sk, 0)) == NULL) + dst = inet_csk_update_pmtu(sk, mtu); + if (!dst) return; - dst->ops->update_pmtu(dst, mtu); - /* Something is about to be wrong... Remember soft error * for the case, if this connection will not able to recover. */ @@ -200,7 +193,7 @@ static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk) struct dst_entry *dst = __sk_dst_check(sk, 0); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } /* diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 090c0800ce0..56840b249f3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -134,7 +134,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } if (type == ICMPV6_PKT_TOOBIG) { @@ -145,39 +145,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED)) goto out; - /* icmp should have updated the destination cache entry */ - dst = __sk_dst_check(sk, np->dst_cookie); - if (dst == NULL) { - struct inet_sock *inet = inet_sk(sk); - struct flowi6 fl6; - - /* BUGGG_FUTURE: Again, it is not clear how - to handle rthdr case. Ignore this complexity - for now. - */ - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_DCCP; - fl6.daddr = np->daddr; - fl6.saddr = np->saddr; - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.fl6_dport = inet->inet_dport; - fl6.fl6_sport = inet->inet_sport; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - - dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); - if (IS_ERR(dst)) { - sk->sk_err_soft = -PTR_ERR(dst); - goto out; - } - } else - dst_hold(dst); - - dst->ops->update_pmtu(dst, ntohl(info)); + dst = inet6_csk_update_pmtu(sk, ntohl(info)); + if (!dst) + goto out; - if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) dccp_sync_mss(sk, dst_mtu(dst)); - } /* else let the usual retransmit timer handle it */ - dst_release(dst); goto out; } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index e9c4e2e864c..47de90d8fe9 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -117,8 +117,10 @@ static void dn_dst_destroy(struct dst_entry *); static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); -static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); -static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb); +static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb , u32 mtu); +static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); @@ -266,7 +268,8 @@ static int dn_dst_gc(struct dst_ops *ops) * We update both the mtu and the advertised mss (i.e. the segment size we * advertise to the other end). */ -static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) +static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct dn_route *rt = (struct dn_route *) dst; struct neighbour *n = rt->n; @@ -294,7 +297,8 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) } } -static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d71bfbdc0bf..1e09852df51 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -140,6 +140,27 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { }, }; +static void free_nh_exceptions(struct fib_nh *nh) +{ + struct fnhe_hash_bucket *hash = nh->nh_exceptions; + int i; + + for (i = 0; i < FNHE_HASH_SIZE; i++) { + struct fib_nh_exception *fnhe; + + fnhe = rcu_dereference(hash[i].chain); + while (fnhe) { + struct fib_nh_exception *next; + + next = rcu_dereference(fnhe->fnhe_next); + kfree(fnhe); + + fnhe = next; + } + } + kfree(hash); +} + /* Release a nexthop info record */ static void free_fib_info_rcu(struct rcu_head *head) { @@ -148,6 +169,8 @@ static void free_fib_info_rcu(struct rcu_head *head) change_nexthops(fi) { if (nexthop_nh->nh_dev) dev_put(nexthop_nh->nh_dev); + if (nexthop_nh->nh_exceptions) + free_nh_exceptions(nexthop_nh); } endfor_nexthops(fi); release_net(fi->fib_net); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 76825be3b64..3ea465286a3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -803,3 +803,49 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, } EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); #endif + +static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl) +{ + struct inet_sock *inet = inet_sk(sk); + struct ip_options_rcu *inet_opt; + __be32 daddr = inet->inet_daddr; + struct flowi4 *fl4; + struct rtable *rt; + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; + fl4 = &fl->u.ip4; + rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, + inet->inet_saddr, inet->inet_dport, + inet->inet_sport, sk->sk_protocol, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); + if (IS_ERR(rt)) + rt = NULL; + if (rt) + sk_setup_caps(sk, &rt->dst); + rcu_read_unlock(); + + return &rt->dst; +} + +struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) +{ + struct dst_entry *dst = __sk_dst_check(sk, 0); + struct inet_sock *inet = inet_sk(sk); + + if (!dst) { + dst = inet_csk_rebuild_route(sk, &inet->cork.fl); + if (!dst) + goto out; + } + dst->ops->update_pmtu(dst, sk, NULL, mtu); + + dst = __sk_dst_check(sk, 0); + if (!dst) + dst = inet_csk_rebuild_route(sk, &inet->cork.fl); +out: + return dst; +} +EXPORT_SYMBOL_GPL(inet_csk_update_pmtu); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0c3123566d7..42c44b1403c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -833,7 +833,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { df |= (old_iph->frag_off&htons(IP_DF)); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index c2d0e6d8baa..2c2c35bace7 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -519,7 +519,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if ((old_iph->frag_off & htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index aad21819316..a5bd0b4acc6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -148,8 +148,10 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst); static void ipv4_dst_destroy(struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); -static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); -static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb); +static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu); +static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); static int rt_garbage_collect(struct dst_ops *ops); static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -1273,14 +1275,130 @@ static void rt_del(unsigned int hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void __build_flow_key(struct flowi4 *fl4, struct sock *sk, + const struct iphdr *iph, + int oif, u8 tos, + u8 prot, u32 mark, int flow_flags) +{ + if (sk) { + const struct inet_sock *inet = inet_sk(sk); + + oif = sk->sk_bound_dev_if; + mark = sk->sk_mark; + tos = RT_CONN_FLAGS(sk); + prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol; + } + flowi4_init_output(fl4, oif, mark, tos, + RT_SCOPE_UNIVERSE, prot, + flow_flags, + iph->daddr, iph->saddr, 0, 0); +} + +static void build_skb_flow_key(struct flowi4 *fl4, struct sk_buff *skb, struct sock *sk) +{ + const struct iphdr *iph = ip_hdr(skb); + int oif = skb->dev->ifindex; + u8 tos = RT_TOS(iph->tos); + u8 prot = iph->protocol; + u32 mark = skb->mark; + + __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0); +} + +static void build_sk_flow_key(struct flowi4 *fl4, struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + struct ip_options_rcu *inet_opt; + __be32 daddr = inet->inet_daddr; + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; + flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + inet_sk_flowi_flags(sk), + daddr, inet->inet_saddr, 0, 0); + rcu_read_unlock(); +} + +static void ip_rt_build_flow_key(struct flowi4 *fl4, struct sock *sk, + struct sk_buff *skb) +{ + if (skb) + build_skb_flow_key(fl4, skb, sk); + else + build_sk_flow_key(fl4, sk); +} + +static DEFINE_SPINLOCK(fnhe_lock); + +static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash, __be32 daddr) +{ + struct fib_nh_exception *fnhe, *oldest; + + oldest = rcu_dereference(hash->chain); + for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; + fnhe = rcu_dereference(fnhe->fnhe_next)) { + if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) + oldest = fnhe; + } + return oldest; +} + +static struct fib_nh_exception *find_or_create_fnhe(struct fib_nh *nh, __be32 daddr) +{ + struct fnhe_hash_bucket *hash = nh->nh_exceptions; + struct fib_nh_exception *fnhe; + int depth; + u32 hval; + + if (!hash) { + hash = nh->nh_exceptions = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), + GFP_ATOMIC); + if (!hash) + return NULL; + } + + hval = (__force u32) daddr; + hval ^= (hval >> 11) ^ (hval >> 22); + hash += hval; + + depth = 0; + for (fnhe = rcu_dereference(hash->chain); fnhe; + fnhe = rcu_dereference(fnhe->fnhe_next)) { + if (fnhe->fnhe_daddr == daddr) + goto out; + depth++; + } + + if (depth > FNHE_RECLAIM_DEPTH) { + fnhe = fnhe_oldest(hash + hval, daddr); + goto out_daddr; + } + fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); + if (!fnhe) + return NULL; + + fnhe->fnhe_next = hash->chain; + rcu_assign_pointer(hash->chain, fnhe); + +out_daddr: + fnhe->fnhe_daddr = daddr; +out: + fnhe->fnhe_stamp = jiffies; + return fnhe; +} + +static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4) { __be32 new_gw = icmp_hdr(skb)->un.gateway; __be32 old_gw = ip_hdr(skb)->saddr; struct net_device *dev = skb->dev; struct in_device *in_dev; + struct fib_result res; struct neighbour *n; - struct rtable *rt; struct net *net; switch (icmp_hdr(skb)->code & 7) { @@ -1294,7 +1412,6 @@ static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb) return; } - rt = (struct rtable *) dst; if (rt->rt_gateway != old_gw) return; @@ -1318,11 +1435,21 @@ static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb) goto reject_redirect; } - n = ipv4_neigh_lookup(dst, NULL, &new_gw); + n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); if (n) { if (!(n->nud_state & NUD_VALID)) { neigh_event_send(n, NULL); } else { + if (fib_lookup(net, fl4, &res) == 0) { + struct fib_nh *nh = &FIB_RES_NH(res); + struct fib_nh_exception *fnhe; + + spin_lock_bh(&fnhe_lock); + fnhe = find_or_create_fnhe(nh, fl4->daddr); + if (fnhe) + fnhe->fnhe_gw = new_gw; + spin_unlock_bh(&fnhe_lock); + } rt->rt_gateway = new_gw; rt->rt_flags |= RTCF_REDIRECTED; call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); @@ -1347,6 +1474,17 @@ reject_redirect: ; } +static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) +{ + struct rtable *rt; + struct flowi4 fl4; + + rt = (struct rtable *) dst; + + ip_rt_build_flow_key(&fl4, sk, skb); + __ip_do_redirect(rt, skb, &fl4); +} + static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; @@ -1506,32 +1644,51 @@ out: kfree_skb(skb); return 0; } -static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) +static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { - struct rtable *rt = (struct rtable *) dst; - - dst_confirm(dst); + struct fib_result res; if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; + if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { + struct fib_nh *nh = &FIB_RES_NH(res); + struct fib_nh_exception *fnhe; + + spin_lock_bh(&fnhe_lock); + fnhe = find_or_create_fnhe(nh, fl4->daddr); + if (fnhe) { + fnhe->fnhe_pmtu = mtu; + fnhe->fnhe_expires = jiffies + ip_rt_mtu_expires; + } + spin_unlock_bh(&fnhe_lock); + } rt->rt_pmtu = mtu; dst_set_expires(&rt->dst, ip_rt_mtu_expires); } +static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) +{ + struct rtable *rt = (struct rtable *) dst; + struct flowi4 fl4; + + ip_rt_build_flow_key(&fl4, sk, skb); + __ip_rt_update_pmtu(rt, &fl4, mtu); +} + void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u32 mark, u8 protocol, int flow_flags) { - const struct iphdr *iph = (const struct iphdr *)skb->data; + const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; - flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, - protocol, flow_flags, - iph->daddr, iph->saddr, 0, 0); + __build_flow_key(&fl4, NULL, iph, oif, + RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { - ip_rt_update_pmtu(&rt->dst, mtu); + __ip_rt_update_pmtu(rt, &fl4, mtu); ip_rt_put(rt); } } @@ -1539,27 +1696,31 @@ EXPORT_SYMBOL_GPL(ipv4_update_pmtu); void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) { - const struct inet_sock *inet = inet_sk(sk); + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct flowi4 fl4; + struct rtable *rt; - return ipv4_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark, - inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - inet_sk_flowi_flags(sk)); + __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + rt = __ip_route_output_key(sock_net(sk), &fl4); + if (!IS_ERR(rt)) { + __ip_rt_update_pmtu(rt, &fl4, mtu); + ip_rt_put(rt); + } } EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, u8 protocol, int flow_flags) { - const struct iphdr *iph = (const struct iphdr *)skb->data; + const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; - flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, - protocol, flow_flags, iph->daddr, iph->saddr, 0, 0); + __build_flow_key(&fl4, NULL, iph, oif, + RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { - ip_do_redirect(&rt->dst, skb); + __ip_do_redirect(rt, skb, &fl4); ip_rt_put(rt); } } @@ -1567,12 +1728,16 @@ EXPORT_SYMBOL_GPL(ipv4_redirect); void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) { - const struct inet_sock *inet = inet_sk(sk); + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct flowi4 fl4; + struct rtable *rt; - return ipv4_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, - sk->sk_mark, - inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - inet_sk_flowi_flags(sk)); + __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + rt = __ip_route_output_key(sock_net(sk), &fl4); + if (!IS_ERR(rt)) { + __ip_do_redirect(rt, skb, &fl4); + ip_rt_put(rt); + } } EXPORT_SYMBOL_GPL(ipv4_sk_redirect); @@ -1719,14 +1884,46 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, dst_init_metrics(&rt->dst, fi->fib_metrics, true); } +static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr) +{ + struct fnhe_hash_bucket *hash = nh->nh_exceptions; + struct fib_nh_exception *fnhe; + u32 hval; + + hval = (__force u32) daddr; + hval ^= (hval >> 11) ^ (hval >> 22); + + for (fnhe = rcu_dereference(hash[hval].chain); fnhe; + fnhe = rcu_dereference(fnhe->fnhe_next)) { + if (fnhe->fnhe_daddr == daddr) { + if (fnhe->fnhe_pmtu) { + unsigned long expires = fnhe->fnhe_expires; + unsigned long diff = jiffies - expires; + + if (time_before(jiffies, expires)) { + rt->rt_pmtu = fnhe->fnhe_pmtu; + dst_set_expires(&rt->dst, diff); + } + } + if (fnhe->fnhe_gw) + rt->rt_gateway = fnhe->fnhe_gw; + fnhe->fnhe_stamp = jiffies; + break; + } + } +} + static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, const struct fib_result *res, struct fib_info *fi, u16 type, u32 itag) { if (fi) { - if (FIB_RES_GW(*res) && - FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) - rt->rt_gateway = FIB_RES_GW(*res); + struct fib_nh *nh = &FIB_RES_NH(*res); + + if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) + rt->rt_gateway = nh->nh_gw; + if (unlikely(nh->nh_exceptions)) + rt_bind_exception(rt, nh, fl4->daddr); rt_init_metrics(rt, fl4, fi); #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; @@ -2587,11 +2784,13 @@ static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) return mtu ? : dst->dev->mtu; } -static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { } -static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7a0062cb4ed..d9caf5c07aa 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -289,17 +289,10 @@ static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu) if (sk->sk_state == TCP_LISTEN) return; - /* We don't check in the destentry if pmtu discovery is forbidden - * on this route. We just assume that no packet_to_big packets - * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the - * route, but I think that's acceptable. - */ - if ((dst = __sk_dst_check(sk, 0)) == NULL) + dst = inet_csk_update_pmtu(sk, mtu); + if (!dst) return; - dst->ops->update_pmtu(dst, mtu); - /* Something is about to be wrong... Remember soft error * for the case, if this connection will not able to recover. */ @@ -326,7 +319,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk) struct dst_entry *dst = __sk_dst_check(sk, 0); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } /* diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 737131cef37..fcf7678bc00 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -194,20 +194,22 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops) return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); } -static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) +static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, mtu); + path->ops->update_pmtu(path, sk, skb, mtu); } -static void xfrm4_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->redirect(path, skb); + path->ops->redirect(path, sk, skb); } static void xfrm4_dst_destroy(struct dst_entry *dst) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index bceb14450a1..4a0c4d2d8b0 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -203,15 +203,13 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) return dst; } -int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) +static struct dst_entry *inet6_csk_route_socket(struct sock *sk) { - struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi6 fl6; - struct dst_entry *dst; struct in6_addr *final_p, final; - int res; + struct dst_entry *dst; + struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = sk->sk_protocol; @@ -228,18 +226,29 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) final_p = fl6_update_dst(&fl6, np->opt, &final); dst = __inet6_csk_dst_check(sk, np->dst_cookie); - - if (dst == NULL) { + if (!dst) { dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); - if (IS_ERR(dst)) { - sk->sk_err_soft = -PTR_ERR(dst); - sk->sk_route_caps = 0; - kfree_skb(skb); - return PTR_ERR(dst); - } + if (!IS_ERR(dst)) + __inet6_csk_dst_store(sk, dst, NULL, NULL); + } + return dst; +} - __inet6_csk_dst_store(sk, dst, NULL, NULL); +int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) +{ + struct sock *sk = skb->sk; + struct ipv6_pinfo *np = inet6_sk(sk); + struct flowi6 fl6; + struct dst_entry *dst; + int res; + + dst = inet6_csk_route_socket(sk); + if (IS_ERR(dst)) { + sk->sk_err_soft = -PTR_ERR(dst); + sk->sk_route_caps = 0; + kfree_skb(skb); + return PTR_ERR(dst); } rcu_read_lock(); @@ -253,3 +262,15 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) return res; } EXPORT_SYMBOL_GPL(inet6_csk_xmit); + +struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) +{ + struct dst_entry *dst = inet6_csk_route_socket(sk); + + if (IS_ERR(dst)) + return NULL; + dst->ops->update_pmtu(dst, sk, NULL, mtu); + + return inet6_csk_route_socket(sk); +} +EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 61d10659729..db328466796 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -609,10 +609,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rel_info > dst_mtu(skb_dst(skb2))) goto out; - skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); + skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info); } if (rel_type == ICMP_REDIRECT) - skb_dst(skb2)->ops->redirect(skb_dst(skb2), skb2); + skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2); icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); @@ -952,7 +952,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu) { *pmtu = mtu; err = -EMSGSIZE; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 412fad809a3..84f6564dd37 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -78,8 +78,10 @@ static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); -static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); -static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb); +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu); +static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_add_route_info(struct net *net, @@ -187,11 +189,13 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) return mtu ? : dst->dev->mtu; } -static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { } -static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } @@ -1071,7 +1075,8 @@ static void ip6_link_failure(struct sk_buff *skb) } } -static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct rt6_info *rt6 = (struct rt6_info*)dst; @@ -1108,7 +1113,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - ip6_rt_update_pmtu(dst, ntohl(mtu)); + ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_update_pmtu); @@ -1136,7 +1141,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - rt6_do_redirect(dst, skb); + rt6_do_redirect(dst, NULL, skb); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_redirect); @@ -1639,7 +1644,7 @@ static int ip6_route_del(struct fib6_config *cfg) return err; } -static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb->dev); struct netevent_redirect netevent; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index fbf1622fdee..3bd1bfc01f8 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -807,7 +807,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (tunnel->parms.iph.daddr && skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu) { |