diff options
Diffstat (limited to 'net/ipv6/route.c')
| -rw-r--r-- | net/ipv6/route.c | 318 |
1 files changed, 157 insertions, 161 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c979dd96d82..f23fbd28a50 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -66,8 +66,9 @@ #endif enum rt6_nud_state { - RT6_NUD_FAIL_HARD = -2, - RT6_NUD_FAIL_SOFT = -1, + RT6_NUD_FAIL_HARD = -3, + RT6_NUD_FAIL_PROBE = -2, + RT6_NUD_FAIL_DO_RR = -1, RT6_NUD_SUCCEED = 1 }; @@ -83,7 +84,9 @@ static void ip6_dst_ifdown(struct dst_entry *, static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); -static int ip6_pkt_discard_out(struct sk_buff *skb); +static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb); +static int ip6_pkt_prohibit(struct sk_buff *skb); +static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); @@ -101,6 +104,36 @@ static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *gwaddr, int ifindex); #endif +static void rt6_bind_peer(struct rt6_info *rt, int create) +{ + struct inet_peer_base *base; + struct inet_peer *peer; + + base = inetpeer_base_ptr(rt->_rt6i_peer); + if (!base) + return; + + peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); + if (peer) { + if (!rt6_set_peer(rt, peer)) + inet_putpeer(peer); + } +} + +static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) +{ + if (rt6_has_peer(rt)) + return rt6_peer_ptr(rt); + + rt6_bind_peer(rt, create); + return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL); +} + +static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) +{ + return __rt6_get_peer(rt, 1); +} + static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) { struct rt6_info *rt = (struct rt6_info *) dst; @@ -116,7 +149,8 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) unsigned long prev, new; p = peer->metrics; - if (inet_metrics_new(peer)) + if (inet_metrics_new(peer) || + (old & DST_METRICS_FORCE_OVERWRITE)) memcpy(p, old_p, sizeof(u32) * RTAX_MAX); new = (unsigned long) p; @@ -234,9 +268,6 @@ static const struct rt6_info ip6_null_entry_template = { #ifdef CONFIG_IPV6_MULTIPLE_TABLES -static int ip6_pkt_prohibit(struct sk_buff *skb); -static int ip6_pkt_prohibit_out(struct sk_buff *skb); - static const struct rt6_info ip6_prohibit_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -259,7 +290,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = { .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EINVAL, .input = dst_discard, - .output = dst_discard, + .output = dst_discard_sk, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -312,22 +343,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) } } -void rt6_bind_peer(struct rt6_info *rt, int create) -{ - struct inet_peer_base *base; - struct inet_peer *peer; - - base = inetpeer_base_ptr(rt->_rt6i_peer); - if (!base) - return; - - peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); - if (peer) { - if (!rt6_set_peer(rt, peer)) - inet_putpeer(peer); - } -} - static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) { @@ -359,12 +374,6 @@ static bool rt6_check_expired(const struct rt6_info *rt) return false; } -static bool rt6_need_strict(const struct in6_addr *daddr) -{ - return ipv6_addr_type(daddr) & - (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); -} - /* Multipath route selection: * Hash based function using packet header and flowlabel. * Adapted from fib_info_hashfn() @@ -476,6 +485,24 @@ out: } #ifdef CONFIG_IPV6_ROUTER_PREF +struct __rt6_probe_work { + struct work_struct work; + struct in6_addr target; + struct net_device *dev; +}; + +static void rt6_probe_deferred(struct work_struct *w) +{ + struct in6_addr mcaddr; + struct __rt6_probe_work *work = + container_of(w, struct __rt6_probe_work, work); + + addrconf_addr_solict_mult(&work->target, &mcaddr); + ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL); + dev_put(work->dev); + kfree(w); +} + static void rt6_probe(struct rt6_info *rt) { struct neighbour *neigh; @@ -499,17 +526,23 @@ static void rt6_probe(struct rt6_info *rt) if (!neigh || time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { - struct in6_addr mcaddr; - struct in6_addr *target; + struct __rt6_probe_work *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (neigh) { - neigh->updated = jiffies; + if (neigh && work) + __neigh_set_probe_once(neigh); + + if (neigh) write_unlock(&neigh->lock); - } - target = (struct in6_addr *)&rt->rt6i_gateway; - addrconf_addr_solict_mult(target, &mcaddr); - ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); + if (work) { + INIT_WORK(&work->work, rt6_probe_deferred); + work->target = rt->rt6i_gateway; + dev_hold(rt->dst.dev); + work->dev = rt->dst.dev; + schedule_work(&work->work); + } } else { out: write_unlock(&neigh->lock); @@ -554,11 +587,13 @@ static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) #ifdef CONFIG_IPV6_ROUTER_PREF else if (!(neigh->nud_state & NUD_FAILED)) ret = RT6_NUD_SUCCEED; + else + ret = RT6_NUD_FAIL_PROBE; #endif read_unlock(&neigh->lock); } else { ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? - RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT; + RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR; } rcu_read_unlock_bh(); @@ -595,16 +630,17 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, goto out; m = rt6_score_route(rt, oif, strict); - if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { + if (m == RT6_NUD_FAIL_DO_RR) { match_do_rr = true; m = 0; /* lowest valid score */ - } else if (m < 0) { + } else if (m == RT6_NUD_FAIL_HARD) { goto out; } if (strict & RT6_LOOKUP_F_REACHABLE) rt6_probe(rt); + /* note that m can be RT6_NUD_FAIL_PROBE at this point */ if (m > *mpri) { *do_rr = match_do_rr; *mpri = m; @@ -707,8 +743,11 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, prefix = &prefix_buf; } - rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, - dev->ifindex); + if (rinfo->prefix_len == 0) + rt = rt6_get_dflt_router(gwaddr, dev); + else + rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, + gwaddr, dev->ifindex); if (rt && !lifetime) { ip6_del_rt(rt); @@ -813,14 +852,15 @@ EXPORT_SYMBOL(rt6_lookup); be destroyed. */ -static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) +static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, + struct nlattr *mx, int mx_len) { int err; struct fib6_table *table; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info); + err = fib6_add(&table->tb6_root, rt, info, mx, mx_len); write_unlock_bh(&table->tb6_lock); return err; @@ -831,7 +871,7 @@ int ip6_ins_rt(struct rt6_info *rt) struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; - return __ip6_ins_rt(rt, &info); + return __ip6_ins_rt(rt, &info, NULL, 0); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, @@ -847,12 +887,9 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, rt = ip6_rt_copy(ort, daddr); if (rt) { - if (!(rt->rt6i_flags & RTF_GATEWAY)) { - if (ort->rt6i_dst.plen != 128 && - ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) - rt->rt6i_flags |= RTF_ANYCAST; - rt->rt6i_gateway = *daddr; - } + if (ort->rt6i_dst.plen != 128 && + ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) + rt->rt6i_flags |= RTF_ANYCAST; rt->rt6i_flags |= RTF_CACHE; @@ -1021,7 +1058,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori new->__use = 1; new->input = dst_discard; - new->output = dst_discard; + new->output = dst_discard_sk; if (dst_metrics_read_only(&ort->dst)) new->_metrics = ort->dst._metrics; @@ -1064,10 +1101,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev))) return NULL; - if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) - return dst; + if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) + return NULL; - return NULL; + if (rt6_check_expired(rt)) + return NULL; + + return dst; } static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) @@ -1136,8 +1176,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; - fl6.flowi6_mark = mark; - fl6.flowi6_flags = 0; + fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark); fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); @@ -1234,9 +1273,9 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; - fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); @@ -1256,9 +1295,9 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; - fl6.flowi6_flags = 0; fl6.daddr = msg->dest; fl6.saddr = iph->daddr; @@ -1301,7 +1340,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) - return mtu; + goto out; mtu = IPV6_MIN_MTU; @@ -1311,7 +1350,8 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) mtu = idev->cnf.mtu6; rcu_read_unlock(); - return mtu; +out: + return min_t(unsigned int, mtu, IP6_MAX_MTU); } static struct dst_entry *icmp6_dst_gc_list; @@ -1338,6 +1378,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; atomic_set(&rt->dst.__refcnt, 1); + rt->rt6i_gateway = fl6->daddr; rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; rt->rt6i_idev = idev; @@ -1414,7 +1455,7 @@ static int ip6_dst_gc(struct dst_ops *ops) goto out; net->ipv6.ip6_rt_gc_expire++; - fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true); entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; @@ -1471,7 +1512,7 @@ int ip6_route_add(struct fib6_config *cfg) if (!table) goto out; - rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table); + rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table); if (!rt) { err = -ENOMEM; @@ -1501,17 +1542,11 @@ int ip6_route_add(struct fib6_config *cfg) ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; - if (rt->rt6i_dst.plen == 128) - rt->dst.flags |= DST_HOST; - - if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) { - u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); - if (!metrics) { - err = -ENOMEM; - goto out; - } - dst_init_metrics(&rt->dst, metrics, 0); + if (rt->rt6i_dst.plen == 128) { + rt->dst.flags |= DST_HOST; + dst_metrics_set_force_overwrite(&rt->dst); } + #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); rt->rt6i_src.plen = cfg->fc_src_len; @@ -1540,21 +1575,24 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } } - rt->dst.output = ip6_pkt_discard_out; - rt->dst.input = ip6_pkt_discard; rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; switch (cfg->fc_type) { case RTN_BLACKHOLE: rt->dst.error = -EINVAL; + rt->dst.output = dst_discard_sk; + rt->dst.input = dst_discard; break; case RTN_PROHIBIT: rt->dst.error = -EACCES; + rt->dst.output = ip6_pkt_prohibit_out; + rt->dst.input = ip6_pkt_prohibit; break; case RTN_THROW: - rt->dst.error = -EAGAIN; - break; default: - rt->dst.error = -ENETUNREACH; + rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN + : -ENETUNREACH; + rt->dst.output = ip6_pkt_discard_out; + rt->dst.input = ip6_pkt_discard; break; } goto install_route; @@ -1627,31 +1665,13 @@ int ip6_route_add(struct fib6_config *cfg) rt->rt6i_flags = cfg->fc_flags; install_route: - if (cfg->fc_mx) { - struct nlattr *nla; - int remaining; - - nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla_type(nla); - - if (type) { - if (type > RTAX_MAX) { - err = -EINVAL; - goto out; - } - - dst_metric_set(&rt->dst, type, nla_get_u32(nla)); - } - } - } - rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; cfg->fc_nlinfo.nl_net = dev_net(dev); - return __ip6_ins_rt(rt, &cfg->fc_nlinfo); + return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len); out: if (dev) @@ -1873,11 +1893,12 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, in6_dev_hold(rt->rt6i_idev); rt->dst.lastuse = jiffies; - rt->rt6i_gateway = ort->rt6i_gateway; + if (ort->rt6i_flags & RTF_GATEWAY) + rt->rt6i_gateway = ort->rt6i_gateway; + else + rt->rt6i_gateway = *dest; rt->rt6i_flags = ort->rt6i_flags; - if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == - (RTF_DEFAULT | RTF_ADDRCONF)) - rt6_set_from(rt, ort); + rt6_set_from(rt, ort); rt->rt6i_metric = 0; #ifdef CONFIG_IPV6_SUBTREES @@ -2110,27 +2131,23 @@ static int ip6_pkt_discard(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); } -static int ip6_pkt_discard_out(struct sk_buff *skb) +static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); } -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - static int ip6_pkt_prohibit(struct sk_buff *skb) { return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); } -static int ip6_pkt_prohibit_out(struct sk_buff *skb) +static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); } -#endif - /* * Allocate a dst for local (unicast / anycast) address. */ @@ -2140,12 +2157,10 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, bool anycast) { struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); - - if (!rt) { - net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); + struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, + DST_NOCOUNT, NULL); + if (!rt) return ERR_PTR(-ENOMEM); - } in6_dev_hold(idev); @@ -2160,6 +2175,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, else rt->rt6i_flags |= RTF_LOCAL; + rt->rt6i_gateway = *addr; rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); @@ -2215,7 +2231,28 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) .net = net, .addr = &ifp->addr, }; - fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni); + fib6_clean_all(net, fib6_remove_prefsrc, &adni); +} + +#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) +#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) + +/* Remove routers and update dst entries when gateway turn into host. */ +static int fib6_clean_tohost(struct rt6_info *rt, void *arg) +{ + struct in6_addr *gateway = (struct in6_addr *)arg; + + if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) || + ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) && + ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { + return -1; + } + return 0; +} + +void rt6_clean_tohost(struct net *net, struct in6_addr *gateway) +{ + fib6_clean_all(net, fib6_clean_tohost, gateway); } struct arg_dev_net { @@ -2242,7 +2279,7 @@ void rt6_ifdown(struct net *net, struct net_device *dev) .net = net, }; - fib6_clean_all(net, fib6_ifdown, 0, &adn); + fib6_clean_all(net, fib6_ifdown, &adn); icmp6_clean_all(fib6_ifdown, &adn); } @@ -2297,7 +2334,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu) .mtu = mtu, }; - fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); + fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg); } static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { @@ -2693,6 +2730,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh) if (tb[RTA_OIF]) oif = nla_get_u32(tb[RTA_OIF]); + if (tb[RTA_MARK]) + fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); + if (iif) { struct net_device *dev; int flags = 0; @@ -2800,56 +2840,12 @@ static int ip6_route_dev_notify(struct notifier_block *this, #ifdef CONFIG_PROC_FS -struct rt6_proc_arg -{ - char *buffer; - int offset; - int length; - int skip; - int len; -}; - -static int rt6_info_route(struct rt6_info *rt, void *p_arg) -{ - struct seq_file *m = p_arg; - - seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); - -#ifdef CONFIG_IPV6_SUBTREES - seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); -#else - seq_puts(m, "00000000000000000000000000000000 00 "); -#endif - if (rt->rt6i_flags & RTF_GATEWAY) { - seq_printf(m, "%pi6", &rt->rt6i_gateway); - } else { - seq_puts(m, "00000000000000000000000000000000"); - } - seq_printf(m, " %08x %08x %08x %08x %8s\n", - rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), - rt->dst.__use, rt->rt6i_flags, - rt->dst.dev ? rt->dst.dev->name : ""); - return 0; -} - -static int ipv6_route_show(struct seq_file *m, void *v) -{ - struct net *net = (struct net *)m->private; - fib6_clean_all_ro(net, rt6_info_route, 0, m); - return 0; -} - -static int ipv6_route_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, ipv6_route_show); -} - static const struct file_operations ipv6_route_proc_fops = { .owner = THIS_MODULE, .open = ipv6_route_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release_net, + .release = seq_release_net, }; static int rt6_stats_seq_show(struct seq_file *seq, void *v) |
