diff options
Diffstat (limited to 'net/ipv6/ip6_fib.c')
| -rw-r--r-- | net/ipv6/ip6_fib.c | 725 |
1 files changed, 556 insertions, 169 deletions
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index de382114609..cb4459bd1d2 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -9,15 +9,16 @@ * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. + * + * Changes: + * Yuji SEKIYA @USAGI: Support default route on router node; + * remove ip6_null_entry from the top of + * routing table. + * Ville Nuorvala: Fixed routing subtrees. */ -/* - * Changes: - * Yuji SEKIYA @USAGI: Support default route on router node; - * remove ip6_null_entry from the top of - * routing table. - * Ville Nuorvala: Fixed routing subtrees. - */ +#define pr_fmt(fmt) "IPv6: " fmt + #include <linux/errno.h> #include <linux/types.h> #include <linux/net.h> @@ -28,10 +29,6 @@ #include <linux/list.h> #include <linux/slab.h> -#ifdef CONFIG_PROC_FS -#include <linux/proc_fs.h> -#endif - #include <net/ipv6.h> #include <net/ndisc.h> #include <net/addrconf.h> @@ -42,15 +39,14 @@ #define RT6_DEBUG 2 #if RT6_DEBUG >= 3 -#define RT6_TRACE(x...) printk(KERN_DEBUG x) +#define RT6_TRACE(x...) pr_debug(x) #else #define RT6_TRACE(x...) do { ; } while (0) #endif -static struct kmem_cache * fib6_node_kmem __read_mostly; +static struct kmem_cache *fib6_node_kmem __read_mostly; -enum fib_walk_state_t -{ +enum fib_walk_state_t { #ifdef CONFIG_IPV6_SUBTREES FWS_S, #endif @@ -60,8 +56,7 @@ enum fib_walk_state_t FWS_U }; -struct fib6_cleaner_t -{ +struct fib6_cleaner_t { struct fib6_walker_t w; struct net *net; int (*func)(struct rt6_info *, void *arg); @@ -76,8 +71,7 @@ static DEFINE_RWLOCK(fib6_walker_lock); #define FWS_INIT FWS_L #endif -static void fib6_prune_clones(struct net *net, struct fib6_node *fn, - struct rt6_info *rt); +static void fib6_prune_clones(struct net *net, struct fib6_node *fn); static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); static int fib6_walk(struct fib6_walker_t *w); @@ -134,12 +128,12 @@ static __inline__ u32 fib6_new_sernum(void) # define BITOP_BE32_SWIZZLE 0 #endif -static __inline__ __be32 addr_bit_set(void *token, int fn_bit) +static __inline__ __be32 addr_bit_set(const void *token, int fn_bit) { - __be32 *addr = token; + const __be32 *addr = token; /* * Here, - * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) + * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) * is optimized version of * htonl(1 << ((~fn_bit)&0x1F)) * See include/asm-generic/bitops/le.h. @@ -148,7 +142,7 @@ static __inline__ __be32 addr_bit_set(void *token, int fn_bit) addr[fn_bit >> 5]; } -static __inline__ struct fib6_node * node_alloc(void) +static __inline__ struct fib6_node *node_alloc(void) { struct fib6_node *fn; @@ -157,7 +151,7 @@ static __inline__ struct fib6_node * node_alloc(void) return fn; } -static __inline__ void node_free(struct fib6_node * fn) +static __inline__ void node_free(struct fib6_node *fn) { kmem_cache_free(fib6_node_kmem, fn); } @@ -194,10 +188,11 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) struct fib6_table *table; table = kzalloc(sizeof(*table), GFP_ATOMIC); - if (table != NULL) { + if (table) { table->tb6_id = id; table->tb6_root.leaf = net->ipv6.ip6_null_entry; table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + inet_peer_base_init(&table->tb6_peers); } return table; @@ -214,7 +209,7 @@ struct fib6_table *fib6_new_table(struct net *net, u32 id) return tb; tb = fib6_alloc_table(net, id); - if (tb != NULL) + if (tb) fib6_link_table(net, tb); return tb; @@ -224,7 +219,6 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) { struct fib6_table *tb; struct hlist_head *head; - struct hlist_node *node; unsigned int h; if (id == 0) @@ -232,7 +226,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) h = id & (FIB6_TABLE_HASHSZ - 1); rcu_read_lock(); head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(tb, head, tb6_hlist) { if (tb->tb6_id == id) { rcu_read_unlock(); return tb; @@ -260,10 +254,10 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) return net->ipv6.fib6_main_tbl; } -struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, +struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { - return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl, flags); + return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); } static void __net_init fib6_tables_init(struct net *net) @@ -293,7 +287,7 @@ static int fib6_dump_node(struct fib6_walker_t *w) static void fib6_dump_end(struct netlink_callback *cb) { - struct fib6_walker_t *w = (void*)cb->args[2]; + struct fib6_walker_t *w = (void *)cb->args[2]; if (w) { if (cb->args[4]) { @@ -303,7 +297,7 @@ static void fib6_dump_end(struct netlink_callback *cb) cb->args[2] = 0; kfree(w); } - cb->done = (void*)cb->args[3]; + cb->done = (void *)cb->args[3]; cb->args[1] = 3; } @@ -363,7 +357,6 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) struct rt6_rtnl_dump_arg arg; struct fib6_walker_t *w; struct fib6_table *tb; - struct hlist_node *node; struct hlist_head *head; int res = 0; @@ -371,7 +364,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) s_e = cb->args[1]; w = (void *)cb->args[2]; - if (w == NULL) { + if (!w) { /* New dump: * * 1. hook callback destructor. @@ -383,7 +376,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) * 2. allocate and initialize walker. */ w = kzalloc(sizeof(*w), GFP_ATOMIC); - if (w == NULL) + if (!w) return -ENOMEM; w->func = fib6_dump_node; cb->args[2] = (long)w; @@ -394,10 +387,11 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) arg.net = net; w->args = &arg; + rcu_read_lock(); for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry(tb, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(tb, head, tb6_hlist) { if (e < s_e) goto next; res = fib6_dump_table(tb, skb, cb); @@ -408,6 +402,7 @@ next: } } out: + rcu_read_unlock(); cb->args[1] = e; cb->args[0] = h; @@ -425,9 +420,10 @@ out: * node. */ -static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, - int addrlen, int plen, - int offset) +static struct fib6_node *fib6_add_1(struct fib6_node *root, + struct in6_addr *addr, int plen, + int offset, int allow_create, + int replace_required) { struct fib6_node *fn, *in, *ln; struct fib6_node *pn = NULL; @@ -449,8 +445,16 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, * Prefix match */ if (plen < fn->fn_bit || - !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) + !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) { + if (!allow_create) { + if (replace_required) { + pr_warn("Can't replace route, no match found\n"); + return ERR_PTR(-ENOENT); + } + pr_warn("NLM_F_CREATE should be set when creating new route\n"); + } goto insert_above; + } /* * Exact match ? @@ -458,7 +462,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, if (plen == fn->fn_bit) { /* clean up an intermediate node */ - if ((fn->fn_flags & RTN_RTINFO) == 0) { + if (!(fn->fn_flags & RTN_RTINFO)) { rt6_release(fn->leaf); fn->leaf = NULL; } @@ -476,9 +480,25 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, fn->fn_sernum = sernum; dir = addr_bit_set(addr, fn->fn_bit); pn = fn; - fn = dir ? fn->right: fn->left; + fn = dir ? fn->right : fn->left; } while (fn); + if (!allow_create) { + /* We should not create new node because + * NLM_F_REPLACE was specified without NLM_F_CREATE + * I assume it is safe to require NLM_F_CREATE when + * REPLACE flag is used! Later we may want to remove the + * check for replace_required, because according + * to netlink specification, NLM_F_CREATE + * MUST be specified if new route is created. + * That would keep IPv6 consistent with IPv4 + */ + if (replace_required) { + pr_warn("Can't replace route, no match found\n"); + return ERR_PTR(-ENOENT); + } + pr_warn("NLM_F_CREATE should be set when creating new route\n"); + } /* * We walked to the bottom of tree. * Create new leaf node without children. @@ -486,8 +506,8 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, ln = node_alloc(); - if (ln == NULL) - return NULL; + if (!ln) + return ERR_PTR(-ENOMEM); ln->fn_bit = plen; ln->parent = pn; @@ -518,7 +538,7 @@ insert_above: but if it is >= plen, the value is ignored in any case. */ - bit = __ipv6_addr_diff(addr, &key->addr, addrlen); + bit = __ipv6_addr_diff(addr, &key->addr, sizeof(*addr)); /* * (intermediate)[in] @@ -529,12 +549,12 @@ insert_above: in = node_alloc(); ln = node_alloc(); - if (in == NULL || ln == NULL) { + if (!in || !ln) { if (in) node_free(in); if (ln) node_free(ln); - return NULL; + return ERR_PTR(-ENOMEM); } /* @@ -583,8 +603,8 @@ insert_above: ln = node_alloc(); - if (ln == NULL) - return NULL; + if (!ln) + return ERR_PTR(-ENOMEM); ln->fn_bit = plen; @@ -607,19 +627,61 @@ insert_above: return ln; } +static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt) +{ + return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == + RTF_GATEWAY; +} + +static int fib6_commit_metrics(struct dst_entry *dst, + struct nlattr *mx, int mx_len) +{ + struct nlattr *nla; + int remaining; + u32 *mp; + + if (dst->flags & DST_HOST) { + mp = dst_metrics_write_ptr(dst); + } else { + mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (!mp) + return -ENOMEM; + dst_init_metrics(dst, mp, 0); + } + + nla_for_each_attr(nla, mx, mx_len, remaining) { + int type = nla_type(nla); + + if (type) { + if (type > RTAX_MAX) + return -EINVAL; + + mp[type - 1] = nla_get_u32(nla); + } + } + return 0; +} + /* * Insert routing information in a node. */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info) + struct nl_info *info, struct nlattr *mx, int mx_len) { struct rt6_info *iter = NULL; struct rt6_info **ins; + int replace = (info->nlh && + (info->nlh->nlmsg_flags & NLM_F_REPLACE)); + int add = (!info->nlh || + (info->nlh->nlmsg_flags & NLM_F_CREATE)); + int found = 0; + bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); + int err; ins = &fn->leaf; - for (iter = fn->leaf; iter; iter=iter->dst.rt6_next) { + for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) { /* * Search for duplicates */ @@ -628,20 +690,42 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, /* * Same priority level */ + if (info->nlh && + (info->nlh->nlmsg_flags & NLM_F_EXCL)) + return -EEXIST; + if (replace) { + found++; + break; + } - if (iter->rt6i_dev == rt->rt6i_dev && + if (iter->dst.dev == rt->dst.dev && iter->rt6i_idev == rt->rt6i_idev && ipv6_addr_equal(&iter->rt6i_gateway, &rt->rt6i_gateway)) { - if (!(iter->rt6i_flags&RTF_EXPIRES)) + if (rt->rt6i_nsiblings) + rt->rt6i_nsiblings = 0; + if (!(iter->rt6i_flags & RTF_EXPIRES)) return -EEXIST; - iter->rt6i_expires = rt->rt6i_expires; - if (!(rt->rt6i_flags&RTF_EXPIRES)) { - iter->rt6i_flags &= ~RTF_EXPIRES; - iter->rt6i_expires = 0; - } + if (!(rt->rt6i_flags & RTF_EXPIRES)) + rt6_clean_expires(iter); + else + rt6_set_expires(iter, rt->dst.expires); return -EEXIST; } + /* If we have the same destination and the same metric, + * but not the same gateway, then the route we try to + * add is sibling to this route, increment our counter + * of siblings, and later we will add our route to the + * list. + * Only static routes (which don't have flag + * RTF_EXPIRES) are used for ECMPv6. + * + * To avoid long list, we only had siblings if the + * route have a gateway. + */ + if (rt_can_ecmp && + rt6_qualify_for_ecmp(iter)) + rt->rt6i_nsiblings++; } if (iter->rt6i_metric > rt->rt6i_metric) @@ -654,20 +738,83 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, if (ins == &fn->leaf) fn->rr_ptr = NULL; + /* Link this route to others same route. */ + if (rt->rt6i_nsiblings) { + unsigned int rt6i_nsiblings; + struct rt6_info *sibling, *temp_sibling; + + /* Find the first route that have the same metric */ + sibling = fn->leaf; + while (sibling) { + if (sibling->rt6i_metric == rt->rt6i_metric && + rt6_qualify_for_ecmp(sibling)) { + list_add_tail(&rt->rt6i_siblings, + &sibling->rt6i_siblings); + break; + } + sibling = sibling->dst.rt6_next; + } + /* For each sibling in the list, increment the counter of + * siblings. BUG() if counters does not match, list of siblings + * is broken! + */ + rt6i_nsiblings = 0; + list_for_each_entry_safe(sibling, temp_sibling, + &rt->rt6i_siblings, rt6i_siblings) { + sibling->rt6i_nsiblings++; + BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings); + rt6i_nsiblings++; + } + BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings); + } + /* * insert node */ + if (!replace) { + if (!add) + pr_warn("NLM_F_CREATE should be set when creating new route\n"); + +add: + if (mx) { + err = fib6_commit_metrics(&rt->dst, mx, mx_len); + if (err) + return err; + } + rt->dst.rt6_next = iter; + *ins = rt; + rt->rt6i_node = fn; + atomic_inc(&rt->rt6i_ref); + inet6_rt_notify(RTM_NEWROUTE, rt, info); + info->nl_net->ipv6.rt6_stats->fib_rt_entries++; + + if (!(fn->fn_flags & RTN_RTINFO)) { + info->nl_net->ipv6.rt6_stats->fib_route_nodes++; + fn->fn_flags |= RTN_RTINFO; + } - rt->dst.rt6_next = iter; - *ins = rt; - rt->rt6i_node = fn; - atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, info); - info->nl_net->ipv6.rt6_stats->fib_rt_entries++; - - if ((fn->fn_flags & RTN_RTINFO) == 0) { - info->nl_net->ipv6.rt6_stats->fib_route_nodes++; - fn->fn_flags |= RTN_RTINFO; + } else { + if (!found) { + if (add) + goto add; + pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); + return -ENOENT; + } + if (mx) { + err = fib6_commit_metrics(&rt->dst, mx, mx_len); + if (err) + return err; + } + *ins = rt; + rt->rt6i_node = fn; + rt->dst.rt6_next = iter->dst.rt6_next; + atomic_inc(&rt->rt6i_ref); + inet6_rt_notify(RTM_NEWROUTE, rt, info); + rt6_release(iter); + if (!(fn->fn_flags & RTN_RTINFO)) { + info->nl_net->ipv6.rt6_stats->fib_route_nodes++; + fn->fn_flags |= RTN_RTINFO; + } } return 0; @@ -676,7 +823,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt) { if (!timer_pending(&net->ipv6.ip6_fib_timer) && - (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) + (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE))) mod_timer(&net->ipv6.ip6_fib_timer, jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } @@ -694,16 +841,31 @@ void fib6_force_start_gc(struct net *net) * with source addr info in sub-trees */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) +int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, + struct nlattr *mx, int mx_len) { struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; - - fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), - rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst)); - - if (fn == NULL) + int allow_create = 1; + int replace_required = 0; + + if (info->nlh) { + if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) + allow_create = 0; + if (info->nlh->nlmsg_flags & NLM_F_REPLACE) + replace_required = 1; + } + if (!allow_create && !replace_required) + pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); + + fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen, + offsetof(struct rt6_info, rt6i_dst), allow_create, + replace_required); + if (IS_ERR(fn)) { + err = PTR_ERR(fn); + fn = NULL; goto out; + } pn = fn; @@ -711,7 +873,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) if (rt->rt6i_src.plen) { struct fib6_node *sn; - if (fn->subtree == NULL) { + if (!fn->subtree) { struct fib6_node *sfn; /* @@ -726,7 +888,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) /* Create subtree root node */ sfn = node_alloc(); - if (sfn == NULL) + if (!sfn) goto st_failure; sfn->leaf = info->nl_net->ipv6.ip6_null_entry; @@ -737,15 +899,17 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) /* Now add the first leaf node to new subtree */ sn = fib6_add_1(sfn, &rt->rt6i_src.addr, - sizeof(struct in6_addr), rt->rt6i_src.plen, - offsetof(struct rt6_info, rt6i_src)); + rt->rt6i_src.plen, + offsetof(struct rt6_info, rt6i_src), + allow_create, replace_required); - if (sn == NULL) { + if (IS_ERR(sn)) { /* If it is failed, discard just allocated root, and then (in st_failure) stale node in main tree. */ node_free(sfn); + err = PTR_ERR(sn); goto st_failure; } @@ -754,14 +918,17 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) fn->subtree = sfn; } else { sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, - sizeof(struct in6_addr), rt->rt6i_src.plen, - offsetof(struct rt6_info, rt6i_src)); + rt->rt6i_src.plen, + offsetof(struct rt6_info, rt6i_src), + allow_create, replace_required); - if (sn == NULL) + if (IS_ERR(sn)) { + err = PTR_ERR(sn); goto st_failure; + } } - if (fn->leaf == NULL) { + if (!fn->leaf) { fn->leaf = rt; atomic_inc(&rt->rt6i_ref); } @@ -769,12 +936,11 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) } #endif - err = fib6_add_rt2node(fn, rt, info); - - if (err == 0) { + err = fib6_add_rt2node(fn, rt, info, mx, mx_len); + if (!err) { fib6_start_gc(info->nl_net, rt); - if (!(rt->rt6i_flags&RTF_CACHE)) - fib6_prune_clones(info->nl_net, pn, rt); + if (!(rt->rt6i_flags & RTF_CACHE)) + fib6_prune_clones(info->nl_net, pn); } out: @@ -821,12 +987,12 @@ st_failure: */ struct lookup_args { - int offset; /* key offset on rt6_info */ - struct in6_addr *addr; /* search key */ + int offset; /* key offset on rt6_info */ + const struct in6_addr *addr; /* search key */ }; -static struct fib6_node * fib6_lookup_1(struct fib6_node *root, - struct lookup_args *args) +static struct fib6_node *fib6_lookup_1(struct fib6_node *root, + struct lookup_args *args) { struct fib6_node *fn; __be32 dir; @@ -851,11 +1017,10 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, fn = next; continue; } - break; } - while(fn) { + while (fn) { if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { struct rt6key *key; @@ -864,14 +1029,22 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { #ifdef CONFIG_IPV6_SUBTREES - if (fn->subtree) - fn = fib6_lookup_1(fn->subtree, args + 1); + if (fn->subtree) { + struct fib6_node *sfn; + sfn = fib6_lookup_1(fn->subtree, + args + 1); + if (!sfn) + goto backtrack; + fn = sfn; + } #endif - if (!fn || fn->fn_flags & RTN_RTINFO) + if (fn->fn_flags & RTN_RTINFO) return fn; } } - +#ifdef CONFIG_IPV6_SUBTREES +backtrack: +#endif if (fn->fn_flags & RTN_ROOT) break; @@ -881,8 +1054,8 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, return NULL; } -struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr, - struct in6_addr *saddr) +struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct fib6_node *fn; struct lookup_args args[] = { @@ -902,8 +1075,7 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr, }; fn = fib6_lookup_1(root, daddr ? args : args + 1); - - if (fn == NULL || fn->fn_flags & RTN_TL_ROOT) + if (!fn || fn->fn_flags & RTN_TL_ROOT) fn = root; return fn; @@ -915,9 +1087,9 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr, */ -static struct fib6_node * fib6_locate_1(struct fib6_node *root, - struct in6_addr *addr, - int plen, int offset) +static struct fib6_node *fib6_locate_1(struct fib6_node *root, + const struct in6_addr *addr, + int plen, int offset) { struct fib6_node *fn; @@ -945,9 +1117,9 @@ static struct fib6_node * fib6_locate_1(struct fib6_node *root, return NULL; } -struct fib6_node * fib6_locate(struct fib6_node *root, - struct in6_addr *daddr, int dst_len, - struct in6_addr *saddr, int src_len) +struct fib6_node *fib6_locate(struct fib6_node *root, + const struct in6_addr *daddr, int dst_len, + const struct in6_addr *saddr, int src_len) { struct fib6_node *fn; @@ -963,7 +1135,7 @@ struct fib6_node * fib6_locate(struct fib6_node *root, } #endif - if (fn && fn->fn_flags&RTN_RTINFO) + if (fn && fn->fn_flags & RTN_RTINFO) return fn; return NULL; @@ -977,14 +1149,13 @@ struct fib6_node * fib6_locate(struct fib6_node *root, static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) { - if (fn->fn_flags&RTN_ROOT) + if (fn->fn_flags & RTN_ROOT) return net->ipv6.ip6_null_entry; - while(fn) { - if(fn->left) + while (fn) { + if (fn->left) return fn->left->leaf; - - if(fn->right) + if (fn->right) return fn->right->leaf; fn = FIB6_SUBTREE(fn); @@ -1016,18 +1187,20 @@ static struct fib6_node *fib6_repair_tree(struct net *net, children = 0; child = NULL; - if (fn->right) child = fn->right, children |= 1; - if (fn->left) child = fn->left, children |= 2; + if (fn->right) + child = fn->right, children |= 1; + if (fn->left) + child = fn->left, children |= 2; if (children == 3 || FIB6_SUBTREE(fn) #ifdef CONFIG_IPV6_SUBTREES /* Subtree root (i.e. fn) may have one child */ - || (children && fn->fn_flags&RTN_ROOT) + || (children && fn->fn_flags & RTN_ROOT) #endif ) { fn->leaf = fib6_find_prefix(net, fn); #if RT6_DEBUG >= 2 - if (fn->leaf==NULL) { + if (!fn->leaf) { WARN_ON(!fn->leaf); fn->leaf = net->ipv6.ip6_null_entry; } @@ -1045,8 +1218,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net, } else { WARN_ON(fn->fn_flags & RTN_ROOT); #endif - if (pn->right == fn) pn->right = child; - else if (pn->left == fn) pn->left = child; + if (pn->right == fn) + pn->right = child; + else if (pn->left == fn) + pn->left = child; #if RT6_DEBUG >= 2 else WARN_ON(1); @@ -1060,7 +1235,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, read_lock(&fib6_walker_lock); FOR_WALKERS(w) { - if (child == NULL) { + if (!child) { if (w->root == fn) { w->root = w->node = NULL; RT6_TRACE("W %p adjusted by delroot 1\n", w); @@ -1078,10 +1253,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net, w->node = child; if (children&2) { RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); - w->state = w->state>=FWS_R ? FWS_U : FWS_INIT; + w->state = w->state >= FWS_R ? FWS_U : FWS_INIT; } else { RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); - w->state = w->state>=FWS_C ? FWS_U : FWS_INIT; + w->state = w->state >= FWS_C ? FWS_U : FWS_INIT; } } } @@ -1089,7 +1264,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, read_unlock(&fib6_walker_lock); node_free(fn); - if (pn->fn_flags&RTN_RTINFO || FIB6_SUBTREE(pn)) + if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn)) return pn; rt6_release(pn->leaf); @@ -1117,13 +1292,24 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, if (fn->rr_ptr == rt) fn->rr_ptr = NULL; + /* Remove this entry from other siblings */ + if (rt->rt6i_nsiblings) { + struct rt6_info *sibling, *next_sibling; + + list_for_each_entry_safe(sibling, next_sibling, + &rt->rt6i_siblings, rt6i_siblings) + sibling->rt6i_nsiblings--; + rt->rt6i_nsiblings = 0; + list_del_init(&rt->rt6i_siblings); + } + /* Adjust walkers */ read_lock(&fib6_walker_lock); FOR_WALKERS(w) { if (w->state == FWS_C && w->leaf == rt) { RT6_TRACE("walker %p adjusted by delroute\n", w); w->leaf = rt->dst.rt6_next; - if (w->leaf == NULL) + if (!w->leaf) w->state = FWS_U; } } @@ -1132,7 +1318,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, rt->dst.rt6_next = NULL; /* If it was last route, expunge its radix tree node */ - if (fn->leaf == NULL) { + if (!fn->leaf) { fn->fn_flags &= ~RTN_RTINFO; net->ipv6.rt6_stats->fib_route_nodes--; fn = fib6_repair_tree(net, fn); @@ -1146,7 +1332,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, * to still alive ones. */ while (fn) { - if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) { + if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { fn->leaf = fib6_find_prefix(net, fn); atomic_inc(&fn->leaf->rt6i_ref); rt6_release(rt); @@ -1168,27 +1354,27 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) struct rt6_info **rtp; #if RT6_DEBUG >= 2 - if (rt->dst.obsolete>0) { + if (rt->dst.obsolete > 0) { WARN_ON(fn != NULL); return -ENOENT; } #endif - if (fn == NULL || rt == net->ipv6.ip6_null_entry) + if (!fn || rt == net->ipv6.ip6_null_entry) return -ENOENT; WARN_ON(!(fn->fn_flags & RTN_RTINFO)); - if (!(rt->rt6i_flags&RTF_CACHE)) { + if (!(rt->rt6i_flags & RTF_CACHE)) { struct fib6_node *pn = fn; #ifdef CONFIG_IPV6_SUBTREES /* clones of this route might be in another subtree */ if (rt->rt6i_src.plen) { - while (!(pn->fn_flags&RTN_ROOT)) + while (!(pn->fn_flags & RTN_ROOT)) pn = pn->parent; pn = pn->parent; } #endif - fib6_prune_clones(info->nl_net, pn, rt); + fib6_prune_clones(info->nl_net, pn); } /* @@ -1234,11 +1420,11 @@ static int fib6_walk_continue(struct fib6_walker_t *w) for (;;) { fn = w->node; - if (fn == NULL) + if (!fn) return 0; if (w->prune && fn != w->root && - fn->fn_flags&RTN_RTINFO && w->state < FWS_C) { + fn->fn_flags & RTN_RTINFO && w->state < FWS_C) { w->state = FWS_C; w->leaf = fn->leaf; } @@ -1267,12 +1453,12 @@ static int fib6_walk_continue(struct fib6_walker_t *w) w->state = FWS_C; w->leaf = fn->leaf; case FWS_C: - if (w->leaf && fn->fn_flags&RTN_RTINFO) { + if (w->leaf && fn->fn_flags & RTN_RTINFO) { int err; - if (w->count < w->skip) { - w->count++; - continue; + if (w->skip) { + w->skip--; + goto skip; } err = w->func(w); @@ -1282,6 +1468,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) w->count++; continue; } +skip: w->state = FWS_U; case FWS_U: if (fn == w->root) @@ -1341,7 +1528,8 @@ static int fib6_clean_node(struct fib6_walker_t *w) res = fib6_del(rt, &info); if (res) { #if RT6_DEBUG >= 2 - printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); + pr_debug("%s: del failed: rt=%p@%p err=%d\n", + __func__, rt, rt->rt6i_node, res); #endif continue; } @@ -1383,20 +1571,19 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, } void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), - int prune, void *arg) + void *arg) { struct fib6_table *table; - struct hlist_node *node; struct hlist_head *head; unsigned int h; rcu_read_lock(); for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(table, head, tb6_hlist) { write_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, - func, prune, arg); + func, 0, arg); write_unlock_bh(&table->tb6_lock); } } @@ -1413,10 +1600,9 @@ static int fib6_prune_clone(struct rt6_info *rt, void *arg) return 0; } -static void fib6_prune_clones(struct net *net, struct fib6_node *fn, - struct rt6_info *rt) +static void fib6_prune_clones(struct net *net, struct fib6_node *fn) { - fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt); + fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL); } /* @@ -1441,8 +1627,8 @@ static int fib6_age(struct rt6_info *rt, void *arg) * only if they are not in use now. */ - if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) { - if (time_after(now, rt->rt6i_expires)) { + if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) { + if (time_after(now, rt->dst.expires)) { RT6_TRACE("expiring %p\n", rt); return -1; } @@ -1452,11 +1638,20 @@ static int fib6_age(struct rt6_info *rt, void *arg) time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) { RT6_TRACE("aging clone %p\n", rt); return -1; - } else if ((rt->rt6i_flags & RTF_GATEWAY) && - (!(rt->rt6i_nexthop->flags & NTF_ROUTER))) { - RT6_TRACE("purging route %p via non-router but gateway\n", - rt); - return -1; + } else if (rt->rt6i_flags & RTF_GATEWAY) { + struct neighbour *neigh; + __u8 neigh_flags = 0; + + neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway); + if (neigh) { + neigh_flags = neigh->flags; + neigh_release(neigh); + } + if (!(neigh_flags & NTF_ROUTER)) { + RT6_TRACE("purging route %p via non-router but gateway\n", + rt); + return -1; + } } gc_args.more++; } @@ -1466,27 +1661,28 @@ static int fib6_age(struct rt6_info *rt, void *arg) static DEFINE_SPINLOCK(fib6_gc_lock); -void fib6_run_gc(unsigned long expires, struct net *net) +void fib6_run_gc(unsigned long expires, struct net *net, bool force) { - if (expires != ~0UL) { + unsigned long now; + + if (force) { spin_lock_bh(&fib6_gc_lock); - gc_args.timeout = expires ? (int)expires : - net->ipv6.sysctl.ip6_rt_gc_interval; - } else { - if (!spin_trylock_bh(&fib6_gc_lock)) { - mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); - return; - } - gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; + } else if (!spin_trylock_bh(&fib6_gc_lock)) { + mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); + return; } + gc_args.timeout = expires ? (int)expires : + net->ipv6.sysctl.ip6_rt_gc_interval; gc_args.more = icmp6_dst_gc(); - fib6_clean_all(net, fib6_age, 0, NULL); + fib6_clean_all(net, fib6_age, NULL); + now = jiffies; + net->ipv6.ip6_rt_last_gc = now; if (gc_args.more) mod_timer(&net->ipv6.ip6_fib_timer, - round_jiffies(jiffies + round_jiffies(now + net->ipv6.sysctl.ip6_rt_gc_interval)); else del_timer(&net->ipv6.ip6_fib_timer); @@ -1495,7 +1691,7 @@ void fib6_run_gc(unsigned long expires, struct net *net) static void fib6_gc_timer_cb(unsigned long arg) { - fib6_run_gc(0, (struct net *)arg); + fib6_run_gc(0, (struct net *)arg, true); } static int __net_init fib6_net_init(struct net *net) @@ -1524,6 +1720,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; net->ipv6.fib6_main_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl), @@ -1534,6 +1731,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; net->ipv6.fib6_local_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); #endif fib6_tables_init(net); @@ -1549,7 +1747,7 @@ out_rt6_stats: kfree(net->ipv6.rt6_stats); out_timer: return -ENOMEM; - } +} static void fib6_net_exit(struct net *net) { @@ -1557,8 +1755,10 @@ static void fib6_net_exit(struct net *net) del_timer_sync(&net->ipv6.ip6_fib_timer); #ifdef CONFIG_IPV6_MULTIPLE_TABLES + inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers); kfree(net->ipv6.fib6_local_tbl); #endif + inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers); kfree(net->ipv6.fib6_main_tbl); kfree(net->ipv6.fib_table_hash); kfree(net->ipv6.rt6_stats); @@ -1584,7 +1784,8 @@ int __init fib6_init(void) if (ret) goto out_kmem_cache_create; - ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib); + ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, + NULL); if (ret) goto out_unregister_subsys; out: @@ -1602,3 +1803,189 @@ void fib6_gc_cleanup(void) unregister_pernet_subsys(&fib6_net_ops); kmem_cache_destroy(fib6_node_kmem); } + +#ifdef CONFIG_PROC_FS + +struct ipv6_route_iter { + struct seq_net_private p; + struct fib6_walker_t w; + loff_t skip; + struct fib6_table *tbl; + __u32 sernum; +}; + +static int ipv6_route_seq_show(struct seq_file *seq, void *v) +{ + struct rt6_info *rt = v; + struct ipv6_route_iter *iter = seq->private; + + seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); + +#ifdef CONFIG_IPV6_SUBTREES + seq_printf(seq, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); +#else + seq_puts(seq, "00000000000000000000000000000000 00 "); +#endif + if (rt->rt6i_flags & RTF_GATEWAY) + seq_printf(seq, "%pi6", &rt->rt6i_gateway); + else + seq_puts(seq, "00000000000000000000000000000000"); + + seq_printf(seq, " %08x %08x %08x %08x %8s\n", + rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), + rt->dst.__use, rt->rt6i_flags, + rt->dst.dev ? rt->dst.dev->name : ""); + iter->w.leaf = NULL; + return 0; +} + +static int ipv6_route_yield(struct fib6_walker_t *w) +{ + struct ipv6_route_iter *iter = w->args; + + if (!iter->skip) + return 1; + + do { + iter->w.leaf = iter->w.leaf->dst.rt6_next; + iter->skip--; + if (!iter->skip && iter->w.leaf) + return 1; + } while (iter->w.leaf); + + return 0; +} + +static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter) +{ + memset(&iter->w, 0, sizeof(iter->w)); + iter->w.func = ipv6_route_yield; + iter->w.root = &iter->tbl->tb6_root; + iter->w.state = FWS_INIT; + iter->w.node = iter->w.root; + iter->w.args = iter; + iter->sernum = iter->w.root->fn_sernum; + INIT_LIST_HEAD(&iter->w.lh); + fib6_walker_link(&iter->w); +} + +static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, + struct net *net) +{ + unsigned int h; + struct hlist_node *node; + + if (tbl) { + h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1; + node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist)); + } else { + h = 0; + node = NULL; + } + + while (!node && h < FIB6_TABLE_HASHSZ) { + node = rcu_dereference_bh( + hlist_first_rcu(&net->ipv6.fib_table_hash[h++])); + } + return hlist_entry_safe(node, struct fib6_table, tb6_hlist); +} + +static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) +{ + if (iter->sernum != iter->w.root->fn_sernum) { + iter->sernum = iter->w.root->fn_sernum; + iter->w.state = FWS_INIT; + iter->w.node = iter->w.root; + WARN_ON(iter->w.skip); + iter->w.skip = iter->w.count; + } +} + +static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + int r; + struct rt6_info *n; + struct net *net = seq_file_net(seq); + struct ipv6_route_iter *iter = seq->private; + + if (!v) + goto iter_table; + + n = ((struct rt6_info *)v)->dst.rt6_next; + if (n) { + ++*pos; + return n; + } + +iter_table: + ipv6_route_check_sernum(iter); + read_lock(&iter->tbl->tb6_lock); + r = fib6_walk_continue(&iter->w); + read_unlock(&iter->tbl->tb6_lock); + if (r > 0) { + if (v) + ++*pos; + return iter->w.leaf; + } else if (r < 0) { + fib6_walker_unlink(&iter->w); + return NULL; + } + fib6_walker_unlink(&iter->w); + + iter->tbl = ipv6_route_seq_next_table(iter->tbl, net); + if (!iter->tbl) + return NULL; + + ipv6_route_seq_setup_walk(iter); + goto iter_table; +} + +static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU_BH) +{ + struct net *net = seq_file_net(seq); + struct ipv6_route_iter *iter = seq->private; + + rcu_read_lock_bh(); + iter->tbl = ipv6_route_seq_next_table(NULL, net); + iter->skip = *pos; + + if (iter->tbl) { + ipv6_route_seq_setup_walk(iter); + return ipv6_route_seq_next(seq, NULL, pos); + } else { + return NULL; + } +} + +static bool ipv6_route_iter_active(struct ipv6_route_iter *iter) +{ + struct fib6_walker_t *w = &iter->w; + return w->node && !(w->state == FWS_U && w->node == w->root); +} + +static void ipv6_route_seq_stop(struct seq_file *seq, void *v) + __releases(RCU_BH) +{ + struct ipv6_route_iter *iter = seq->private; + + if (ipv6_route_iter_active(iter)) + fib6_walker_unlink(&iter->w); + + rcu_read_unlock_bh(); +} + +static const struct seq_operations ipv6_route_seq_ops = { + .start = ipv6_route_seq_start, + .next = ipv6_route_seq_next, + .stop = ipv6_route_seq_stop, + .show = ipv6_route_seq_show +}; + +int ipv6_route_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &ipv6_route_seq_ops, + sizeof(struct ipv6_route_iter)); +} + +#endif /* CONFIG_PROC_FS */ |
