diff options
author | Patrick McHardy <kaber@trash.net> | 2010-04-20 16:02:01 +0200 |
---|---|---|
committer | Patrick McHardy <kaber@trash.net> | 2010-04-20 16:02:01 +0200 |
commit | 62910554656cdcd6b6f84a5154c4155aae4ca231 (patch) | |
tree | dcf14004f6fd2ef7154362ff948bfeba0f3ea92d /net/ipv4/ipmr.c | |
parent | 22265a5c3c103cf8c50be62e6c90d045eb649e6d (diff) | |
parent | ab9304717f7624c41927f442e6b6d418b2d8b3e4 (diff) |
Merge branch 'master' of /repos/git/net-next-2.6
Conflicts:
Documentation/feature-removal-schedule.txt
net/ipv6/netfilter/ip6t_REJECT.c
net/netfilter/xt_limit.c
Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'net/ipv4/ipmr.c')
-rw-r--r-- | net/ipv4/ipmr.c | 829 |
1 files changed, 566 insertions, 263 deletions
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 1d42f6103c8..1aa498d7a0a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -47,6 +47,7 @@ #include <linux/mroute.h> #include <linux/init.h> #include <linux/if_ether.h> +#include <linux/slab.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> @@ -62,11 +63,40 @@ #include <net/ipip.h> #include <net/checksum.h> #include <net/netlink.h> +#include <net/fib_rules.h> #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) #define CONFIG_IP_PIMSM 1 #endif +struct mr_table { + struct list_head list; +#ifdef CONFIG_NET_NS + struct net *net; +#endif + u32 id; + struct sock *mroute_sk; + struct timer_list ipmr_expire_timer; + struct list_head mfc_unres_queue; + struct list_head mfc_cache_array[MFC_LINES]; + struct vif_device vif_table[MAXVIFS]; + int maxvif; + atomic_t cache_resolve_queue_len; + int mroute_do_assert; + int mroute_do_pim; +#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) + int mroute_reg_vif_num; +#endif +}; + +struct ipmr_rule { + struct fib_rule common; +}; + +struct ipmr_result { + struct mr_table *mrt; +}; + /* Big lock, protecting vif table, mrt cache and mroute socket state. Note that the changes are semaphored via rtnl_lock. */ @@ -77,9 +107,7 @@ static DEFINE_RWLOCK(mrt_lock); * Multicast router control variables */ -#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL) - -static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */ +#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) /* Special spinlock for queue of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); @@ -94,12 +122,215 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; -static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); -static int ipmr_cache_report(struct net *net, +static struct mr_table *ipmr_new_table(struct net *net, u32 id); +static int ip_mr_forward(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, struct mfc_cache *cache, + int local); +static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); -static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); +static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + struct mfc_cache *c, struct rtmsg *rtm); +static void ipmr_expire_process(unsigned long arg); + +#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES +#define ipmr_for_each_table(mrt, net) \ + list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) + +static struct mr_table *ipmr_get_table(struct net *net, u32 id) +{ + struct mr_table *mrt; + + ipmr_for_each_table(mrt, net) { + if (mrt->id == id) + return mrt; + } + return NULL; +} + +static int ipmr_fib_lookup(struct net *net, struct flowi *flp, + struct mr_table **mrt) +{ + struct ipmr_result res; + struct fib_lookup_arg arg = { .result = &res, }; + int err; + + err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg); + if (err < 0) + return err; + *mrt = res.mrt; + return 0; +} + +static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) +{ + struct ipmr_result *res = arg->result; + struct mr_table *mrt; + + switch (rule->action) { + case FR_ACT_TO_TBL: + break; + case FR_ACT_UNREACHABLE: + return -ENETUNREACH; + case FR_ACT_PROHIBIT: + return -EACCES; + case FR_ACT_BLACKHOLE: + default: + return -EINVAL; + } + + mrt = ipmr_get_table(rule->fr_net, rule->table); + if (mrt == NULL) + return -EAGAIN; + res->mrt = mrt; + return 0; +} + +static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) +{ + return 1; +} + +static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = { + FRA_GENERIC_POLICY, +}; + +static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, + struct fib_rule_hdr *frh, struct nlattr **tb) +{ + return 0; +} + +static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, + struct nlattr **tb) +{ + return 1; +} + +static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, + struct fib_rule_hdr *frh) +{ + frh->dst_len = 0; + frh->src_len = 0; + frh->tos = 0; + return 0; +} + +static struct fib_rules_ops ipmr_rules_ops_template = { + .family = FIB_RULES_IPMR, + .rule_size = sizeof(struct ipmr_rule), + .addr_size = sizeof(u32), + .action = ipmr_rule_action, + .match = ipmr_rule_match, + .configure = ipmr_rule_configure, + .compare = ipmr_rule_compare, + .default_pref = fib_default_rule_pref, + .fill = ipmr_rule_fill, + .nlgroup = RTNLGRP_IPV4_RULE, + .policy = ipmr_rule_policy, + .owner = THIS_MODULE, +}; + +static int __net_init ipmr_rules_init(struct net *net) +{ + struct fib_rules_ops *ops; + struct mr_table *mrt; + int err; + + ops = fib_rules_register(&ipmr_rules_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); + + INIT_LIST_HEAD(&net->ipv4.mr_tables); + + mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); + if (mrt == NULL) { + err = -ENOMEM; + goto err1; + } + + err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0); + if (err < 0) + goto err2; + + net->ipv4.mr_rules_ops = ops; + return 0; + +err2: + kfree(mrt); +err1: + fib_rules_unregister(ops); + return err; +} + +static void __net_exit ipmr_rules_exit(struct net *net) +{ + struct mr_table *mrt, *next; + + list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) + kfree(mrt); + fib_rules_unregister(net->ipv4.mr_rules_ops); +} +#else +#define ipmr_for_each_table(mrt, net) \ + for (mrt = net->ipv4.mrt; mrt; mrt = NULL) + +static struct mr_table *ipmr_get_table(struct net *net, u32 id) +{ + return net->ipv4.mrt; +} + +static int ipmr_fib_lookup(struct net *net, struct flowi *flp, + struct mr_table **mrt) +{ + *mrt = net->ipv4.mrt; + return 0; +} + +static int __net_init ipmr_rules_init(struct net *net) +{ + net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); + return net->ipv4.mrt ? 0 : -ENOMEM; +} + +static void __net_exit ipmr_rules_exit(struct net *net) +{ + kfree(net->ipv4.mrt); +} +#endif + +static struct mr_table *ipmr_new_table(struct net *net, u32 id) +{ + struct mr_table *mrt; + unsigned int i; -static struct timer_list ipmr_expire_timer; + mrt = ipmr_get_table(net, id); + if (mrt != NULL) + return mrt; + + mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); + if (mrt == NULL) + return NULL; + write_pnet(&mrt->net, net); + mrt->id = id; + + /* Forwarding cache */ + for (i = 0; i < MFC_LINES; i++) + INIT_LIST_HEAD(&mrt->mfc_cache_array[i]); + + INIT_LIST_HEAD(&mrt->mfc_unres_queue); + + setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, + (unsigned long)mrt); + +#ifdef CONFIG_IP_PIMSM + mrt->mroute_reg_vif_num = -1; +#endif +#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES + list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); +#endif + return mrt; +} /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ @@ -200,12 +431,22 @@ failure: static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); + struct mr_table *mrt; + struct flowi fl = { + .oif = dev->ifindex, + .iif = skb->skb_iif, + .mark = skb->mark, + }; + int err; + + err = ipmr_fib_lookup(net, &fl, &mrt); + if (err < 0) + return err; read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; - ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num, - IGMPMSG_WHOLEPKT); + ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return NETDEV_TX_OK; @@ -225,12 +466,18 @@ static void reg_vif_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; } -static struct net_device *ipmr_reg_vif(struct net *net) +static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) { struct net_device *dev; struct in_device *in_dev; + char name[IFNAMSIZ]; - dev = alloc_netdev(0, "pimreg", reg_vif_setup); + if (mrt->id == RT_TABLE_DEFAULT) + sprintf(name, "pimreg"); + else + sprintf(name, "pimreg%u", mrt->id); + + dev = alloc_netdev(0, name, reg_vif_setup); if (dev == NULL) return NULL; @@ -275,17 +522,17 @@ failure: * @notify: Set to 1, if the caller is a notifier_call */ -static int vif_delete(struct net *net, int vifi, int notify, +static int vif_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head) { struct vif_device *v; struct net_device *dev; struct in_device *in_dev; - if (vifi < 0 || vifi >= net->ipv4.maxvif) + if (vifi < 0 || vifi >= mrt->maxvif) return -EADDRNOTAVAIL; - v = &net->ipv4.vif_table[vifi]; + v = &mrt->vif_table[vifi]; write_lock_bh(&mrt_lock); dev = v->dev; @@ -297,17 +544,17 @@ static int vif_delete(struct net *net, int vifi, int notify, } #ifdef CONFIG_IP_PIMSM - if (vifi == net->ipv4.mroute_reg_vif_num) - net->ipv4.mroute_reg_vif_num = -1; + if (vifi == mrt->mroute_reg_vif_num) + mrt->mroute_reg_vif_num = -1; #endif - if (vifi+1 == net->ipv4.maxvif) { + if (vifi+1 == mrt->maxvif) { int tmp; for (tmp=vifi-1; tmp>=0; tmp--) { - if (VIF_EXISTS(net, tmp)) + if (VIF_EXISTS(mrt, tmp)) break; } - net->ipv4.maxvif = tmp+1; + mrt->maxvif = tmp+1; } write_unlock_bh(&mrt_lock); @@ -328,7 +575,6 @@ static int vif_delete(struct net *net, int vifi, int notify, static inline void ipmr_cache_free(struct mfc_cache *c) { - release_net(mfc_net(c)); kmem_cache_free(mrt_cachep, c); } @@ -336,13 +582,13 @@ static inline void ipmr_cache_free(struct mfc_cache *c) and reporting error to netlink readers. */ -static void ipmr_destroy_unres(struct mfc_cache *c) +static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) { + struct net *net = read_pnet(&mrt->net); struct sk_buff *skb; struct nlmsgerr *e; - struct net *net = mfc_net(c); - atomic_dec(&net->ipv4.cache_resolve_queue_len); + atomic_dec(&mrt->cache_resolve_queue_len); while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { @@ -363,42 +609,40 @@ static void ipmr_destroy_unres(struct mfc_cache *c) } -/* Single timer process for all the unresolved queue. */ +/* Timer process for the unresolved queue. */ -static void ipmr_expire_process(unsigned long dummy) +static void ipmr_expire_process(unsigned long arg) { + struct mr_table *mrt = (struct mr_table *)arg; unsigned long now; unsigned long expires; - struct mfc_cache *c, **cp; + struct mfc_cache *c, *next; if (!spin_trylock(&mfc_unres_lock)) { - mod_timer(&ipmr_expire_timer, jiffies+HZ/10); + mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); return; } - if (mfc_unres_queue == NULL) + if (list_empty(&mrt->mfc_unres_queue)) goto out; now = jiffies; expires = 10*HZ; - cp = &mfc_unres_queue; - while ((c=*cp) != NULL) { + list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { if (time_after(c->mfc_un.unres.expires, now)) { unsigned long interval = c->mfc_un.unres.expires - now; if (interval < expires) expires = interval; - cp = &c->next; continue; } - *cp = c->next; - - ipmr_destroy_unres(c); + list_del(&c->list); + ipmr_destroy_unres(mrt, c); } - if (mfc_unres_queue != NULL) - mod_timer(&ipmr_expire_timer, jiffies + expires); + if (!list_empty(&mrt->mfc_unres_queue)) + mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); out: spin_unlock(&mfc_unres_lock); @@ -406,17 +650,17 @@ out: /* Fill oifs list. It is called under write locked mrt_lock. */ -static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) +static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, + unsigned char *ttls) { int vifi; - struct net *net = mfc_net(cache); cache->mfc_un.res.minvif = MAXVIFS; cache->mfc_un.res.maxvif = 0; memset(cache->mfc_un.res.ttls, 255, MAXVIFS); - for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) { - if (VIF_EXISTS(net, vifi) && + for (vifi = 0; vifi < mrt->maxvif; vifi++) { + if (VIF_EXISTS(mrt, vifi) && ttls[vifi] && ttls[vifi] < 255) { cache->mfc_un.res.ttls[vifi] = ttls[vifi]; if (cache->mfc_un.res.minvif > vifi) @@ -427,16 +671,17 @@ static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) } } -static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) +static int vif_add(struct net *net, struct mr_table *mrt, + struct vifctl *vifc, int mrtsock) { int vifi = vifc->vifc_vifi; - struct vif_device *v = &net->ipv4.vif_table[vifi]; + struct vif_device *v = &mrt->vif_table[vifi]; struct net_device *dev; struct in_device *in_dev; int err; /* Is vif busy ? */ - if (VIF_EXISTS(net, vifi)) + if (VIF_EXISTS(mrt, vifi)) return -EADDRINUSE; switch (vifc->vifc_flags) { @@ -446,9 +691,9 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) * Special Purpose VIF in PIM * All the packets will be sent to the daemon */ - if (net->ipv4.mroute_reg_vif_num >= 0) + if (mrt->mroute_reg_vif_num >= 0) return -EADDRINUSE; - dev = ipmr_reg_vif(net); + dev = ipmr_reg_vif(net, mrt); if (!dev) return -ENOBUFS; err = dev_set_allmulti(dev, 1); @@ -524,49 +769,47 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) v->dev = dev; #ifdef CONFIG_IP_PIMSM if (v->flags&VIFF_REGISTER) - net->ipv4.mroute_reg_vif_num = vifi; + mrt->mroute_reg_vif_num = vifi; #endif - if (vifi+1 > net->ipv4.maxvif) - net->ipv4.maxvif = vifi+1; + if (vifi+1 > mrt->maxvif) + mrt->maxvif = vifi+1; write_unlock_bh(&mrt_lock); return 0; } -static struct mfc_cache *ipmr_cache_find(struct net *net, +static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, __be32 origin, __be32 mcastgrp) { int line = MFC_HASH(mcastgrp, origin); struct mfc_cache *c; - for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) { - if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp) - break; + list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { + if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp) + return c; } - return c; + return NULL; } /* * Allocate a multicast cache entry */ -static struct mfc_cache *ipmr_cache_alloc(struct net *net) +static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (c == NULL) return NULL; c->mfc_un.res.minvif = MAXVIFS; - mfc_net_set(c, net); return c; } -static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net) +static struct mfc_cache *ipmr_cache_alloc_unres(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); if (c == NULL) return NULL; skb_queue_head_init(&c->mfc_un.unres.unresolved); c->mfc_un.unres.expires = jiffies + 10*HZ; - mfc_net_set(c, net); return c; } @@ -574,7 +817,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net) * A cache entry has gone into a resolved state from queued */ -static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) +static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, + struct mfc_cache *uc, struct mfc_cache *c) { struct sk_buff *skb; struct nlmsgerr *e; @@ -587,7 +831,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); - if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { + if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { nlh->nlmsg_len = (skb_tail_pointer(skb) - (u8 *)nlh); } else { @@ -599,9 +843,9 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) memset(&e->msg, 0, sizeof(e->msg)); } - rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).pid); } else - ip_mr_forward(skb, c, 0); + ip_mr_forward(net, mrt, skb, c, 0); } } @@ -612,7 +856,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) * Called under mrt_lock. */ -static int ipmr_cache_report(struct net *net, +static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert) { struct sk_buff *skb; @@ -645,7 +889,7 @@ static int ipmr_cache_report(struct net *net, memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); msg->im_msgtype = IGMPMSG_WHOLEPKT; msg->im_mbz = 0; - msg->im_vif = net->ipv4.mroute_reg_vif_num; + msg->im_vif = mrt->mroute_reg_vif_num; ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + sizeof(struct iphdr)); @@ -677,7 +921,7 @@ static int ipmr_cache_report(struct net *net, skb->transport_header = skb->network_header; } - if (net->ipv4.mroute_sk == NULL) { + if (mrt->mroute_sk == NULL) { kfree_skb(skb); return -EINVAL; } @@ -685,7 +929,7 @@ static int ipmr_cache_report(struct net *net, /* * Deliver to mrouted */ - ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb); + ret = sock_queue_rcv_skb(mrt->mroute_sk, skb); if (ret < 0) { if (net_ratelimit()) printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); @@ -700,27 +944,29 @@ static int ipmr_cache_report(struct net *net, */ static int -ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) +ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) { + bool found = false; int err; struct mfc_cache *c; const struct iphdr *iph = ip_hdr(skb); spin_lock_bh(&mfc_unres_lock); - for (c=mfc_unres_queue; c; c=c->next) { - if (net_eq(mfc_net(c), net) && - c->mfc_mcastgrp == iph->daddr && - c->mfc_origin == iph->saddr) + list_for_each_entry(c, &mrt->mfc_unres_queue, list) { + if (c->mfc_mcastgrp == iph->daddr && + c->mfc_origin == iph->saddr) { + found = true; break; + } } - if (c == NULL) { + if (!found) { /* * Create a new entry if allowable */ - if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 || - (c = ipmr_cache_alloc_unres(net)) == NULL) { + if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || + (c = ipmr_cache_alloc_unres()) == NULL) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); @@ -737,7 +983,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) /* * Reflect first query at mrouted. */ - err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE); + err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); if (err < 0) { /* If the report failed throw the cache entry out - Brad Parker @@ -749,11 +995,10 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) return err; } - atomic_inc(&net->ipv4.cache_resolve_queue_len); - c->next = mfc_unres_queue; - mfc_unres_queue = c; + atomic_inc(&mrt->cache_resolve_queue_len); + list_add(&c->list, &mrt->mfc_unres_queue); - mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires); + mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires); } /* @@ -775,19 +1020,18 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) * MFC cache manipulation by user space mroute daemon */ -static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc) +static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) { int line; - struct mfc_cache *c, **cp; + struct mfc_cache *c, *next; line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); - for (cp = &net->ipv4.mfc_cache_array[line]; - (c = *cp) != NULL; cp = &c->next) { + list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { write_lock_bh(&mrt_lock); - *cp = c->next; + list_del(&c->list); write_unlock_bh(&mrt_lock); ipmr_cache_free(c); @@ -797,24 +1041,30 @@ static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc) return -ENOENT; } -static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) +static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, + struct mfcctl *mfc, int mrtsock) { + bool found = false; int line; - struct mfc_cache *uc, *c, **cp; + struct mfc_cache *uc, *c; + + if (mfc->mfcc_parent >= MAXVIFS) + return -ENFILE; line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); - for (cp = &net->ipv4.mfc_cache_array[line]; - (c = *cp) != NULL; cp = &c->next) { + list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && - c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) + c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { + found = true; break; + } } - if (c != NULL) { + if (found) { write_lock_bh(&mrt_lock); c->mfc_parent = mfc->mfcc_parent; - ipmr_update_thresholds(c, mfc->mfcc_ttls); + ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); if (!mrtsock) c->mfc_flags |= MFC_STATIC; write_unlock_bh(&mrt_lock); @@ -824,43 +1074,42 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) return -EINVAL; - c = ipmr_cache_alloc(net); + c = ipmr_cache_alloc(); if (c == NULL) return -ENOMEM; c->mfc_origin = mfc->mfcc_origin.s_addr; c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; c->mfc_parent = mfc->mfcc_parent; - ipmr_update_thresholds(c, mfc->mfcc_ttls); + ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); if (!mrtsock) c->mfc_flags |= MFC_STATIC; write_lock_bh(&mrt_lock); - c->next = net->ipv4.mfc_cache_array[line]; - net->ipv4.mfc_cache_array[line] = c; + list_add(&c->list, &mrt->mfc_cache_array[line]); write_unlock_bh(&mrt_lock); /* * Check to see if we resolved a queued list. If so we * need to send on the frames and tidy up. */ + found = false; spin_lock_bh(&mfc_unres_lock); - for (cp = &mfc_unres_queue; (uc=*cp) != NULL; - cp = &uc->next) { - if (net_eq(mfc_net(uc), net) && - uc->mfc_origin == c->mfc_origin && + list_for_each_entry(uc, &mrt->mfc_unres_queue, list) { + if (uc->mfc_origin == c->mfc_origin && uc->mfc_mcastgrp == c->mfc_mcastgrp) { - *cp = uc->next; - atomic_dec(&net->ipv4.cache_resolve_queue_len); + list_del(&uc->list); + atomic_dec(&mrt->cache_resolve_queue_len); + found = true; break; } } - if (mfc_unres_queue == NULL) - del_timer(&ipmr_expire_timer); + if (list_empty(&mrt->mfc_unres_queue)) + del_timer(&mrt->ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); - if (uc) { - ipmr_cache_resolve(uc, c); + if (found) { + ipmr_cache_resolve(net, mrt, uc, c); ipmr_cache_free(uc); } return 0; @@ -870,53 +1119,41 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct net *net) +static void mroute_clean_tables(struct mr_table *mrt) { int i; LIST_HEAD(list); + struct mfc_cache *c, *next; /* * Shut down all active vif entries */ - for (i = 0; i < net->ipv4.maxvif; i++) { - if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC)) - vif_delete(net, i, 0, &list); + for (i = 0; i < mrt->maxvif; i++) { + if (!(mrt->vif_table[i].flags&VIFF_STATIC)) + vif_delete(mrt, i, 0, &list); } unregister_netdevice_many(&list); /* * Wipe the cache */ - for (i=0; i<MFC_LINES; i++) { - struct mfc_cache *c, **cp; - - cp = &net->ipv4.mfc_cache_array[i]; - while ((c = *cp) != NULL) { - if (c->mfc_flags&MFC_STATIC) { - cp = &c->next; + for (i = 0; i < MFC_LINES; i++) { + list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { + if (c->mfc_flags&MFC_STATIC) continue; - } write_lock_bh(&mrt_lock); - *cp = c->next; + list_del(&c->list); write_unlock_bh(&mrt_lock); ipmr_cache_free(c); } } - if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) { - struct mfc_cache *c, **cp; - + if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { spin_lock_bh(&mfc_unres_lock); - cp = &mfc_unres_queue; - while ((c = *cp) != NULL) { - if (!net_eq(mfc_net(c), net)) { - cp = &c->next; - continue; - } - *cp = c->next; - - ipmr_destroy_unres(c); + list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { + list_del(&c->list); + ipmr_destroy_unres(mrt, c); } spin_unlock_bh(&mfc_unres_lock); } @@ -925,16 +1162,19 @@ static void mroute_clean_tables(struct net *net) static void mrtsock_destruct(struct sock *sk) { struct net *net = sock_net(sk); + struct mr_table *mrt; rtnl_lock(); - if (sk == net->ipv4.mroute_sk) { - IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; + ipmr_for_each_table(mrt, net) { + if (sk == mrt->mroute_sk) { + IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; - write_lock_bh(&mrt_lock); - net->ipv4.mroute_sk = NULL; - write_unlock_bh(&mrt_lock); + write_lock_bh(&mrt_lock); + mrt->mroute_sk = NULL; + write_unlock_bh(&mrt_lock); - mroute_clean_tables(net); + mroute_clean_tables(mrt); + } } rtnl_unlock(); } @@ -952,9 +1192,14 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi struct vifctl vif; struct mfcctl mfc; struct net *net = sock_net(sk); + struct mr_table *mrt; + + mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); + if (mrt == NULL) + return -ENOENT; if (optname != MRT_INIT) { - if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN)) + if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN)) return -EACCES; } @@ -967,7 +1212,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi return -ENOPROTOOPT; rtnl_lock(); - if (net->ipv4.mroute_sk) { + if (mrt->mroute_sk) { rtnl_unlock(); return -EADDRINUSE; } @@ -975,7 +1220,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { write_lock_bh(&mrt_lock); - net->ipv4.mroute_sk = sk; + mrt->mroute_sk = sk; write_unlock_bh(&mrt_lock); IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; @@ -983,7 +1228,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi rtnl_unlock(); return ret; case MRT_DONE: - if (sk != net->ipv4.mroute_sk) + if (sk != mrt->mroute_sk) return -EACCES; return ip_ra_control(sk, 0, NULL); case MRT_ADD_VIF: @@ -996,9 +1241,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi return -ENFILE; rtnl_lock(); if (optname == MRT_ADD_VIF) { - ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk); + ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk); } else { - ret = vif_delete(net, vif.vifc_vifi, 0, NULL); + ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); } rtnl_unlock(); return ret; @@ -1015,9 +1260,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi return -EFAULT; rtnl_lock(); if (optname == MRT_DEL_MFC) - ret = ipmr_mfc_delete(net, &mfc); + ret = ipmr_mfc_delete(mrt, &mfc); else - ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk); + ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk); rtnl_unlock(); return ret; /* @@ -1028,7 +1273,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi int v; if (get_user(v,(int __user *)optval)) return -EFAULT; - net->ipv4.mroute_do_assert = (v) ? 1 : 0; + mrt->mroute_do_assert = (v) ? 1 : 0; return 0; } #ifdef CONFIG_IP_PIMSM @@ -1042,14 +1287,35 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi rtnl_lock(); ret = 0; - if (v != net->ipv4.mroute_do_pim) { - net->ipv4.mroute_do_pim = v; - net->ipv4.mroute_do_assert = v; + if (v != mrt->mroute_do_pim) { + mrt->mroute_do_pim = v; + mrt->mroute_do_assert = v; } rtnl_unlock(); return ret; } #endif +#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES + case MRT_TABLE: + { + u32 v; + + if (optlen != sizeof(u32)) + return -EINVAL; + if (get_user(v, (u32 __user *)optval)) + return -EFAULT; + if (sk == mrt->mroute_sk) + return -EBUSY; + + rtnl_lock(); + ret = 0; + if (!ipmr_new_table(net, v)) + ret = -ENOMEM; + raw_sk(sk)->ipmr_table = v; + rtnl_unlock(); + return ret; + } +#endif /* * Spurious command, or MRT_VERSION which you cannot * set. @@ -1068,6 +1334,11 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int int olr; int val; struct net *net = sock_net(sk); + struct mr_table *mrt; + + mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); + if (mrt == NULL) + return -ENOENT; if (optname != MRT_VERSION && #ifdef CONFIG_IP_PIMSM @@ -1089,10 +1360,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int val = 0x0305; #ifdef CONFIG_IP_PIMSM else if (optname == MRT_PIM) - val = net->ipv4.mroute_do_pim; + val = mrt->mroute_do_pim; #endif else - val = net->ipv4.mroute_do_assert; + val = mrt->mroute_do_assert; if (copy_to_user(optval, &val, olr)) return -EFAULT; return 0; @@ -1109,16 +1380,21 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) struct vif_device *vif; struct mfc_cache *c; struct net *net = sock_net(sk); + struct mr_table *mrt; + + mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); + if (mrt == NULL) + return -ENOENT; switch (cmd) { case SIOCGETVIFCNT: if (copy_from_user(&vr, arg, sizeof(vr))) return -EFAULT; - if (vr.vifi >= net->ipv4.maxvif) + if (vr.vifi >= mrt->maxvif) return -EINVAL; read_lock(&mrt_lock); - vif = &net->ipv4.vif_table[vr.vifi]; - if (VIF_EXISTS(net, vr.vifi)) { + vif = &mrt->vif_table[vr.vifi]; + if (VIF_EXISTS(mrt, vr.vifi)) { vr.icount = vif->pkt_in; vr.ocount = vif->pkt_out; vr.ibytes = vif->bytes_in; @@ -1136,7 +1412,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) return -EFAULT; read_lock(&mrt_lock); - c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr); + c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); if (c) { sr.pktcnt = c->mfc_un.res.pkt; sr.bytecnt = c->mfc_un.res.bytes; @@ -1159,16 +1435,20 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v { struct net_device *dev = ptr; struct net *net = dev_net(dev); + struct mr_table *mrt; struct vif_device *v; int ct; LIST_HEAD(list); if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; - v = &net->ipv4.vif_table[0]; - for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) { - if (v->dev == dev) - vif_delete(net, ct, 1, &list); + + ipmr_for_each_table(mrt, net) { + v = &mrt->vif_table[0]; + for (ct = 0; ct < mrt->maxvif; ct++, v++) { + if (v->dev == dev) + vif_delete(mrt, ct, 1, &list); + } } unregister_netdevice_many(&list); return NOTIFY_DONE; @@ -1227,11 +1507,11 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) * Processing handlers for ipmr_forward */ -static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) +static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, struct mfc_cache *c, int vifi) { - struct net *net = mfc_net(c); const struct iphdr *iph = ip_hdr(skb); - struct vif_device *vif = &net->ipv4.vif_table[vifi]; + struct vif_device *vif = &mrt->vif_table[vifi]; struct net_device *dev; struct rtable *rt; int encap = 0; @@ -1245,7 +1525,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) |