diff options
Diffstat (limited to 'net/ipv4/igmp.c')
| -rw-r--r-- | net/ipv4/igmp.c | 1235 |
1 files changed, 721 insertions, 514 deletions
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d8ce7133cd8..db710b059ba 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -8,10 +8,8 @@ * the older version didn't come out right using gcc 2.5.8, the newer one * seems to fall out with gcc 2.6.2. * - * Version: $Id: igmp.c,v 1.47 2002/02/01 22:01:03 davem Exp $ - * * Authors: - * Alan Cox <Alan.Cox@linux.org> + * Alan Cox <alan@lxorguk.ukuu.org.uk> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -35,7 +33,7 @@ * * Chih-Jen Chang : Tried to revise IGMP to Version 2 * Tsu-Sheng Tsao E-mail: chihjenc@scf.usc.edu and tsusheng@scf.usc.edu - * The enhancements are mainly based on Steve Deering's + * The enhancements are mainly based on Steve Deering's * ipmulti-3.5 source code. * Chih-Jen Chang : Added the igmp_get_mrouter_info and * Tsu-Sheng Tsao igmp_set_mrouter_info to keep track of @@ -49,11 +47,11 @@ * Alan Cox : Stop IGMP from 0.0.0.0 being accepted. * Alan Cox : Use GFP_ATOMIC in the right places. * Christian Daudt : igmp timer wasn't set for local group - * memberships but was being deleted, - * which caused a "del_timer() called + * memberships but was being deleted, + * which caused a "del_timer() called * from %p with timer not initialized\n" * message (960131). - * Christian Daudt : removed del_timer from + * Christian Daudt : removed del_timer from * igmp_timer_expire function (960205). * Christian Daudt : igmp_heard_report now only calls * igmp_timer_expire if tm->running is @@ -72,10 +70,9 @@ * Vinay Kulkarni */ -#include <linux/config.h> #include <linux/module.h> +#include <linux/slab.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/jiffies.h> @@ -91,7 +88,9 @@ #include <linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/times.h> +#include <linux/pkt_sched.h> +#include <net/net_namespace.h> #include <net/arp.h> #include <net/ip.h> #include <net/protocol.h> @@ -115,7 +114,8 @@ #define IGMP_V1_Router_Present_Timeout (400*HZ) #define IGMP_V2_Router_Present_Timeout (400*HZ) -#define IGMP_Unsolicited_Report_Interval (10*HZ) +#define IGMP_V2_Unsolicited_Report_Interval (10*HZ) +#define IGMP_V3_Unsolicited_Report_Interval (1*HZ) #define IGMP_Query_Response_Interval (10*HZ) #define IGMP_Unsolicited_Report_Count 2 @@ -129,45 +129,80 @@ * contradict to specs provided this delay is small enough. */ -#define IGMP_V1_SEEN(in_dev) (ipv4_devconf.force_igmp_version == 1 || \ - (in_dev)->cnf.force_igmp_version == 1 || \ - ((in_dev)->mr_v1_seen && \ - time_before(jiffies, (in_dev)->mr_v1_seen))) -#define IGMP_V2_SEEN(in_dev) (ipv4_devconf.force_igmp_version == 2 || \ - (in_dev)->cnf.force_igmp_version == 2 || \ - ((in_dev)->mr_v2_seen && \ - time_before(jiffies, (in_dev)->mr_v2_seen))) +#define IGMP_V1_SEEN(in_dev) \ + (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \ + IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ + ((in_dev)->mr_v1_seen && \ + time_before(jiffies, (in_dev)->mr_v1_seen))) +#define IGMP_V2_SEEN(in_dev) \ + (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \ + IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ + ((in_dev)->mr_v2_seen && \ + time_before(jiffies, (in_dev)->mr_v2_seen))) + +static int unsolicited_report_interval(struct in_device *in_dev) +{ + int interval_ms, interval_jiffies; + + if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) + interval_ms = IN_DEV_CONF_GET( + in_dev, + IGMPV2_UNSOLICITED_REPORT_INTERVAL); + else /* v3 */ + interval_ms = IN_DEV_CONF_GET( + in_dev, + IGMPV3_UNSOLICITED_REPORT_INTERVAL); + + interval_jiffies = msecs_to_jiffies(interval_ms); + + /* _timer functions can't handle a delay of 0 jiffies so ensure + * we always return a positive value. + */ + if (interval_jiffies <= 0) + interval_jiffies = 1; + return interval_jiffies; +} static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im); -static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr); +static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr); static void igmpv3_clear_delrec(struct in_device *in_dev); static int sf_setstate(struct ip_mc_list *pmc); static void sf_markstate(struct ip_mc_list *pmc); #endif static void ip_mc_clear_src(struct ip_mc_list *pmc); -static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode, - int sfcount, __u32 *psfsrc, int delta); +static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + int sfcount, __be32 *psfsrc, int delta); static void ip_ma_put(struct ip_mc_list *im) { if (atomic_dec_and_test(&im->refcnt)) { in_dev_put(im->interface); - kfree(im); + kfree_rcu(im, rcu); } } +#define for_each_pmc_rcu(in_dev, pmc) \ + for (pmc = rcu_dereference(in_dev->mc_list); \ + pmc != NULL; \ + pmc = rcu_dereference(pmc->next_rcu)) + +#define for_each_pmc_rtnl(in_dev, pmc) \ + for (pmc = rtnl_dereference(in_dev->mc_list); \ + pmc != NULL; \ + pmc = rtnl_dereference(pmc->next_rcu)) + #ifdef CONFIG_IP_MULTICAST /* * Timer management */ -static __inline__ void igmp_stop_timer(struct ip_mc_list *im) +static void igmp_stop_timer(struct ip_mc_list *im) { spin_lock_bh(&im->lock); if (del_timer(&im->timer)) atomic_dec(&im->refcnt); - im->tm_running=0; + im->tm_running = 0; im->reporter = 0; im->unsolicit_count = 0; spin_unlock_bh(&im->lock); @@ -176,16 +211,16 @@ static __inline__ void igmp_stop_timer(struct ip_mc_list *im) /* It must be called with locked im->lock */ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) { - int tv=net_random() % max_delay; + int tv = prandom_u32() % max_delay; - im->tm_running=1; + im->tm_running = 1; if (!mod_timer(&im->timer, jiffies+tv+2)) atomic_inc(&im->refcnt); } static void igmp_gq_start_timer(struct in_device *in_dev) { - int tv = net_random() % in_dev->mr_maxdelay; + int tv = prandom_u32() % in_dev->mr_maxdelay; in_dev->mr_gq_running = 1; if (!mod_timer(&in_dev->mr_gq_timer, jiffies+tv+2)) @@ -194,7 +229,7 @@ static void igmp_gq_start_timer(struct in_device *in_dev) static void igmp_ifc_start_timer(struct in_device *in_dev, int delay) { - int tv = net_random() % delay; + int tv = prandom_u32() % delay; if (!mod_timer(&in_dev->mr_ifc_timer, jiffies+tv+2)) in_dev_hold(in_dev); @@ -207,7 +242,7 @@ static void igmp_mod_timer(struct ip_mc_list *im, int max_delay) if (del_timer(&im->timer)) { if ((long)(im->timer.expires-jiffies) < max_delay) { add_timer(&im->timer); - im->tm_running=1; + im->tm_running = 1; spin_unlock_bh(&im->lock); return; } @@ -275,7 +310,7 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) struct ip_sf_list *psf; int scount = 0; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (!is_in(pmc, psf, type, gdeleted, sdeleted)) continue; scount++; @@ -283,57 +318,66 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) return scount; } +#define igmp_skb_size(skb) (*(unsigned int *)((skb)->cb)) + static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) { struct sk_buff *skb; struct rtable *rt; struct iphdr *pip; struct igmpv3_report *pig; - - skb = alloc_skb(size + LL_RESERVED_SPACE(dev), GFP_ATOMIC); - if (skb == NULL) - return NULL; - - { - struct flowi fl = { .oif = dev->ifindex, - .nl_u = { .ip4_u = { - .daddr = IGMPV3_ALL_MCR } }, - .proto = IPPROTO_IGMP }; - if (ip_route_output_key(&rt, &fl)) { - kfree_skb(skb); + struct net *net = dev_net(dev); + struct flowi4 fl4; + int hlen = LL_RESERVED_SPACE(dev); + int tlen = dev->needed_tailroom; + + while (1) { + skb = alloc_skb(size + hlen + tlen, + GFP_ATOMIC | __GFP_NOWARN); + if (skb) + break; + size >>= 1; + if (size < 256) return NULL; - } } - if (rt->rt_src == 0) { + skb->priority = TC_PRIO_CONTROL; + igmp_skb_size(skb) = size; + + rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0, + 0, 0, + IPPROTO_IGMP, 0, dev->ifindex); + if (IS_ERR(rt)) { kfree_skb(skb); - ip_rt_put(rt); return NULL; } - skb->dst = &rt->u.dst; + skb_dst_set(skb, &rt->dst); skb->dev = dev; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); - skb->nh.iph = pip =(struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4); + skb_reset_network_header(skb); + pip = ip_hdr(skb); + skb_put(skb, sizeof(struct iphdr) + 4); pip->version = 4; pip->ihl = (sizeof(struct iphdr)+4)>>2; pip->tos = 0xc0; pip->frag_off = htons(IP_DF); pip->ttl = 1; - pip->daddr = rt->rt_dst; - pip->saddr = rt->rt_src; + pip->daddr = fl4.daddr; + pip->saddr = fl4.saddr; pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ - ip_select_ident(pip, &rt->u.dst, NULL); - ((u8*)&pip[1])[0] = IPOPT_RA; - ((u8*)&pip[1])[1] = 4; - ((u8*)&pip[1])[2] = 0; - ((u8*)&pip[1])[3] = 0; - - pig =(struct igmpv3_report *)skb_put(skb, sizeof(*pig)); - skb->h.igmph = (struct igmphdr *)pig; + ip_select_ident(skb, NULL); + ((u8 *)&pip[1])[0] = IPOPT_RA; + ((u8 *)&pip[1])[1] = 4; + ((u8 *)&pip[1])[2] = 0; + ((u8 *)&pip[1])[3] = 0; + + skb->transport_header = skb->network_header + sizeof(struct iphdr) + 4; + skb_put(skb, sizeof(*pig)); + pig = igmpv3_report_hdr(skb); pig->type = IGMPV3_HOST_MEMBERSHIP_REPORT; pig->resv1 = 0; pig->csum = 0; @@ -344,24 +388,17 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) static int igmpv3_sendpack(struct sk_buff *skb) { - struct iphdr *pip = skb->nh.iph; - struct igmphdr *pig = skb->h.igmph; - int iplen, igmplen; - - iplen = skb->tail - (unsigned char *)skb->nh.iph; - pip->tot_len = htons(iplen); - ip_send_check(pip); + struct igmphdr *pig = igmp_hdr(skb); + const int igmplen = skb_tail_pointer(skb) - skb_transport_header(skb); - igmplen = skb->tail - (unsigned char *)skb->h.igmph; - pig->csum = ip_compute_csum((void *)skb->h.igmph, igmplen); + pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen); - return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev, - dst_output); + return ip_local_out(skb); } static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel) { - return sizeof(struct igmpv3_grec) + 4*igmp_scount(pmc,type,gdel,sdel); + return sizeof(struct igmpv3_grec) + 4*igmp_scount(pmc, type, gdel, sdel); } static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, @@ -380,13 +417,13 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, pgr->grec_auxwords = 0; pgr->grec_nsrcs = 0; pgr->grec_mca = pmc->multiaddr; - pih = (struct igmpv3_report *)skb->h.igmph; + pih = igmpv3_report_hdr(skb); pih->ngrec = htons(ntohs(pih->ngrec)+1); *ppgr = pgr; return skb; } -#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? (skb)->dev->mtu - (skb)->len : \ +#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? igmp_skb_size(skb) - (skb)->len : \ skb_tailroom(skb)) : 0) static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, @@ -413,7 +450,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, if (!*psf_list) goto empty_source; - pih = skb ? (struct igmpv3_report *)skb->h.igmph : NULL; + pih = skb ? igmpv3_report_hdr(skb) : NULL; /* EX and TO_EX get a fresh packet, if needed */ if (truncate) { @@ -426,8 +463,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, } first = 1; psf_prev = NULL; - for (psf=*psf_list; psf; psf=psf_next) { - u32 *psrc; + for (psf = *psf_list; psf; psf = psf_next) { + __be32 *psrc; psf_next = psf->sf_next; @@ -440,7 +477,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, if (isquery) psf->sf_gsresp = 0; - if (AVAILABLE(skb) < sizeof(u32) + + if (AVAILABLE(skb) < sizeof(__be32) + first*sizeof(struct igmpv3_grec)) { if (truncate && !first) break; /* truncate these */ @@ -456,7 +493,9 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, skb = add_grhead(skb, pmc, type, &pgr); first = 0; } - psrc = (u32 *)skb_put(skb, sizeof(u32)); + if (!skb) + return NULL; + psrc = (__be32 *)skb_put(skb, sizeof(__be32)); *psrc = psf->sf_inaddr; scount++; stotal++; if ((type == IGMPV3_ALLOW_NEW_SOURCES || @@ -481,7 +520,7 @@ empty_source: return skb; if (pmc->crcount || isquery) { /* make sure we have room for group header */ - if (skb && AVAILABLE(skb)<sizeof(struct igmpv3_grec)) { + if (skb && AVAILABLE(skb) < sizeof(struct igmpv3_grec)) { igmpv3_sendpack(skb); skb = NULL; /* add_grhead will get a new one */ } @@ -502,8 +541,8 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) int type; if (!pmc) { - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; spin_lock_bh(&pmc->lock); @@ -514,7 +553,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->lock); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } else { spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) @@ -537,7 +576,7 @@ static void igmpv3_clear_zeros(struct ip_sf_list **ppsf) struct ip_sf_list *psf_prev, *psf_next, *psf; psf_prev = NULL; - for (psf=*ppsf; psf; psf = psf_next) { + for (psf = *ppsf; psf; psf = psf_next) { psf_next = psf->sf_next; if (psf->sf_crcount == 0) { if (psf_prev) @@ -556,12 +595,12 @@ static void igmpv3_send_cr(struct in_device *in_dev) struct sk_buff *skb = NULL; int type, dtype; - read_lock(&in_dev->mc_list_lock); + rcu_read_lock(); spin_lock_bh(&in_dev->mc_tomb_lock); /* deleted MCA's */ pmc_prev = NULL; - for (pmc=in_dev->mc_tomb; pmc; pmc=pmc_next) { + for (pmc = in_dev->mc_tomb; pmc; pmc = pmc_next) { pmc_next = pmc->next; if (pmc->sfmode == MCAST_INCLUDE) { type = IGMPV3_BLOCK_OLD_SOURCES; @@ -593,7 +632,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) spin_unlock_bh(&in_dev->mc_tomb_lock); /* change recs */ - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(in_dev, pmc) { spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) { type = IGMPV3_BLOCK_OLD_SOURCES; @@ -616,7 +655,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) } spin_unlock_bh(&pmc->lock); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); if (!skb) return; @@ -631,8 +670,11 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, struct igmphdr *ih; struct rtable *rt; struct net_device *dev = in_dev->dev; - u32 group = pmc ? pmc->multiaddr : 0; - u32 dst; + struct net *net = dev_net(dev); + __be32 group = pmc ? pmc->multiaddr : 0; + struct flowi4 fl4; + __be32 dst; + int hlen, tlen; if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); @@ -641,29 +683,28 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, else dst = group; - { - struct flowi fl = { .oif = dev->ifindex, - .nl_u = { .ip4_u = { .daddr = dst } }, - .proto = IPPROTO_IGMP }; - if (ip_route_output_key(&rt, &fl)) - return -1; - } - if (rt->rt_src == 0) { - ip_rt_put(rt); + rt = ip_route_output_ports(net, &fl4, NULL, dst, 0, + 0, 0, + IPPROTO_IGMP, 0, dev->ifindex); + if (IS_ERR(rt)) return -1; - } - skb=alloc_skb(IGMP_SIZE+LL_RESERVED_SPACE(dev), GFP_ATOMIC); + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = alloc_skb(IGMP_SIZE + hlen + tlen, GFP_ATOMIC); if (skb == NULL) { ip_rt_put(rt); return -1; } + skb->priority = TC_PRIO_CONTROL; - skb->dst = &rt->u.dst; + skb_dst_set(skb, &rt->dst); - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); - skb->nh.iph = iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + skb_put(skb, sizeof(struct iphdr) + 4); iph->version = 4; iph->ihl = (sizeof(struct iphdr)+4)>>2; @@ -671,25 +712,22 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->frag_off = htons(IP_DF); iph->ttl = 1; iph->daddr = dst; - iph->saddr = rt->rt_src; + iph->saddr = fl4.saddr; iph->protocol = IPPROTO_IGMP; - iph->tot_len = htons(IGMP_SIZE); - ip_select_ident(iph, &rt->u.dst, NULL); - ((u8*)&iph[1])[0] = IPOPT_RA; - ((u8*)&iph[1])[1] = 4; - ((u8*)&iph[1])[2] = 0; - ((u8*)&iph[1])[3] = 0; - ip_send_check(iph); + ip_select_ident(skb, NULL); + ((u8 *)&iph[1])[0] = IPOPT_RA; + ((u8 *)&iph[1])[1] = 4; + ((u8 *)&iph[1])[2] = 0; + ((u8 *)&iph[1])[3] = 0; ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); - ih->type=type; - ih->code=0; - ih->csum=0; - ih->group=group; - ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr)); + ih->type = type; + ih->code = 0; + ih->csum = 0; + ih->group = group; + ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr)); - return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, - dst_output); + return ip_local_out(skb); } static void igmp_gq_timer_expire(unsigned long data) @@ -698,7 +736,7 @@ static void igmp_gq_timer_expire(unsigned long data) in_dev->mr_gq_running = 0; igmpv3_send_report(in_dev, NULL); - __in_dev_put(in_dev); + in_dev_put(in_dev); } static void igmp_ifc_timer_expire(unsigned long data) @@ -708,16 +746,17 @@ static void igmp_ifc_timer_expire(unsigned long data) igmpv3_send_cr(in_dev); if (in_dev->mr_ifc_count) { in_dev->mr_ifc_count--; - igmp_ifc_start_timer(in_dev, IGMP_Unsolicited_Report_Interval); + igmp_ifc_start_timer(in_dev, + unsolicited_report_interval(in_dev)); } - __in_dev_put(in_dev); + in_dev_put(in_dev); } static void igmp_ifc_event(struct in_device *in_dev) { if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - in_dev->mr_ifc_count = in_dev->mr_qrv ? in_dev->mr_qrv : + in_dev->mr_ifc_count = in_dev->mr_qrv ? in_dev->mr_qrv : IGMP_Unsolicited_Report_Count; igmp_ifc_start_timer(in_dev, 1); } @@ -725,15 +764,15 @@ static void igmp_ifc_event(struct in_device *in_dev) static void igmp_timer_expire(unsigned long data) { - struct ip_mc_list *im=(struct ip_mc_list *)data; + struct ip_mc_list *im = (struct ip_mc_list *)data; struct in_device *in_dev = im->interface; spin_lock(&im->lock); - im->tm_running=0; + im->tm_running = 0; if (im->unsolicit_count) { im->unsolicit_count--; - igmp_start_timer(im, IGMP_Unsolicited_Report_Interval); + igmp_start_timer(im, unsolicited_report_interval(in_dev)); } im->reporter = 1; spin_unlock(&im->lock); @@ -749,21 +788,21 @@ static void igmp_timer_expire(unsigned long data) } /* mark EXCLUDE-mode sources */ -static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) +static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) { struct ip_sf_list *psf; int i, scount; scount = 0; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (scount == nsrcs) break; - for (i=0; i<nsrcs; i++) { + for (i = 0; i < nsrcs; i++) { /* skip inactive filters */ - if (pmc->sfcount[MCAST_INCLUDE] || + if (psf->sf_count[MCAST_INCLUDE] || pmc->sfcount[MCAST_EXCLUDE] != psf->sf_count[MCAST_EXCLUDE]) - continue; + break; if (srcs[i] == psf->sf_inaddr) { scount++; break; @@ -776,7 +815,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) return 1; } -static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) +static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) { struct ip_sf_list *psf; int i, scount; @@ -786,10 +825,10 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) /* mark INCLUDE-mode sources */ scount = 0; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (scount == nsrcs) break; - for (i=0; i<nsrcs; i++) + for (i = 0; i < nsrcs; i++) if (srcs[i] == psf->sf_inaddr) { psf->sf_gsresp = 1; scount++; @@ -804,32 +843,35 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) return 1; } -static void igmp_heard_report(struct in_device *in_dev, u32 group) +/* return true if packet was dropped */ +static bool igmp_heard_report(struct in_device *in_dev, __be32 group) { struct ip_mc_list *im; /* Timers are only set for non-local groups */ if (group == IGMP_ALL_HOSTS) - return; + return false; - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im!=NULL; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { if (im->multiaddr == group) { igmp_stop_timer(im); break; } } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); + return false; } -static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, +/* return true if packet was dropped */ +static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, int len) { - struct igmphdr *ih = skb->h.igmph; - struct igmpv3_query *ih3 = (struct igmpv3_query *)ih; + struct igmphdr *ih = igmp_hdr(skb); + struct igmpv3_query *ih3 = igmpv3_query_hdr(skb); struct ip_mc_list *im; - u32 group = ih->group; + __be32 group = ih->group; int max_delay; int mark = 0; @@ -837,7 +879,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (len == 8) { if (ih->code == 0) { /* Alas, old v1 router presents here. */ - + max_delay = IGMP_Query_Response_Interval; in_dev->mr_v1_seen = jiffies + IGMP_V1_Router_Present_Timeout; @@ -855,17 +897,31 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, /* clear deleted report items */ igmpv3_clear_delrec(in_dev); } else if (len < 12) { - return; /* ignore bogus packet; freed by caller */ + return true; /* ignore bogus packet; freed by caller */ + } else if (IGMP_V1_SEEN(in_dev)) { + /* This is a v3 query with v1 queriers present */ + max_delay = IGMP_Query_Response_Interval; + group = 0; + } else if (IGMP_V2_SEEN(in_dev)) { + /* this is a v3 query with v2 queriers present; + * Interpretation of the max_delay code is problematic here. + * A real v2 host would use ih_code directly, while v3 has a + * different encoding. We use the v3 encoding as more likely + * to be intended in a v3 query. + */ + max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); + if (!max_delay) + max_delay = 1; /* can't mod w/ 0 */ } else { /* v3 */ if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) - return; - - ih3 = (struct igmpv3_query *) skb->h.raw; + return true; + + ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) { - if (!pskb_may_pull(skb, sizeof(struct igmpv3_query) - + ntohs(ih3->nsrcs)*sizeof(__u32))) - return; - ih3 = (struct igmpv3_query *) skb->h.raw; + if (!pskb_may_pull(skb, sizeof(struct igmpv3_query) + + ntohs(ih3->nsrcs)*sizeof(__be32))) + return true; + ih3 = igmpv3_query_hdr(skb); } max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); @@ -876,9 +932,9 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, in_dev->mr_qrv = ih3->qrv; if (!group) { /* general query */ if (ih3->nsrcs) - return; /* no sources allowed */ + return false; /* no sources allowed */ igmp_gq_start_timer(in_dev); - return; + return false; } /* mark sources to include, if group & source-specific */ mark = ih3->nsrcs != 0; @@ -894,8 +950,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * - Use the igmp->igmp_code field as the maximum * delay possible */ - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im!=NULL; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { int changed; if (group && group != im->multiaddr) @@ -908,61 +964,53 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, else im->gsquery = mark; changed = !im->gsquery || - igmp_marksources(im, ntohs(ih3->nsrcs), ih3->srcs); + igmp_marksources(im, ntohs(ih3->nsrcs), ih3->srcs); spin_unlock_bh(&im->lock); if (changed) igmp_mod_timer(im, max_delay); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); + return false; } +/* called in rcu_read_lock() section */ int igmp_rcv(struct sk_buff *skb) { /* This basically follows the spec line by line -- see RFC1112 */ struct igmphdr *ih; - struct in_device *in_dev = in_dev_get(skb->dev); + struct in_device *in_dev = __in_dev_get_rcu(skb->dev); int len = skb->len; + bool dropped = true; - if (in_dev==NULL) { - kfree_skb(skb); - return 0; - } + if (in_dev == NULL) + goto drop; if (!pskb_may_pull(skb, sizeof(struct igmphdr))) goto drop; - switch (skb->ip_summed) { - case CHECKSUM_HW: - if (!(u16)csum_fold(skb->csum)) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = 0; - if (__skb_checksum_complete(skb)) - goto drop; - } + if (skb_checksum_simple_validate(skb)) + goto drop; - ih = skb->h.igmph; + ih = igmp_hdr(skb); switch (ih->type) { case IGMP_HOST_MEMBERSHIP_QUERY: - igmp_heard_query(in_dev, skb, len); + dropped = igmp_heard_query(in_dev, skb, len); break; case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: - case IGMPV3_HOST_MEMBERSHIP_REPORT: /* Is it our report looped back? */ - if (((struct rtable*)skb->dst)->fl.iif == 0) + if (rt_is_output_route(skb_rtable(skb))) break; /* don't rely on MC router hearing unicast reports */ if (skb->pkt_type == PACKET_MULTICAST || skb->pkt_type == PACKET_BROADCAST) - igmp_heard_report(in_dev, ih->group); + dropped = igmp_heard_report(in_dev, ih->group); break; case IGMP_PIM: #ifdef CONFIG_IP_PIMSM_V1 - in_dev_put(in_dev); return pim_rcv_v1(skb); #endif + case IGMPV3_HOST_MEMBERSHIP_REPORT: case IGMP_DVMRP: case IGMP_TRACE: case IGMP_HOST_LEAVE_MESSAGE: @@ -970,12 +1018,14 @@ int igmp_rcv(struct sk_buff *skb) case IGMP_MTRACE_RESP: break; default: - NETDEBUG(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type); + break; } drop: - in_dev_put(in_dev); - kfree_skb(skb); + if (dropped) + kfree_skb(skb); + else + consume_skb(skb); return 0; } @@ -986,33 +1036,33 @@ drop: * Add a filter to a device */ -static void ip_mc_filter_add(struct in_device *in_dev, u32 addr) +static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr) { char buf[MAX_ADDR_LEN]; struct net_device *dev = in_dev->dev; /* Checking for IFF_MULTICAST here is WRONG-WRONG-WRONG. We will get multicast token leakage, when IFF_MULTICAST - is changed. This check should be done in dev->set_multicast_list + is changed. This check should be done in ndo_set_rx_mode routine. Something sort of: if (dev->mc_list && dev->flags&IFF_MULTICAST) { do it; } --ANK */ if (arp_mc_map(addr, buf, dev, 0) == 0) - dev_mc_add(dev,buf,dev->addr_len,0); + dev_mc_add(dev, buf); } /* * Remove a filter from a device */ -static void ip_mc_filter_del(struct in_device *in_dev, u32 addr) +static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr) { char buf[MAX_ADDR_LEN]; struct net_device *dev = in_dev->dev; if (arp_mc_map(addr, buf, dev, 0) == 0) - dev_mc_delete(dev,buf,dev->addr_len,0); + dev_mc_del(dev, buf); } #ifdef CONFIG_IP_MULTICAST @@ -1029,10 +1079,9 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) * for deleted items allows change reports to use common code with * non-deleted or query-response MCA's. */ - pmc = kmalloc(sizeof(*pmc), GFP_KERNEL); + pmc = kzalloc(sizeof(*pmc), GFP_KERNEL); if (!pmc) return; - memset(pmc, 0, sizeof(*pmc)); spin_lock_bh(&im->lock); pmc->interface = im->interface; in_dev_hold(in_dev); @@ -1046,7 +1095,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) pmc->tomb = im->tomb; pmc->sources = im->sources; im->tomb = im->sources = NULL; - for (psf=pmc->sources; psf; psf=psf->sf_next) + for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = pmc->crcount; } spin_unlock_bh(&im->lock); @@ -1057,14 +1106,14 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) spin_unlock_bh(&in_dev->mc_tomb_lock); } -static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr) +static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr) { struct ip_mc_list *pmc, *pmc_prev; struct ip_sf_list *psf, *psf_next; spin_lock_bh(&in_dev->mc_tomb_lock); pmc_prev = NULL; - for (pmc=in_dev->mc_tomb; pmc; pmc=pmc->next) { + for (pmc = in_dev->mc_tomb; pmc; pmc = pmc->next) { if (pmc->multiaddr == multiaddr) break; pmc_prev = pmc; @@ -1077,7 +1126,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr) } spin_unlock_bh(&in_dev->mc_tomb_lock); if (pmc) { - for (psf=pmc->tomb; psf; psf=psf_next) { + for (psf = pmc->tomb; psf; psf = psf_next) { psf_next = psf->sf_next; kfree(psf); } @@ -1102,20 +1151,20 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) kfree(pmc); } /* clear dead sources, too */ - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { struct ip_sf_list *psf, *psf_next; spin_lock_bh(&pmc->lock); psf = pmc->tomb; pmc->tomb = NULL; spin_unlock_bh(&pmc->lock); - for (; psf; psf=psf_next) { + for (; psf; psf = psf_next) { psf_next = psf->sf_next; kfree(psf); } } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } #endif @@ -1140,20 +1189,18 @@ static void igmp_group_dropped(struct ip_mc_list *im) if (!in_dev->dead) { if (IGMP_V1_SEEN(in_dev)) - goto done; + return; if (IGMP_V2_SEEN(in_dev)) { if (reporter) igmp_send_report(in_dev, im, IGMP_HOST_LEAVE_MESSAGE); - goto done; + return; } /* IGMPv3 */ igmpv3_add_delrec(in_dev, im); igmp_ifc_event(in_dev); } -done: #endif - ip_mc_clear_src(im); } static void igmp_group_added(struct ip_mc_list *im) @@ -1190,18 +1237,69 @@ static void igmp_group_added(struct ip_mc_list *im) * Multicast list managers */ +static u32 ip_mc_hash(const struct ip_mc_list *im) +{ + return hash_32((__force u32)im->multiaddr, MC_HASH_SZ_LOG); +} + +static void ip_mc_hash_add(struct in_device *in_dev, + struct ip_mc_list *im) +{ + struct ip_mc_list __rcu **mc_hash; + u32 hash; + + mc_hash = rtnl_dereference(in_dev->mc_hash); + if (mc_hash) { + hash = ip_mc_hash(im); + im->next_hash = mc_hash[hash]; + rcu_assign_pointer(mc_hash[hash], im); + return; + } + + /* do not use a hash table for small number of items */ + if (in_dev->mc_count < 4) + return; + + mc_hash = kzalloc(sizeof(struct ip_mc_list *) << MC_HASH_SZ_LOG, + GFP_KERNEL); + if (!mc_hash) + return; + + for_each_pmc_rtnl(in_dev, im) { + hash = ip_mc_hash(im); + im->next_hash = mc_hash[hash]; + RCU_INIT_POINTER(mc_hash[hash], im); + } + + rcu_assign_pointer(in_dev->mc_hash, mc_hash); +} + +static void ip_mc_hash_remove(struct in_device *in_dev, + struct ip_mc_list *im) +{ + struct ip_mc_list __rcu **mc_hash = rtnl_dereference(in_dev->mc_hash); + struct ip_mc_list *aux; + + if (!mc_hash) + return; + mc_hash += ip_mc_hash(im); + while ((aux = rtnl_dereference(*mc_hash)) != im) + mc_hash = &aux->next_hash; + *mc_hash = im->next_hash; +} + /* * A socket has joined a multicast group on device dev. */ -void ip_mc_inc_group(struct in_device *in_dev, u32 addr) +void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) { struct ip_mc_list *im; ASSERT_RTNL(); - for (im=in_dev->mc_list; im; im=im->next) { + for_each_pmc_rtnl(in_dev, im) { if (im->multiaddr == addr) { im->users++; ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0); @@ -1209,37 +1307,30 @@ void ip_mc_inc_group(struct in_device *in_dev, u32 addr) } } - im = kmalloc(sizeof(*im), GFP_KERNEL); + im = kzalloc(sizeof(*im), GFP_KERNEL); if (!im) goto out; - im->users=1; - im->interface=in_dev; + im->users = 1; + im->interface = in_dev; in_dev_hold(in_dev); - im->multiaddr=addr; + im->multiaddr = addr; /* initial mode is (EX, empty) */ im->sfmode = MCAST_EXCLUDE; - im->sfcount[MCAST_INCLUDE] = 0; im->sfcount[MCAST_EXCLUDE] = 1; - im->sources = NULL; - im->tomb = NULL; - im->crcount = 0; atomic_set(&im->refcnt, 1); spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST - im->tm_running=0; - init_timer(&im->timer); - im->timer.data=(unsigned long)im; - im->timer.function=&igmp_timer_expire; + setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im); im->unsolicit_count = IGMP_Unsolicited_Report_Count; - im->reporter = 0; - im->gsquery = 0; #endif - im->loaded = 0; - write_lock_bh(&in_dev->mc_list_lock); - im->next=in_dev->mc_list; - in_dev->mc_list=im; - write_unlock_bh(&in_dev->mc_list_lock); + + im->next_rcu = in_dev->mc_list; + in_dev->mc_count++; + rcu_assign_pointer(in_dev->mc_list, im); + + ip_mc_hash_add(in_dev, im); + #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); #endif @@ -1249,24 +1340,58 @@ void ip_mc_inc_group(struct in_device *in_dev, u32 addr) out: return; } +EXPORT_SYMBOL(ip_mc_inc_group); + +/* + * Resend IGMP JOIN report; used by netdev notifier. + */ +static void ip_mc_rejoin_groups(struct in_device *in_dev) +{ +#ifdef CONFIG_IP_MULTICAST + struct ip_mc_list *im; + int type; + + ASSERT_RTNL(); + + for_each_pmc_rtnl(in_dev, im) { + if (im->multiaddr == IGMP_ALL_HOSTS) + continue; + + /* a failover is happening and switches + * must be notified immediately + */ + if (IGMP_V1_SEEN(in_dev)) + type = IGMP_HOST_MEMBERSHIP_REPORT; + else if (IGMP_V2_SEEN(in_dev)) + type = IGMPV2_HOST_MEMBERSHIP_REPORT; + else + type = IGMPV3_HOST_MEMBERSHIP_REPORT; + igmp_send_report(in_dev, im, type); + } +#endif +} /* * A socket has left a multicast group on device dev */ -void ip_mc_dec_group(struct in_device *in_dev, u32 addr) +void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) { - struct ip_mc_list *i, **ip; - + struct ip_mc_list *i; + struct ip_mc_list __rcu **ip; + ASSERT_RTNL(); - - for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { - if (i->multiaddr==addr) { + + for (ip = &in_dev->mc_list; + (i = rtnl_dereference(*ip)) != NULL; + ip = &i->next_rcu) { + if (i->multiaddr == addr) { if (--i->users == 0) { - write_lock_bh(&in_dev->mc_list_lock); - *ip = i->next; - write_unlock_bh(&in_dev->mc_list_lock); + ip_mc_hash_remove(in_dev, i); + *ip = i->next_rcu; + in_dev->mc_count--; igmp_group_dropped(i); + ip_mc_clear_src(i); if (!in_dev->dead) ip_rt_multicast_event(in_dev); @@ -1278,17 +1403,40 @@ void ip_mc_dec_group(struct in_device *in_dev, u32 addr) } } } +EXPORT_SYMBOL(ip_mc_dec_group); + +/* Device changing type */ + +void ip_mc_unmap(struct in_device *in_dev) +{ + struct ip_mc_list *pmc; + + ASSERT_RTNL(); + + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_dropped(pmc); +} + +void ip_mc_remap(struct in_device *in_dev) +{ + struct ip_mc_list *pmc; + + ASSERT_RTNL(); + + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_added(pmc); +} /* Device going down */ void ip_mc_down(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); - for (i=in_dev->mc_list; i; i=i->next) - igmp_group_dropped(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_dropped(pmc); #ifdef CONFIG_IP_MULTICAST in_dev->mr_ifc_count = 0; @@ -1307,20 +1455,14 @@ void ip_mc_init_dev(struct in_device *in_dev) { ASSERT_RTNL(); - in_dev->mc_tomb = NULL; #ifdef CONFIG_IP_MULTICAST - in_dev->mr_gq_running = 0; - init_timer(&in_dev->mr_gq_timer); - in_dev->mr_gq_timer.data=(unsigned long) in_dev; - in_dev->mr_gq_timer.function=&igmp_gq_timer_expire; - in_dev->mr_ifc_count = 0; - init_timer(&in_dev->mr_ifc_timer); - in_dev->mr_ifc_timer.data=(unsigned long) in_dev; - in_dev->mr_ifc_timer.function=&igmp_ifc_timer_expire; + setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire, + (unsigned long)in_dev); + setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, + (unsigned long)in_dev); in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; #endif - rwlock_init(&in_dev->mc_list_lock); spin_lock_init(&in_dev->mc_tomb_lock); } @@ -1328,14 +1470,14 @@ void ip_mc_init_dev(struct in_device *in_dev) void ip_mc_up(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); - for (i=in_dev->mc_list; i; i=i->next) - igmp_group_added(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_added(pmc); } /* @@ -1351,43 +1493,40 @@ void ip_mc_destroy_dev(struct in_device *in_dev) /* Deactivate timers */ ip_mc_down(in_dev); - write_lock_bh(&in_dev->mc_list_lock); - while ((i = in_dev->mc_list) != NULL) { - in_dev->mc_list = i->next; - write_unlock_bh(&in_dev->mc_list_lock); + while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) { + in_dev->mc_list = i->next_rcu; + in_dev->mc_count--; - igmp_group_dropped(i); + /* We've dropped the groups in ip_mc_down already */ + ip_mc_clear_src(i); ip_ma_put(i); - - write_lock_bh(&in_dev->mc_list_lock); } - write_unlock_bh(&in_dev->mc_list_lock); } -static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) +/* RTNL is locked */ +static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = imr->imr_multiaddr.s_addr } } }; - struct rtable *rt; struct net_device *dev = NULL; struct in_device *idev = NULL; if (imr->imr_ifindex) { - idev = inetdev_by_index(imr->imr_ifindex); - if (idev) - __in_dev_put(idev); + idev = inetdev_by_index(net, imr->imr_ifindex); return idev; } if (imr->imr_address.s_addr) { - dev = ip_dev_find(imr->imr_address.s_addr); + dev = __ip_dev_find(net, imr->imr_address.s_addr, false); if (!dev) return NULL; - __dev_put(dev); } - if (!dev && !ip_route_output_key(&rt, &fl)) { - dev = rt->u.dst.dev; - ip_rt_put(rt); + if (!dev) { + struct rtable *rt = ip_route_output(net, + imr->imr_multiaddr.s_addr, + 0, 0, 0); + if (!IS_ERR(rt)) { + dev = rt->dst.dev; + ip_rt_put(rt); + } } if (dev) { imr->imr_ifindex = dev->ifindex; @@ -1399,18 +1538,18 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) /* * Join a socket to a group */ -int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS; -int sysctl_igmp_max_msf = IP_MAX_MSF; +int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS; +int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF; static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, - __u32 *psfsrc) + __be32 *psfsrc) { struct ip_sf_list *psf, *psf_prev; int rv = 0; psf_prev = NULL; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == *psfsrc) break; psf_prev = psf; @@ -1436,7 +1575,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #ifdef CONFIG_IP_MULTICAST if (psf->sf_oldin && !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { - psf->sf_crcount = in_dev->mr_qrv ? in_dev->mr_qrv : + psf->sf_crcount = in_dev->mr_qrv ? in_dev->mr_qrv : IGMP_Unsolicited_Report_Count; psf->sf_next = pmc->tomb; pmc->tomb = psf; @@ -1452,8 +1591,8 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #define igmp_ifc_event(x) do { } while (0) #endif -static int ip_mc_del_src(struct in_device *in_dev, __u32 *pmca, int sfmode, - int sfcount, __u32 *psfsrc, int delta) +static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + int sfcount, __be32 *psfsrc, int delta) { struct ip_mc_list *pmc; int changerec = 0; @@ -1461,18 +1600,18 @@ static int ip_mc_del_src(struct in_device *in_dev, __u32 *pmca, int sfmode, if (!in_dev) return -ENODEV; - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (*pmca == pmc->multiaddr) break; } if (!pmc) { /* MCA not found?? bug */ - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return -ESRCH; } spin_lock_bh(&pmc->lock); - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); #ifdef CONFIG_IP_MULTICAST sf_markstate(pmc); #endif @@ -1483,7 +1622,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __u32 *pmca, int sfmode, pmc->sfcount[sfmode]--; } err = 0; - for (i=0; i<sfcount; i++) { + for (i = 0; i < sfcount; i++) { int rv = ip_mc_del1_src(pmc, sfmode, &psfsrc[i]); changerec |= rv > 0; @@ -1500,10 +1639,10 @@ static int ip_mc_del_src(struct in_device *in_dev, __u32 *pmca, int sfmode, /* filter mode change */ pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST - pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : + pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : IGMP_Unsolicited_Report_Count; in_dev->mr_ifc_count = pmc->crcount; - for (psf=pmc->sources; psf; psf = psf->sf_next) + for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(pmc->interface); } else if (sf_setstate(pmc) || changerec) { @@ -1519,21 +1658,20 @@ out_unlock: * Add multicast single-source filter to the interface list */ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode, - __u32 *psfsrc, int delta) + __be32 *psfsrc) { struct ip_sf_list *psf, *psf_prev; psf_prev = NULL; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == *psfsrc) break; psf_prev = psf; } if (!psf) { - psf = kmalloc(sizeof(*psf), GFP_ATOMIC); + psf = kzalloc(sizeof(*psf), GFP_ATOMIC); if (!psf) return -ENOBUFS; - memset(psf, 0, sizeof(*psf)); psf->sf_inaddr = *psfsrc; if (psf_prev) { psf_prev->sf_next = psf; @@ -1553,7 +1691,7 @@ static void sf_markstate(struct ip_mc_list *pmc) struct ip_sf_list *psf; int mca_xcount = pmc->sfcount[MCAST_EXCLUDE]; - for (psf=pmc->sources; psf; psf=psf->sf_next) + for (psf = pmc->sources; psf; psf = psf->sf_next) if (pmc->sfcount[MCAST_EXCLUDE]) { psf->sf_oldin = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && @@ -1570,7 +1708,7 @@ static int sf_setstate(struct ip_mc_list *pmc) int new_in, rv; rv = 0; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (pmc->sfcount[MCAST_EXCLUDE]) { new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && !psf->sf_count[MCAST_INCLUDE]; @@ -1578,9 +1716,9 @@ static int sf_setstate(struct ip_mc_list *pmc) new_in = psf->sf_count[MCAST_INCLUDE] != 0; if (new_in) { if (!psf->sf_oldin) { - struct ip_sf_list *prev = 0; + struct ip_sf_list *prev = NULL; - for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next) { + for (dpsf = pmc->tomb; dpsf; dpsf = dpsf->sf_next) { if (dpsf->sf_inaddr == psf->sf_inaddr) break; prev = dpsf; @@ -1602,12 +1740,11 @@ static int sf_setstate(struct ip_mc_list *pmc) * add or update "delete" records if an active filter * is now inactive */ - for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next) + for (dpsf = pmc->tomb; dpsf; dpsf = dpsf->sf_next) if (dpsf->sf_inaddr == psf->sf_inaddr) break; if (!dpsf) { - dpsf = (struct ip_sf_list *) - kmalloc(sizeof(*dpsf), GFP_ATOMIC); + dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC); if (!dpsf) continue; *dpsf = *psf; @@ -1626,8 +1763,8 @@ static int sf_setstate(struct ip_mc_list *pmc) /* * Add multicast source filter list to the interface list */ -static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode, - int sfcount, __u32 *psfsrc, int delta) +static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + int sfcount, __be32 *psfsrc, int delta) { struct ip_mc_list *pmc; int isexclude; @@ -1635,18 +1772,18 @@ static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode, if (!in_dev) return -ENODEV; - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (*pmca == pmc->multiaddr) break; } if (!pmc) { /* MCA not found?? bug */ - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return -ESRCH; } spin_lock_bh(&pmc->lock); - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); #ifdef CONFIG_IP_MULTICAST sf_markstate(pmc); @@ -1655,21 +1792,22 @@ static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode, if (!delta) pmc->sfcount[sfmode]++; err = 0; - for (i=0; i<sfcount; i++) { - err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i], delta); + for (i = 0; i < sfcount; i++) { + err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i]); if (err) break; } if (err) { int j; - pmc->sfcount[sfmode]--; - for (j=0; j<i; j++) - (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[i]); + if (!delta) + pmc->sfcount[sfmode]--; + for (j = 0; j < i; j++) + (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]); } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { #ifdef CONFIG_IP_MULTICAST - struct in_device *in_dev = pmc->interface; struct ip_sf_list *psf; + in_dev = pmc->interface; #endif /* filter mode change */ @@ -1680,10 +1818,10 @@ static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode, #ifdef CONFIG_IP_MULTICAST /* else no filters; keep old mode for reports */ - pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : + pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : IGMP_Unsolicited_Report_Count; in_dev->mr_ifc_count = pmc->crcount; - for (psf=pmc->sources; psf; psf = psf->sf_next) + for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(in_dev); } else if (sf_setstate(pmc)) { @@ -1698,12 +1836,12 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) { struct ip_sf_list *psf, *nextpsf; - for (psf=pmc->tomb; psf; psf=nextpsf) { + for (psf = pmc->tomb; psf; psf = nextpsf) { nextpsf = psf->sf_next; kfree(psf); } pmc->tomb = NULL; - for (psf=pmc->sources; psf; psf=nextpsf) { + for (psf = pmc->sources; psf; psf = nextpsf) { nextpsf = psf->sf_next; kfree(psf); } @@ -1720,19 +1858,20 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) { int err; - u32 addr = imr->imr_multiaddr.s_addr; - struct ip_mc_socklist *iml=NULL, *i; + __be32 addr = imr->imr_multiaddr.s_addr; + struct ip_mc_socklist *iml = NULL, *i; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); int ifindex; int count = 0; - if (!MULTICAST(addr)) + if (!ipv4_is_multicast(addr)) return -EINVAL; - rtnl_shlock(); + rtnl_lock(); - in_dev = ip_mc_find_dev(imr); + in_dev = ip_mc_find_dev(net, imr); if (!in_dev) { iml = NULL; @@ -1742,7 +1881,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) err = -EADDRINUSE; ifindex = imr->imr_ifindex; - for (i = inet->mc_list; i; i = i->next) { + for_each_pmc_rtnl(inet, i) { if (i->multi.imr_multiaddr.s_addr == addr && i->multi.imr_ifindex == ifindex) goto done; @@ -1751,37 +1890,40 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) err = -ENOBUFS; if (count >= sysctl_igmp_max_memberships) goto done; - iml = sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL); + iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); if (iml == NULL) goto done; memcpy(&iml->multi, imr, sizeof(*imr)); - iml->next = inet->mc_list; + iml->next_rcu = inet->mc_list; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; - inet->mc_list = iml; + rcu_assign_pointer(inet->mc_list, iml); ip_mc_inc_group(in_dev, addr); err = 0; done: - rtnl_shunlock(); + rtnl_unlock(); return err; } +EXPORT_SYMBOL(ip_mc_join_group); static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, struct in_device *in_dev) { + struct ip_sf_socklist *psf = rtnl_dereference(iml->sflist); int err; - if (iml->sflist == 0) { + if (psf == NULL) { /* any-source empty exclude case */ return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, iml->sfmode, 0, NULL, 0); } err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, - iml->sfmode, iml->sflist->sl_count, - iml->sflist->sl_addr, 0); - sock_kfree_s(sk, iml->sflist, IP_SFLSIZE(iml->sflist->sl_max)); - iml->sflist = NULL; + iml->sfmode, psf->sl_count, psf->sl_addr, 0); + RCU_INIT_POINTER(iml->sflist, NULL); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); + kfree_rcu(psf, rcu); return err; } @@ -1792,57 +1934,73 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) { struct inet_sock *inet = inet_sk(sk); - struct ip_mc_socklist *iml, **imlp; + struct ip_mc_socklist *iml; + struct ip_mc_socklist __rcu **imlp; struct in_device *in_dev; - u32 group = imr->imr_multiaddr.s_addr; + struct net *net = sock_net(sk); + __be32 group = imr->imr_multiaddr.s_addr; u32 ifindex; + int ret = -EADDRNOTAVAIL; rtnl_lock(); - in_dev = ip_mc_find_dev(imr); + in_dev = ip_mc_find_dev(net, imr); if (!in_dev) { - rtnl_unlock(); - return -ENODEV; + ret = -ENODEV; + goto out; } ifindex = imr->imr_ifindex; - for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) { - if (iml->multi.imr_multiaddr.s_addr == group && - iml->multi.imr_ifindex == ifindex) { - (void) ip_mc_leave_src(sk, iml, in_dev); + for (imlp = &inet->mc_list; + (iml = rtnl_dereference(*imlp)) != NULL; + imlp = &iml->next_rcu) { + if (iml->multi.imr_multiaddr.s_addr != group) + continue; + if (ifindex) { + if (iml->multi.imr_ifindex != ifindex) + continue; + } else if (imr->imr_address.s_addr && imr->imr_address.s_addr != + iml->multi.imr_address.s_addr) + continue; - *imlp = iml->next; + (void) ip_mc_leave_src(sk, iml, in_dev); - ip_mc_dec_group(in_dev, group); - rtnl_unlock(); - sock_kfree_s(sk, iml, sizeof(*iml)); - return 0; - } + *imlp = iml->next_rcu; + + ip_mc_dec_group(in_dev, group); + rtnl_unlock(); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); + kfree_rcu(iml, rcu); + return 0; } +out: rtnl_unlock(); - return -EADDRNOTAVAIL; + return ret; } +EXPORT_SYMBOL(ip_mc_leave_group); int ip_mc_source(int add, int omode, struct sock *sk, struct ip_mreq_source *mreqs, int ifindex) { int err; struct ip_mreqn imr; - u32 addr = mreqs->imr_multiaddr; + __be32 addr = mreqs->imr_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev = NULL; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; + struct net *net = sock_net(sk); int leavegroup = 0; int i, j, rv; - if (!MULTICAST(addr)) + if (!ipv4_is_multicast(addr)) return -EINVAL; - rtnl_shlock(); + rtnl_lock(); imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; imr.imr_address.s_addr = mreqs->imr_interface; imr.imr_ifindex = ifindex; - in_dev = ip_mc_find_dev(&imr); + in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; @@ -1850,9 +2008,10 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { - if (pmc->multi.imr_multiaddr.s_addr == imr.imr_multiaddr.s_addr - && pmc->multi.imr_ifindex == imr.imr_ifindex) + for_each_pmc_rtnl(inet, pmc) { + if ((pmc->multi.imr_multiaddr.s_addr == + imr.imr_multiaddr.s_addr) && + (pmc->multi.imr_ifindex == imr.imr_ifindex)) break; } if (!pmc) { /* must have a prior join */ @@ -1868,19 +2027,19 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } else if (pmc->sfmode != omode) { /* allow mode switches for empty-set filters */ ip_mc_add_src(in_dev, &mreqs->imr_multiaddr, omode, 0, NULL, 0); - ip_mc_del_src(in_dev, &mreqs->imr_multiaddr, pmc->sfmode, 0, + ip_mc_del_src(in_dev, &mreqs->imr_multiaddr, pmc->sfmode, 0, NULL, 0); pmc->sfmode = omode; } - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); if (!add) { if (!psl) goto done; /* err = -EADDRNOTAVAIL */ rv = !0; - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, - sizeof(__u32)); + sizeof(__be32)); if (rv == 0) break; } @@ -1894,10 +2053,10 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } /* update the interface filter */ - ip_mc_del_src(in_dev, &mreqs->imr_multiaddr, omode, 1, + ip_mc_del_src(in_dev, &mreqs->imr_multiaddr, omode, 1, &mreqs->imr_sourceaddr, 1); - for (j=i+1; j<psl->sl_count; j++) + for (j = i+1; j < psl->sl_count; j++) psl->sl_addr[j-1] = psl->sl_addr[j]; psl->sl_count--; err = 0; @@ -1923,31 +2082,34 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct newpsl->sl_max = count; newpsl->sl_count = count - IP_SFBLOCK; if (psl) { - for (i=0; i<psl->sl_count; i++) + for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; - sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + kfree_rcu(psl, rcu); } - pmc->sflist = psl = newpsl; + rcu_assign_pointer(pmc->sflist, newpsl); + psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, - sizeof(__u32)); + sizeof(__be32)); if (rv == 0) break; } if (rv == 0) /* address already there is an error */ goto done; - for (j=psl->sl_count-1; j>=i; j--) + for (j = psl->sl_count-1; j >= i; j--) psl->sl_addr[j+1] = psl->sl_addr[j]; psl->sl_addr[i] = mreqs->imr_sourceaddr; psl->sl_count++; err = 0; /* update the interface list */ - ip_mc_add_src(in_dev, &mreqs->imr_multiaddr, omode, 1, + ip_mc_add_src(in_dev, &mreqs->imr_multiaddr, omode, 1, &mreqs->imr_sourceaddr, 1); done: - rtnl_shunlock(); + rtnl_unlock(); if (leavegroup) return ip_mc_leave_group(sk, &imr); return err; @@ -1957,25 +2119,26 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) { int err = 0; struct ip_mreqn imr; - u32 addr = msf->imsf_multiaddr; + __be32 addr = msf->imsf_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *newpsl, *psl; + struct net *net = sock_net(sk); int leavegroup = 0; - if (!MULTICAST(addr)) + if (!ipv4_is_multicast(addr)) return -EINVAL; if (msf->imsf_fmode != MCAST_INCLUDE && msf->imsf_fmode != MCAST_EXCLUDE) return -EINVAL; - rtnl_shlock(); + rtnl_lock(); imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; imr.imr_ifindex = ifindex; - in_dev = ip_mc_find_dev(&imr); + in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; @@ -1988,7 +2151,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) goto done; } - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && pmc->multi.imr_ifindex == imr.imr_ifindex) break; @@ -2018,19 +2181,21 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) (void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr, msf->imsf_fmode, 0, NULL, 0); } - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); if (psl) { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); - sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + kfree_rcu(psl, rcu); } else (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 0, NULL, 0); - pmc->sflist = newpsl; + rcu_assign_pointer(pmc->sflist, newpsl); pmc->sfmode = msf->imsf_fmode; err = 0; done: - rtnl_shunlock(); + rtnl_unlock(); if (leavegroup) err = ip_mc_leave_group(sk, &imr); return err; @@ -2041,21 +2206,22 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, { int err, len, count, copycount; struct ip_mreqn imr; - u32 addr = msf->imsf_multiaddr; + __be32 addr = msf->imsf_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; + struct net *net = sock_net(sk); - if (!MULTICAST(addr)) + if (!ipv4_is_multicast(addr)) return -EINVAL; - rtnl_shlock(); + rtnl_lock(); imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; imr.imr_ifindex = 0; - in_dev = ip_mc_find_dev(&imr); + in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; @@ -2063,7 +2229,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, } err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && pmc->multi.imr_ifindex == imr.imr_ifindex) break; @@ -2071,8 +2237,8 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, if (!pmc) /* must have a prior join */ goto done; msf->imsf_fmode = pmc->sfmode; - psl = pmc->sflist; - rtnl_shunlock(); + psl = rtnl_dereference(pmc->sflist); + rtnl_unlock(); if (!psl) { len = 0; count = 0; @@ -2091,7 +2257,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, return -EFAULT; return 0; done: - rtnl_shunlock(); + rtnl_unlock(); return err; } @@ -2100,7 +2266,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, { int err, i, count, copycount; struct sockaddr_in *psin; - u32 addr; + __be32 addr; struct ip_mc_socklist *pmc; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; @@ -2109,14 +2275,14 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (psin->sin_family != AF_INET) return -EINVAL; addr = psin->sin_addr.s_addr; - if (!MULTICAST(addr)) + if (!ipv4_is_multicast(addr)) return -EINVAL; - rtnl_shlock(); + rtnl_lock(); err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == addr && pmc->multi.imr_ifindex == gsf->gf_interface) break; @@ -2124,8 +2290,8 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!pmc) /* must have a prior join */ goto done; gsf->gf_fmode = pmc->sfmode; - psl = pmc->sflist; - rtnl_shunlock(); + psl = rtnl_dereference(pmc->sflist); + rtnl_unlock(); count = psl ? psl->sl_count : 0; copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; gsf->gf_numsrc = count; @@ -2133,8 +2299,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) { return -EFAULT; } - for (i=0; i<copycount; i++) { - struct sockaddr_in *psin; + for (i = 0; i < copycount; i++) { struct sockaddr_storage ss; psin = (struct sockaddr_in *)&ss; @@ -2146,43 +2311,53 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, } return 0; done: - rtnl_shunlock(); + rtnl_unlock(); return err; } /* * check if a multicast source filter allows delivery for a given <src,dst,intf> */ -int ip_mc_sf_allow(struct sock *sk, u32 loc_addr, u32 rmt_addr, int dif) +int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *pmc; struct ip_sf_socklist *psl; int i; + int ret; - if (!MULTICAST(loc_addr)) - return 1; + ret = 1; + if (!ipv4_is_multicast(loc_addr)) + goto out; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == loc_addr && pmc->multi.imr_ifindex == dif) break; } + ret = inet->mc_all; if (!pmc) - return 1; - psl = pmc->sflist; + goto unlock; + psl = rcu_dereference(pmc->sflist); + ret = (pmc->sfmode == MCAST_EXCLUDE); if (!psl) - return pmc->sfmode == MCAST_EXCLUDE; + goto unlock; - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { if (psl->sl_addr[i] == rmt_addr) break; } + ret = 0; if (pmc->sfmode == MCAST_INCLUDE && i >= psl->sl_count) - return 0; + goto unlock; if (pmc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) - return 0; - return 1; + goto unlock; + ret = 1; +unlock: + rcu_read_unlock(); +out: + return ret; } /* @@ -2193,42 +2368,56 @@ void ip_mc_drop_socket(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml; + struct net *net = sock_net(sk); if (inet->mc_list == NULL) return; rtnl_lock(); - while ((iml = inet->mc_list) != NULL) { + while ((iml = rtnl_dereference(inet->mc_list)) != NULL) { struct in_device *in_dev; - inet->mc_list = iml->next; - if ((in_dev = inetdev_by_index(iml->multi.imr_ifindex)) != NULL) { - (void) ip_mc_leave_src(sk, iml, in_dev); + inet->mc_list = iml->next_rcu; + in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); + (void) ip_mc_leave_src(sk, iml, in_dev); + if (in_dev != NULL) ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); - in_dev_put(in_dev); - } - sock_kfree_s(sk, iml, sizeof(*iml)); - + /* decrease mem now to avoid the memleak warning */ + atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); + kfree_rcu(iml, rcu); } rtnl_unlock(); } -int ip_check_mc(struct in_device *in_dev, u32 mc_addr, u32 src_addr, u16 proto) +/* called with rcu_read_lock() */ +int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) { struct ip_mc_list *im; + struct ip_mc_list __rcu **mc_hash; struct ip_sf_list *psf; int rv = 0; - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im; im=im->next) { - if (im->multiaddr == mc_addr) - break; + mc_hash = rcu_dereference(in_dev->mc_hash); + if (mc_hash) { + u32 hash = hash_32((__force u32)mc_addr, MC_HASH_SZ_LOG); + + for (im = rcu_dereference(mc_hash[hash]); + im != NULL; + im = rcu_dereference(im->next_hash)) { + if (im->multiaddr == mc_addr) + break; + } + } else { + for_each_pmc_rcu(in_dev, im) { + if (im->multiaddr == mc_addr) + break; + } } if (im && proto == IPPROTO_IGMP) { rv = 1; } else if (im) { if (src_addr) { - for (psf=im->sources; psf; psf=psf->sf_next) { + for (psf = im->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == src_addr) break; } @@ -2241,12 +2430,12 @@ int ip_check_mc(struct in_device *in_dev, u32 mc_addr, u32 src_addr, u16 proto) } else rv = 1; /* unspecified source; tentatively allow */ } - read_unlock(&in_dev->mc_list_lock); return rv; } #if defined(CONFIG_PROC_FS) struct igmp_mc_iter_state { + struct seq_net_private p; struct net_device *dev; struct in_device *in_dev; }; @@ -2255,24 +2444,22 @@ struct igmp_mc_iter_state { static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ip_mc_list *im = NULL; struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); - for (state->dev = dev_base, state->in_dev = NULL; - state->dev; - state->dev = state->dev->next) { + state->in_dev = NULL; + for_each_netdev_rcu(net, state->dev) { struct in_device *in_dev; - in_dev = in_dev_get(state->dev); + + in_dev = __in_dev_get_rcu(state->dev); if (!in_dev) continue; - read_lock(&in_dev->mc_list_lock); - im = in_dev->mc_list; + im = rcu_dereference(in_dev->mc_list); if (im) { state->in_dev = in_dev; break; } - read_unlock(&in_dev->mc_list_lock); - in_dev_put(in_dev); } return im; } @@ -2280,22 +2467,18 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_list *im) { struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); - im = im->next; + + im = rcu_dereference(im->next_rcu); while (!im) { - if (likely(state->in_dev != NULL)) { - read_unlock(&state->in_dev->mc_list_lock); - in_dev_put(state->in_dev); - } - state->dev = state->dev->next; + state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->in_dev = NULL; break; } - state->in_dev = in_dev_get(state->dev); + state->in_dev = __in_dev_get_rcu(state->dev); if (!state->in_dev) continue; - read_lock(&state->in_dev->mc_list_lock); - im = state->in_dev->mc_list; + im = rcu_dereference(state->in_dev->mc_list); } return im; } @@ -2310,8 +2493,9 @@ static struct ip_mc_list *igmp_mc_get_idx(struct seq_file *seq, loff_t pos) } static void *igmp_mc_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(rcu) { - read_lock(&dev_base_lock); + rcu_read_lock(); return *pos ? igmp_mc_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -2327,26 +2511,26 @@ static void *igmp_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void igmp_mc_seq_stop(struct seq_file *seq, void *v) + __releases(rcu) { struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); - if (likely(state->in_dev != NULL)) { - read_unlock(&state->in_dev->mc_list_lock); - in_dev_put(state->in_dev); - state->in_dev = NULL; - } + + state->in_dev = NULL; state->dev = NULL; - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int igmp_mc_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) - seq_puts(seq, + seq_puts(seq, "Idx\tDevice : Count Querier\tGroup Users Timer\tReporter\n"); else { struct ip_mc_list *im = (struct ip_mc_list *)v; struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); char *querier; + long delta; + #ifdef CONFIG_IP_MULTICAST querier = IGMP_V1_SEEN(state->in_dev) ? "V1" : IGMP_V2_SEEN(state->in_dev) ? "V2" : @@ -2355,22 +2539,23 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) querier = "NONE"; #endif - if (state->in_dev->mc_list == im) { + if (rcu_dereference(state->in_dev->mc_list) == im) { seq_printf(seq, "%d\t%-10s: %5d %7s\n", - state->dev->ifindex, state->dev->name, state->dev->mc_count, querier); + state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } + delta = im->timer.expires - jiffies; seq_printf(seq, - "\t\t\t\t%08lX %5d %d:%08lX\t\t%d\n", + "\t\t\t\t%08X %5d %d:%08lX\t\t%d\n", im->multiaddr, im->users, - im->tm_running, im->tm_running ? - jiffies_to_clock_t(im->timer.expires-jiffies) : 0, + im->tm_running, + im->tm_running ? jiffies_delta_to_clock_t(delta) : 0, im->reporter); } return 0; } -static struct seq_operations igmp_mc_seq_ops = { +static const struct seq_operations igmp_mc_seq_ops = { .start = igmp_mc_seq_start, .next = igmp_mc_seq_next, .stop = igmp_mc_seq_stop, @@ -2379,35 +2564,20 @@ static struct seq_operations igmp_mc_seq_ops = { static int igmp_mc_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct igmp_mc_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - rc = seq_open(file, &igmp_mc_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; - memset(s, 0, sizeof(*s)); -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_net(inode, file, &igmp_mc_seq_ops, + sizeof(struct igmp_mc_iter_state)); } -static struct file_operations igmp_mc_seq_fops = { +static const struct file_operations igmp_mc_seq_fops = { .owner = THIS_MODULE, .open = igmp_mc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; struct igmp_mcf_iter_state { + struct seq_net_private p; struct net_device *dev; struct in_device *idev; struct ip_mc_list *im; @@ -2417,19 +2587,19 @@ struct igmp_mcf_iter_state { static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ip_sf_list *psf = NULL; struct ip_mc_list *im = NULL; struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); - for (state->dev = dev_base, state->idev = NULL, state->im = NULL; - state->dev; - state->dev = state->dev->next) { + state->idev = NULL; + state->im = NULL; + for_each_netdev_rcu(net, state->dev) { struct in_device *idev; - idev = in_dev_get(state->dev); + idev = __in_dev_get_rcu(state->dev); if (unlikely(idev == NULL)) continue; - read_lock(&idev->mc_list_lock); - im = idev->mc_list; + im = rcu_dereference(idev->mc_list); if (likely(im != NULL)) { spin_lock_bh(&im->lock); psf = im->sources; @@ -2440,8 +2610,6 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) } spin_unlock_bh(&im->lock); } - read_unlock(&idev->mc_list_lock); - in_dev_put(idev); } return psf; } @@ -2455,20 +2623,15 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l spin_unlock_bh(&state->im->lock); state->im = state->im->next; while (!state->im) { - if (likely(state->idev != NULL)) { - read_unlock(&state->idev->mc_list_lock); - in_dev_put(state->idev); - } - state->dev = state->dev->next; + state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; goto out; } - state->idev = in_dev_get(state->dev); + state->idev = __in_dev_get_rcu(state->dev); if (!state->idev) continue; - read_lock(&state->idev->mc_list_lock); - state->im = state->idev->mc_list; + state->im = rcu_dereference(state->idev->mc_list); } if (!state->im) break; @@ -2489,8 +2652,9 @@ static struct ip_sf_list *igmp_mcf_get_idx(struct seq_file *seq, loff_t pos) } static void *igmp_mcf_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(rcu) { - read_lock(&dev_base_lock); + rcu_read_lock(); return *pos ? igmp_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -2506,19 +2670,16 @@ static void *igmp_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) + __releases(rcu) { struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); if (likely(state->im != NULL)) { spin_unlock_bh(&state->im->lock); state->im = NULL; } - if (likely(state->idev != NULL)) { - read_unlock(&state->idev->mc_list_lock); - in_dev_put(state->idev); - state->idev = NULL; - } + state->idev = NULL; state->dev = NULL; - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int igmp_mcf_seq_show(struct seq_file *seq, void *v) @@ -2527,7 +2688,7 @@ static int igmp_mcf_seq_show(struct seq_file *seq, void *v) struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); if (v == SEQ_START_TOKEN) { - seq_printf(seq, + seq_printf(seq, "%3s %6s " "%10s %10s %6s %6s\n", "Idx", "Device", "MCA", @@ -2535,8 +2696,8 @@ static int igmp_mcf_seq_show(struct seq_file *seq, void *v) } else { seq_printf(seq, "%3d %6.6s 0x%08x " - "0x%08x %6lu %6lu\n", - state->dev->ifindex, state->dev->name, + "0x%08x %6lu %6lu\n", + state->dev->ifindex, state->dev->name, ntohl(state->im->multiaddr), ntohl(psf->sf_inaddr), psf->sf_count[MCAST_INCLUDE], @@ -2545,7 +2706,7 @@ static int igmp_mcf_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations igmp_mcf_seq_ops = { +static const struct seq_operations igmp_mcf_seq_ops = { .start = igmp_mcf_seq_start, .next = igmp_mcf_seq_next, .stop = igmp_mcf_seq_stop, @@ -2554,42 +2715,88 @@ static struct seq_operations igmp_mcf_seq_ops = { static int igmp_mcf_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - struct igmp_mcf_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - rc = seq_open(file, &igmp_mcf_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; - memset(s, 0, sizeof(*s)); -out: - return rc; -out_kfree: - kfree(s); - goto out; + return seq_open_net(inode, file, &igmp_mcf_seq_ops, + sizeof(struct igmp_mcf_iter_state)); } -static struct file_operations igmp_mcf_seq_fops = { +static const struct file_operations igmp_mcf_seq_fops = { .owner = THIS_MODULE, .open = igmp_mcf_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; -int __init igmp_mc_proc_init(void) +static int __net_init igmp_net_init(struct net *net) { - proc_net_fops_create("igmp", S_IRUGO, &igmp_mc_seq_fops); - proc_net_fops_create("mcfilter", S_IRUGO, &igmp_mcf_seq_fops); + struct proc_dir_entry *pde; + + pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops); + if (!pde) + goto out_igmp; + pde = proc_create("mcfilter", S_IRUGO, net->proc_net, + &igmp_mcf_seq_fops); + if (!pde) + goto out_mcfilter; return 0; + +out_mcfilter: + remove_proc_entry("igmp", net->proc_net); +out_igmp: + return -ENOMEM; +} + +static void __net_exit igmp_net_exit(struct net *net) +{ + remove_proc_entry("mcfilter", net->proc_net); + remove_proc_entry("igmp", net->proc_net); } + +static struct pernet_operations igmp_net_ops = { + .init = igmp_net_init, + .exit = igmp_net_exit, +}; #endif -EXPORT_SYMBOL(ip_mc_dec_group); -EXPORT_SYMBOL(ip_mc_inc_group); -EXPORT_SYMBOL(ip_mc_join_group); +static int igmp_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct in_device *in_dev; + + switch (event) { + case NETDEV_RESEND_IGMP: + in_dev = __in_dev_get_rtnl(dev); + if (in_dev) + ip_mc_rejoin_groups(in_dev); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block igmp_notifier = { + .notifier_call = igmp_netdev_event, +}; + +int __init igmp_mc_init(void) +{ +#if defined(CONFIG_PROC_FS) + int err; + + err = register_pernet_subsys(&igmp_net_ops); + if (err) + return err; + err = register_netdevice_notifier(&igmp_notifier); + if (err) + goto reg_notif_fail; + return 0; + +reg_notif_fail: + unregister_pernet_subsys(&igmp_net_ops); + return err; +#else + return register_netdevice_notifier(&igmp_notifier); +#endif +} |
