diff options
author | David Woodhouse <dwmw2@infradead.org> | 2007-07-23 10:20:10 +0100 |
---|---|---|
committer | David Woodhouse <dwmw2@infradead.org> | 2007-07-23 10:20:10 +0100 |
commit | 39fe5434cb9de5da40510028b17b96bc4eb312b3 (patch) | |
tree | 7a02a317b9ad57da51ca99887c119e779ccf3f13 /net/core | |
parent | 0fc72b81d3111d114ab378935b1cf07680ca1289 (diff) | |
parent | f695baf2df9e0413d3521661070103711545207a (diff) |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 431 | ||||
-rw-r--r-- | net/core/dev_mcast.c | 200 | ||||
-rw-r--r-- | net/core/ethtool.c | 12 | ||||
-rw-r--r-- | net/core/flow.c | 2 | ||||
-rw-r--r-- | net/core/gen_estimator.c | 82 | ||||
-rw-r--r-- | net/core/neighbour.c | 2 | ||||
-rw-r--r-- | net/core/netpoll.c | 10 | ||||
-rw-r--r-- | net/core/pktgen.c | 251 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 470 | ||||
-rw-r--r-- | net/core/scm.c | 3 | ||||
-rw-r--r-- | net/core/skbuff.c | 17 | ||||
-rw-r--r-- | net/core/sock.c | 74 |
12 files changed, 1208 insertions, 346 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index ee051bb398a..ee4035571c2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -98,6 +98,7 @@ #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/if_bridge.h> +#include <linux/if_macvlan.h> #include <net/dst.h> #include <net/pkt_sched.h> #include <net/checksum.h> @@ -151,9 +152,22 @@ static struct list_head ptype_base[16] __read_mostly; /* 16 way hashed list */ static struct list_head ptype_all __read_mostly; /* Taps */ #ifdef CONFIG_NET_DMA -static struct dma_client *net_dma_client; -static unsigned int net_dma_count; -static spinlock_t net_dma_event_lock; +struct net_dma { + struct dma_client client; + spinlock_t lock; + cpumask_t channel_mask; + struct dma_chan *channels[NR_CPUS]; +}; + +static enum dma_state_client +netdev_dma_event(struct dma_client *client, struct dma_chan *chan, + enum dma_state state); + +static struct net_dma net_dma = { + .client = { + .event_callback = netdev_dma_event, + }, +}; #endif /* @@ -942,7 +956,7 @@ int dev_open(struct net_device *dev) /* * Initialize multicasting status */ - dev_mc_upload(dev); + dev_set_rx_mode(dev); /* * Wakeup transmit queue engine @@ -1429,7 +1443,9 @@ gso: skb->next = nskb; return rc; } - if (unlikely(netif_queue_stopped(dev) && skb->next)) + if (unlikely((netif_queue_stopped(dev) || + netif_subqueue_stopped(dev, skb->queue_mapping)) && + skb->next)) return NETDEV_TX_BUSY; } while (skb->next); @@ -1510,8 +1526,10 @@ int dev_queue_xmit(struct sk_buff *skb) skb_headroom(skb)); if (!(dev->features & NETIF_F_GEN_CSUM) && - (!(dev->features & NETIF_F_IP_CSUM) || - skb->protocol != htons(ETH_P_IP))) + !((dev->features & NETIF_F_IP_CSUM) && + skb->protocol == htons(ETH_P_IP)) && + !((dev->features & NETIF_F_IPV6_CSUM) && + skb->protocol == htons(ETH_P_IPV6))) if (skb_checksum_help(skb)) goto out_kfree_skb; } @@ -1545,6 +1563,8 @@ gso: spin_lock(&dev->queue_lock); q = dev->qdisc; if (q->enqueue) { + /* reset queue_mapping to zero */ + skb->queue_mapping = 0; rc = q->enqueue(skb, q); qdisc_run(dev); spin_unlock(&dev->queue_lock); @@ -1574,7 +1594,8 @@ gso: HARD_TX_LOCK(dev, cpu); - if (!netif_queue_stopped(dev)) { + if (!netif_queue_stopped(dev) && + !netif_subqueue_stopped(dev, skb->queue_mapping)) { rc = 0; if (!dev_hard_start_xmit(skb, dev)) { HARD_TX_UNLOCK(dev); @@ -1793,6 +1814,28 @@ static inline struct sk_buff *handle_bridge(struct sk_buff *skb, #define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) #endif +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) +struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly; +EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); + +static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, + struct packet_type **pt_prev, + int *ret, + struct net_device *orig_dev) +{ + if (skb->dev->macvlan_port == NULL) + return skb; + + if (*pt_prev) { + *ret = deliver_skb(skb, *pt_prev, orig_dev); + *pt_prev = NULL; + } + return macvlan_handle_frame_hook(skb); +} +#else +#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) +#endif + #ifdef CONFIG_NET_CLS_ACT /* TODO: Maybe we should just force sch_ingress to be compiled in * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions @@ -1900,6 +1943,9 @@ ncls: skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); if (!skb) goto out; + skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); + if (!skb) + goto out; type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { @@ -2015,12 +2061,13 @@ out: * There may not be any more sk_buffs coming right now, so push * any pending DMA copies to hardware */ - if (net_dma_client) { - struct dma_chan *chan; - rcu_read_lock(); - list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node) - dma_async_memcpy_issue_pending(chan); - rcu_read_unlock(); + if (!cpus_empty(net_dma.channel_mask)) { + int chan_idx; + for_each_cpu_mask(chan_idx, net_dma.channel_mask) { + struct dma_chan *chan = net_dma.channels[chan_idx]; + if (chan) + dma_async_memcpy_issue_pending(chan); + } } #endif return; @@ -2496,26 +2543,17 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) return 0; } -/** - * dev_set_promiscuity - update promiscuity count on a device - * @dev: device - * @inc: modifier - * - * Add or remove promiscuity from a device. While the count in the device - * remains above zero the interface remains promiscuous. Once it hits zero - * the device reverts back to normal filtering operation. A negative inc - * value is used to drop promiscuity on the device. - */ -void dev_set_promiscuity(struct net_device *dev, int inc) +static void __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + ASSERT_RTNL(); + if ((dev->promiscuity += inc) == 0) dev->flags &= ~IFF_PROMISC; else dev->flags |= IFF_PROMISC; if (dev->flags != old_flags) { - dev_mc_upload(dev); printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? "entered" : "left"); @@ -2525,10 +2563,32 @@ void dev_set_promiscuity(struct net_device *dev, int inc) dev->name, (dev->flags & IFF_PROMISC), (old_flags & IFF_PROMISC), audit_get_loginuid(current->audit_context)); + + if (dev->change_rx_flags) + dev->change_rx_flags(dev, IFF_PROMISC); } } /** + * dev_set_promiscuity - update promiscuity count on a device + * @dev: device + * @inc: modifier + * + * Add or remove promiscuity from a device. While the count in the device + * remains above zero the interface remains promiscuous. Once it hits zero + * the device reverts back to normal filtering operation. A negative inc + * value is used to drop promiscuity on the device. + */ +void dev_set_promiscuity(struct net_device *dev, int inc) +{ + unsigned short old_flags = dev->flags; + + __dev_set_promiscuity(dev, inc); + if (dev->flags != old_flags) + dev_set_rx_mode(dev); +} + +/** * dev_set_allmulti - update allmulti count on a device * @dev: device * @inc: modifier @@ -2544,11 +2604,190 @@ void dev_set_allmulti(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + ASSERT_RTNL(); + dev->flags |= IFF_ALLMULTI; if ((dev->allmulti += inc) == 0) dev->flags &= ~IFF_ALLMULTI; - if (dev->flags ^ old_flags) - dev_mc_upload(dev); + if (dev->flags ^ old_flags) { + if (dev->change_rx_flags) + dev->change_rx_flags(dev, IFF_ALLMULTI); + dev_set_rx_mode(dev); + } +} + +/* + * Upload unicast and multicast address lists to device and + * configure RX filtering. When the device doesn't support unicast + * filtering it is put in promiscous mode while unicast addresses + * are present. + */ +void __dev_set_rx_mode(struct net_device *dev) +{ + /* dev_open will call this function so the list will stay sane. */ + if (!(dev->flags&IFF_UP)) + return; + + if (!netif_device_present(dev)) + return; + + if (dev->set_rx_mode) + dev->set_rx_mode(dev); + else { + /* Unicast addresses changes may only happen under the rtnl, + * therefore calling __dev_set_promiscuity here is safe. + */ + if (dev->uc_count > 0 && !dev->uc_promisc) { + __dev_set_promiscuity(dev, 1); + dev->uc_promisc = 1; + } else if (dev->uc_count == 0 && dev->uc_promisc) { + __dev_set_promiscuity(dev, -1); + dev->uc_promisc = 0; + } + + if (dev->set_multicast_list) + dev->set_multicast_list(dev); + } +} + +void dev_set_rx_mode(struct net_device *dev) +{ + netif_tx_lock_bh(dev); + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); +} + +int __dev_addr_delete(struct dev_addr_list **list, int *count, + void *addr, int alen, int glbl) +{ + struct dev_addr_list *da; + + for (; (da = *list) != NULL; list = &da->next) { + if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && + alen == da->da_addrlen) { + if (glbl) { + int old_glbl = da->da_gusers; + da->da_gusers = 0; + if (old_glbl == 0) + break; + } + if (--da->da_users) + return 0; + + *list = da->next; + kfree(da); + (*count)--; + return 0; + } + } + return -ENOENT; +} + +int __dev_addr_add(struct dev_addr_list **list, int *count, + void *addr, int alen, int glbl) +{ + struct dev_addr_list *da; + + for (da = *list; da != NULL; da = da->next) { + if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && + da->da_addrlen == alen) { + if (glbl) { + int old_glbl = da->da_gusers; + da->da_gusers = 1; + if (old_glbl) + return 0; + } + da->da_users++; + return 0; + } + } + + da = kmalloc(sizeof(*da), GFP_ATOMIC); + if (da == NULL) + return -ENOMEM; + memcpy(da->da_addr, addr, alen); + da->da_addrlen = alen; + da->da_users = 1; + da->da_gusers = glbl ? 1 : 0; + da->next = *list; + *list = da; + (*count)++; + return 0; +} + +/** + * dev_unicast_delete - Release secondary unicast address. + * @dev: device + * + * Release reference to a secondary unicast address and remove it + * from the device if the reference count drop to zero. + * + * The caller must hold the rtnl_mutex. + */ +int dev_unicast_delete(struct net_device *dev, void *addr, int alen) +{ + int err; + + ASSERT_RTNL(); + + netif_tx_lock_bh(dev); + err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); + if (!err) + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); + return err; +} +EXPORT_SYMBOL(dev_unicast_delete); + +/** + * dev_unicast_add - add a secondary unicast address + * @dev: device + * + * Add a secondary unicast address to the device or increase + * the reference count if it already exists. + * + * The caller must hold the rtnl_mutex. + */ +int dev_unicast_add(struct net_device *dev, void *addr, int alen) +{ + int err; + + ASSERT_RTNL(); + + netif_tx_lock_bh(dev); + err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); + if (!err) + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); + return err; +} +EXPORT_SYMBOL(dev_unicast_add); + +static void __dev_addr_discard(struct dev_addr_list **list) +{ + struct dev_addr_list *tmp; + + while (*list != NULL) { + tmp = *list; + *list = tmp->next; + if (tmp->da_users > tmp->da_gusers) + printk("__dev_addr_discard: address leakage! " + "da_users=%d\n", tmp->da_users); + kfree(tmp); + } +} + +static void dev_addr_discard(struct net_device *dev) +{ + netif_tx_lock_bh(dev); + + __dev_addr_discard(&dev->uc_list); + dev->uc_count = 0; + + __dev_addr_discard(&dev->mc_list); + dev->mc_count = 0; + + netif_tx_unlock_bh(dev); } unsigned dev_get_flags(const struct net_device *dev) @@ -2580,6 +2819,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags) int ret, changes; int old_flags = dev->flags; + ASSERT_RTNL(); + /* * Set the flags on our device. */ @@ -2594,7 +2835,10 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Load in the correct multicast list now the flags have changed. */ - dev_mc_upload(dev); + if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST) + dev->change_rx_flags(dev, IFF_MULTICAST); + + dev_set_rx_mode(dev); /* * Have we downed the interface. We handle IFF_UP ourselves @@ -2607,7 +2851,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); if (!ret) - dev_mc_upload(dev); + dev_set_rx_mode(dev); } if (dev->flags & IFF_UP && @@ -3107,6 +3351,22 @@ int register_netdevice(struct net_device *dev) } } + /* Fix illegal checksum combinations */ + if ((dev->features & NETIF_F_HW_CSUM) && + (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", + dev->name); + dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); + } + + if ((dev->features & NETIF_F_NO_CSUM) && + (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", + dev->name); + dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); + } + + /* Fix illegal SG+CSUM combinations. */ if ((dev->features & NETIF_F_SG) && !(dev->features & NETIF_F_ALL_CSUM)) { @@ -3343,16 +3603,18 @@ static struct net_device_stats *internal_stats(struct net_device *dev) } /** - * alloc_netdev - allocate network device + * alloc_netdev_mq - allocate network device * @sizeof_priv: size of private data to allocate space for * @name: device name format string * @setup: callback to initialize device + * @queue_count: the number of subqueues to allocate * * Allocates a struct net_device with private data area for driver use - * and performs basic initialization. + * and performs basic initialization. Also allocates subquue structs + * for each queue on the device at the end of the netdevice. */ -struct net_device *alloc_netdev(int sizeof_priv, const char *name, - void (*setup)(struct net_device *)) +struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, + void (*setup)(struct net_device *), unsigned int queue_count) { void *p; struct net_device *dev; @@ -3361,7 +3623,9 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, BUG_ON(strlen(name) >= sizeof(dev->name)); /* ensure 32-byte alignment of both the device and private area */ - alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; + alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST + + (sizeof(struct net_device_subqueue) * (queue_count - 1))) & + ~NETDEV_ALIGN_CONST; alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; p = kzalloc(alloc_size, GFP_KERNEL); @@ -3374,15 +3638,22 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; - if (sizeof_priv) - dev->priv = netdev_priv(dev); + if (sizeof_priv) { + dev->priv = ((char *)dev + + ((sizeof(struct net_device) + + (sizeof(struct net_device_subqueue) * + (queue_count - 1)) + NETDEV_ALIGN_CONST) + & ~NETDEV_ALIGN_CONST)); + } + + dev->egress_subqueue_count = queue_count; dev->get_stats = internal_stats; setup(dev); strcpy(dev->name, name); return dev; } -EXPORT_SYMBOL(alloc_netdev); +EXPORT_SYMBOL(alloc_netdev_mq); /** * free_netdev - free network device @@ -3471,9 +3742,9 @@ void unregister_netdevice(struct net_device *dev) raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); /* - * Flush the multicast chain + * Flush the unicast and multicast chains */ - dev_mc_discard(dev); + dev_addr_discard(dev); if (dev->uninit) dev->uninit(dev); @@ -3563,12 +3834,13 @@ static int dev_cpu_callback(struct notifier_block *nfb, * This is called when the number of channels allocated to the net_dma_client * changes. The net_dma_client tries to have one DMA channel per CPU. */ -static void net_dma_rebalance(void) + +static void net_dma_rebalance(struct net_dma *net_dma) { - unsigned int cpu, i, n; + unsigned int cpu, i, n, chan_idx; struct dma_chan *chan; - if (net_dma_count == 0) { + if (cpus_empty(net_dma->channel_mask)) { for_each_online_cpu(cpu) rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); return; @@ -3577,10 +3849,12 @@ static void net_dma_rebalance(void) i = 0; cpu = first_cpu(cpu_online_map); - rcu_read_lock(); - list_for_each_entry(chan, &net_dma_client->channels, client_node) { - n = ((num_online_cpus() / net_dma_count) - + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); + for_each_cpu_mask(chan_idx, net_dma->channel_mask) { + chan = net_dma->channels[chan_idx]; + + n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) + + (i < (num_online_cpus() % + cpus_weight(net_dma->channel_mask)) ? 1 : 0)); while(n) { per_cpu(softnet_data, cpu).net_dma = chan; @@ -3589,7 +3863,6 @@ static void net_dma_rebalance(void) } i++; } - rcu_read_unlock(); } /** @@ -3598,23 +3871,53 @@ static void net_dma_rebalance(void) * @chan: DMA channel for the event * @event: event type */ -static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, - enum dma_event event) -{ - spin_lock(&net_dma_event_lock); - switch (event) { - case DMA_RESOURCE_ADDED: - net_dma_count++; - net_dma_rebalance(); +static enum dma_state_client +netdev_dma_event(struct dma_client *client, struct dma_chan *chan, + enum dma_state state) +{ + int i, found = 0, pos = -1; + struct net_dma *net_dma = + container_of(client, struct net_dma, client); + enum dma_state_client ack = DMA_DUP; /* default: take no action */ + + spin_lock(&net_dma->lock); + switch (state) { + case DMA_RESOURCE_AVAILABLE: + for (i = 0; i < NR_CPUS; i++) + if (net_dma->channels[i] == chan) { + found = 1; + break; + } else if (net_dma->channels[i] == NULL && pos < 0) + pos = i; + + if (!found && pos >= 0) { + ack = DMA_ACK; + net_dma->channels[pos] = chan; + cpu_set(pos, net_dma->channel_mask); + net_dma_rebalance(net_dma); + } break; case DMA_RESOURCE_REMOVED: - net_dma_count--; - net_dma_rebalance(); + for (i = 0; i < NR_CPUS; i++) + if (net_dma->channels[i] == chan) { + found = 1; + pos = i; + break; + } + + if (found) { + ack = DMA_ACK; + cpu_clear(pos, net_dma->channel_mask); + net_dma->channels[i] = NULL; + net_dma_rebalance(net_dma); + } break; default: break; } - spin_unlock(&net_dma_event_lock); + spin_unlock(&net_dma->lock); + + return ack; } /** @@ -3622,12 +3925,10 @@ static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, */ static int __init netdev_dma_register(void) { - spin_lock_init(&net_dma_event_lock); - net_dma_client = dma_async_client_register(netdev_dma_event); - if (net_dma_client == NULL) - return -ENOMEM; - - dma_async_client_chan_request(net_dma_client, num_online_cpus()); + spin_lock_init(&net_dma.lock); + dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask); + dma_async_client_register(&net_dma.client); + dma_async_client_chan_request(&net_dma.client); return 0; } diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 5a54053386c..99aece1aecc 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -64,85 +64,24 @@ */ /* - * Update the multicast list into the physical NIC controller. - */ - -static void __dev_mc_upload(struct net_device *dev) -{ - /* Don't do anything till we up the interface - * [dev_open will call this function so the list will - * stay sane] - */ - - if (!(dev->flags&IFF_UP)) - return; - - /* - * Devices with no set multicast or which have been - * detached don't get set. - */ - - if (dev->set_multicast_list == NULL || - !netif_device_present(dev)) - return; - - dev->set_multicast_list(dev); -} - -void dev_mc_upload(struct net_device *dev) -{ - netif_tx_lock_bh(dev); - __dev_mc_upload(dev); - netif_tx_unlock_bh(dev); -} - -/* * Delete a device level multicast */ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) { - int err = 0; - struct dev_mc_list *dmi, **dmip; + int err; netif_tx_lock_bh(dev); - - for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) { + err = __dev_addr_delete(&dev->mc_list, &dev->mc_count, + addr, alen, glbl); + if (!err) { /* - * Find the entry we want to delete. The device could - * have variable length entries so check these too. + * We have altered the list, so the card + * loaded filter is now wrong. Fix it */ - if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && - alen == dmi->dmi_addrlen) { - if (glbl) { - int old_glbl = dmi->dmi_gusers; - dmi->dmi_gusers = 0; - if (old_glbl == 0) - break; - } - if (--dmi->dmi_users) - goto done; - - /* - * Last user. So delete the entry. - */ - *dmip = dmi->next; - dev->mc_count--; - - kfree(dmi); - - /* - * We have altered the list, so the card - * loaded filter is now wrong. Fix it - */ - __dev_mc_upload(dev); - - netif_tx_unlock_bh(dev); - return 0; - } + + __dev_set_rx_mode(dev); } - err = -ENOENT; -done: netif_tx_unlock_bh(dev); return err; } @@ -153,68 +92,90 @@ done: int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) { - int err = 0; - struct dev_mc_list *dmi, *dmi1; - - dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC); + int err; netif_tx_lock_bh(dev); - for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) { - if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && - dmi->dmi_addrlen == alen) { - if (glbl) { - int old_glbl = dmi->dmi_gusers; - dmi->dmi_gusers = 1; - if (old_glbl) - goto done; - } - dmi->dmi_users++; - goto done; - } - } - - if ((dmi = dmi1) == NULL) { - netif_tx_unlock_bh(dev); - return -ENOMEM; - } - memcpy(dmi->dmi_addr, addr, alen); - dmi->dmi_addrlen = alen; - dmi->next = dev->mc_list; - dmi->dmi_users = 1; - dmi->dmi_gusers = glbl ? 1 : 0; - dev->mc_list = dmi; - dev->mc_count++; + err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); + if (!err) + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); + return err; +} - __dev_mc_upload(dev); +/** + * dev_mc_sync - Synchronize device's multicast list to another device + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have no users left. The source device must be + * locked by netif_tx_lock_bh. + * + * This function is intended to be called from the dev->set_multicast_list + * function of layered software devices. + */ +int dev_mc_sync(struct net_device *to, struct net_device *from) +{ + struct dev_addr_list *da; + int err = 0; - netif_tx_unlock_bh(dev); - return 0; + netif_tx_lock_bh(to); + for (da = from->mc_list; da != NULL; da = da->next) { + if (!da->da_synced) { + err = __dev_addr_add(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + if (err < 0) + break; + da->da_synced = 1; + da->da_users++; + } else if (da->da_users == 1) { + __dev_addr_delete(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + __dev_addr_delete(&from->mc_list, &from->mc_count, + da->da_addr, da->da_addrlen, 0); + } + } + if (!err) + __dev_set_rx_mode(to); + netif_tx_unlock_bh(to); -done: - netif_tx_unlock_bh(dev); - kfree(dmi1); return err; } +EXPORT_SYMBOL(dev_mc_sync); -/* - * Discard multicast list when a device is downed - */ -void dev_mc_discard(struct net_device *dev) +/** + * dev_mc_unsync - Remove synchronized addresses from the destination + * device + * @to: destination device + * @from: source device + * + * Remove all addresses that were added to the destination device by + * dev_mc_sync(). This function is intended to be called from the + * dev->stop function of layered software devices. + */ +void dev_mc_unsync(struct net_device *to, struct net_device *from) { - netif_tx_lock_bh(dev); - - while (dev->mc_list != NULL) { - struct dev_mc_list *tmp = dev->mc_list; - dev->mc_list = tmp->next; - if (tmp->dmi_users > tmp->dmi_gusers) - printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users); - kfree(tmp); + struct dev_addr_list *da; + + netif_tx_lock_bh(from); + netif_tx_lock_bh(to); + + for (da = from->mc_list; da != NULL; da = da->next) { + if (!da->da_synced) + continue; + __dev_addr_delete(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + da->da_synced = 0; + __dev_addr_delete(&from->mc_list, &from->mc_count, + da->da_addr, da->da_addrlen, 0); } - dev->mc_count = 0; + __dev_set_rx_mode(to); - netif_tx_unlock_bh(dev); + netif_tx_unlock_bh(to); + netif_tx_unlock_bh(from); } +EXPORT_SYMBOL(dev_mc_unsync); #ifdef CONFIG_PROC_FS static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) @@ -244,7 +205,7 @@ static void dev_mc_seq_stop(struct seq_file *seq, void *v) static int dev_mc_seq_show(struct seq_file *seq, void *v) { - struct dev_mc_list *m; + struct dev_addr_list *m; struct net_device *dev = v; netif_tx_lock_bh(dev); @@ -292,4 +253,3 @@ void __init dev_mcast_init(void) EXPORT_SYMBOL(dev_mc_add); EXPORT_SYMBOL(dev_mc_delete); -EXPORT_SYMBOL(dev_mc_upload); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 8d5e5a09b57..0b531e98ec3 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -52,6 +52,17 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) return 0; } + +int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) +{ + if (data) + dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + else + dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + + return 0; +} + u32 ethtool_op_get_sg(struct net_device *dev) { return (dev->features & NETIF_F_SG) != 0; @@ -980,5 +991,6 @@ EXPORT_SYMBOL(ethtool_op_set_sg); EXPORT_SYMBOL(ethtool_op_set_tso); EXPORT_SYMBOL(ethtool_op_set_tx_csum); EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); +EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); EXPORT_SYMBOL(ethtool_op_set_ufo); EXPORT_SYMBOL(ethtool_op_get_ufo); diff --git a/net/core/flow.c b/net/core/flow.c index 051430545a0..0ab5234b17d 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -350,7 +350,7 @@ static int __init flow_cache_init(void) flow_cachep = kmem_cache_create("flow_cache", sizeof(struct flow_cache_entry), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); flow_hash_shift = 10; flow_lwm = 2 * flow_hash_size; flow_hwm = 4 * flow_hash_size; diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 17daf4c9f79..590a767b029 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -79,27 +79,27 @@ struct gen_estimator { - struct gen_estimator *next; + struct list_head list; struct gnet_stats_basic *bstats; struct gnet_stats_rate_est *rate_est; spinlock_t *stats_lock; - unsigned interval; int ewma_log; u64 last_bytes; u32 last_packets; u32 avpps; u32 avbps; + struct rcu_head e_rcu; }; struct gen_estimator_head { struct timer_list timer; - struct gen_estimator *list; + struct list_head list; }; static struct gen_estimator_head elist[EST_MAX_INTERVAL+1]; -/* Estimator array lock */ +/* Protects against NULL dereference */ static DEFINE_RWLOCK(est_lock); static void est_timer(unsigned long arg) @@ -107,13 +107,17 @@ static void est_timer(unsigned long arg) int idx = (int)arg; struct gen_estimator *e; - read_lock(&est_lock); - for (e = elist[idx].list; e; e = e->next) { + rcu_read_lock(); + list_for_each_entry_rcu(e, &elist[idx].list, list) { u64 nbytes; u32 npackets; u32 rate; spin_lock(e->stats_lock); + read_lock(&est_lock); + if (e->bstats == NULL) + goto skip; + nbytes = e->bstats->bytes; npackets = e->bstats->packets; rate = (nbytes - e->last_bytes)<<(7 - idx); @@ -125,11 +129,14 @@ static void est_timer(unsigned long arg) e->last_packets = npackets; e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; e->rate_est->pps = (e->avpps+0x1FF)>>10; +skip: + read_unlock(&est_lock); spin_unlock(e->stats_lock); } - mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); - read_unlock(&est_lock); + if (!list_empty(&elist[idx].list)) + mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); + rcu_read_unlock(); } /** @@ -146,12 +153,17 @@ static void est_timer(unsigned long arg) * &rate_est with the statistics lock grabed during this period. * * Returns 0 on success or a negative error code. + * + * NOTE: Called under rtnl_mutex */ int gen_new_estimator(struct gnet_stats_basic *bstats, - struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt) + struct gnet_stats_rate_est *rate_est, + spinlock_t *stats_lock, + struct rtattr *opt) { struct gen_estimator *est; struct gnet_estimator *parm = RTA_DATA(opt); + int idx; if (RTA_PAYLOAD(opt) < sizeof(*parm)) return -EINVAL; @@ -163,7 +175,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, if (est == NULL) return -ENOBUFS; - est->interval = parm->interval + 2; + idx = parm->interval + 2; est->bstats = bstats; est->rate_est = rate_est; est->stats_lock = stats_lock; @@ -173,20 +185,25 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, est->last_packets = bstats->packets; est->avpps = rate_est->pps<<10; - est->next = elist[est->interval].list; - if (est->next == NULL) { - init_timer(&elist[est->interval].timer); - elist[est->interval].timer.data = est->interval; - elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4); - elist[est->interval].timer.function = est_timer;< |