aboutsummaryrefslogtreecommitdiff
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c871
1 files changed, 588 insertions, 283 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index a76021c7120..1e169a541ce 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -92,6 +92,7 @@
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
+#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
@@ -189,25 +190,50 @@ static struct net_dma net_dma = {
* unregister_netdevice(), which must be called with the rtnl
* semaphore held.
*/
-LIST_HEAD(dev_base_head);
DEFINE_RWLOCK(dev_base_lock);
-EXPORT_SYMBOL(dev_base_head);
EXPORT_SYMBOL(dev_base_lock);
#define NETDEV_HASHBITS 8
-static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
-static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
+#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
-static inline struct hlist_head *dev_name_hash(const char *name)
+static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
- return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
+ return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
}
-static inline struct hlist_head *dev_index_hash(int ifindex)
+static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
- return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
+ return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
+}
+
+/* Device list insertion */
+static int list_netdevice(struct net_device *dev)
+{
+ struct net *net = dev->nd_net;
+
+ ASSERT_RTNL();
+
+ write_lock_bh(&dev_base_lock);
+ list_add_tail(&dev->dev_list, &net->dev_base_head);
+ hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
+ hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
+ write_unlock_bh(&dev_base_lock);
+ return 0;
+}
+
+/* Device list removal */
+static void unlist_netdevice(struct net_device *dev)
+{
+ ASSERT_RTNL();
+
+ /* Unlink dev from the device chain */
+ write_lock_bh(&dev_base_lock);
+ list_del(&dev->dev_list);
+ hlist_del(&dev->name_hlist);
+ hlist_del(&dev->index_hlist);
+ write_unlock_bh(&dev_base_lock);
}
/*
@@ -220,17 +246,12 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
* Device drivers call our routines to queue packets here. We empty the
* queue in the local softnet handler.
*/
-DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
-#ifdef CONFIG_SYSFS
-extern int netdev_sysfs_init(void);
-extern int netdev_register_sysfs(struct net_device *);
-extern void netdev_unregister_sysfs(struct net_device *);
-#else
-#define netdev_sysfs_init() (0)
-#define netdev_register_sysfs(dev) (0)
-#define netdev_unregister_sysfs(dev) do { } while(0)
-#endif
+DEFINE_PER_CPU(struct softnet_data, softnet_data);
+
+extern int netdev_kobject_init(void);
+extern int netdev_register_kobject(struct net_device *);
+extern void netdev_unregister_kobject(struct net_device *);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
@@ -490,7 +511,7 @@ unsigned long netdev_boot_base(const char *prefix, int unit)
* If device already registered then return base of 1
* to indicate not to probe for this interface
*/
- if (__dev_get_by_name(name))
+ if (__dev_get_by_name(&init_net, name))
return 1;
for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
@@ -545,11 +566,11 @@ __setup("netdev=", netdev_boot_setup);
* careful with locks.
*/
-struct net_device *__dev_get_by_name(const char *name)
+struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
struct hlist_node *p;
- hlist_for_each(p, dev_name_hash(name)) {
+ hlist_for_each(p, dev_name_hash(net, name)) {
struct net_device *dev
= hlist_entry(p, struct net_device, name_hlist);
if (!strncmp(dev->name, name, IFNAMSIZ))
@@ -569,12 +590,12 @@ struct net_device *__dev_get_by_name(const char *name)
* matching device is found.
*/
-struct net_device *dev_get_by_name(const char *name)
+struct net_device *dev_get_by_name(struct net *net, const char *name)
{
struct net_device *dev;
read_lock(&dev_base_lock);
- dev = __dev_get_by_name(name);
+ dev = __dev_get_by_name(net, name);
if (dev)
dev_hold(dev);
read_unlock(&dev_base_lock);
@@ -592,11 +613,11 @@ struct net_device *dev_get_by_name(const char *name)
* or @dev_base_lock.
*/
-struct net_device *__dev_get_by_index(int ifindex)
+struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
struct hlist_node *p;
- hlist_for_each(p, dev_index_hash(ifindex)) {
+ hlist_for_each(p, dev_index_hash(net, ifindex)) {
struct net_device *dev
= hlist_entry(p, struct net_device, index_hlist);
if (dev->ifindex == ifindex)
@@ -616,12 +637,12 @@ struct net_device *__dev_get_by_index(int ifindex)
* dev_put to indicate they have finished with it.
*/
-struct net_device *dev_get_by_index(int ifindex)
+struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
struct net_device *dev;
read_lock(&dev_base_lock);
- dev = __dev_get_by_index(ifindex);
+ dev = __dev_get_by_index(net, ifindex);
if (dev)
dev_hold(dev);
read_unlock(&dev_base_lock);
@@ -642,13 +663,13 @@ struct net_device *dev_get_by_index(int ifindex)
* If the API was consistent this would be __dev_get_by_hwaddr
*/
-struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
+struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
struct net_device *dev;
ASSERT_RTNL();
- for_each_netdev(dev)
+ for_each_netdev(&init_net, dev)
if (dev->type == type &&
!memcmp(dev->dev_addr, ha, dev->addr_len))
return dev;
@@ -658,12 +679,12 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
EXPORT_SYMBOL(dev_getbyhwaddr);
-struct net_device *__dev_getfirstbyhwtype(unsigned short type)
+struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
struct net_device *dev;
ASSERT_RTNL();
- for_each_netdev(dev)
+ for_each_netdev(net, dev)
if (dev->type == type)
return dev;
@@ -672,12 +693,12 @@ struct net_device *__dev_getfirstbyhwtype(unsigned short type)
EXPORT_SYMBOL(__dev_getfirstbyhwtype);
-struct net_device *dev_getfirstbyhwtype(unsigned short type)
+struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
struct net_device *dev;
rtnl_lock();
- dev = __dev_getfirstbyhwtype(type);
+ dev = __dev_getfirstbyhwtype(net, type);
if (dev)
dev_hold(dev);
rtnl_unlock();
@@ -697,13 +718,13 @@ EXPORT_SYMBOL(dev_getfirstbyhwtype);
* dev_put to indicate they have finished with it.
*/
-struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
+struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
{
struct net_device *dev, *ret;
ret = NULL;
read_lock(&dev_base_lock);
- for_each_netdev(dev) {
+ for_each_netdev(net, dev) {
if (((dev->flags ^ if_flags) & mask) == 0) {
dev_hold(dev);
ret = dev;
@@ -740,9 +761,10 @@ int dev_valid_name(const char *name)
}
/**
- * dev_alloc_name - allocate a name for a device
- * @dev: device
+ * __dev_alloc_name - allocate a name for a device
+ * @net: network namespace to allocate the device name in
* @name: name format string
+ * @buf: scratch buffer and result name string
*
* Passed a format string - eg "lt%d" it will try and find a suitable
* id. It scans list of devices to build up a free map, then chooses
@@ -753,13 +775,12 @@ int dev_valid_name(const char *name)
* Returns the number of the unit assigned or a negative errno code.
*/
-int dev_alloc_name(struct net_device *dev, const char *name)
+static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
int i = 0;
- char buf[IFNAMSIZ];
const char *p;
const int max_netdevices = 8*PAGE_SIZE;
- long *inuse;
+ unsigned long *inuse;
struct net_device *d;
p = strnchr(name, IFNAMSIZ-1, '%');
@@ -773,18 +794,18 @@ int dev_alloc_name(struct net_device *dev, const char *name)
return -EINVAL;
/* Use one page as a bit array of possible slots */
- inuse = (long *) get_zeroed_page(GFP_ATOMIC);
+ inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
if (!inuse)
return -ENOMEM;
- for_each_netdev(d) {
+ for_each_netdev(net, d) {
if (!sscanf(d->name, name, &i))
continue;
if (i < 0 || i >= max_netdevices)
continue;
/* avoid cases where sscanf is not exact inverse of printf */
- snprintf(buf, sizeof(buf), name, i);
+ snprintf(buf, IFNAMSIZ, name, i);
if (!strncmp(buf, d->name, IFNAMSIZ))
set_bit(i, inuse);
}
@@ -793,11 +814,9 @@ int dev_alloc_name(struct net_device *dev, const char *name)
free_page((unsigned long) inuse);
}
- snprintf(buf, sizeof(buf), name, i);
- if (!__dev_get_by_name(buf)) {
- strlcpy(dev->name, buf, IFNAMSIZ);
+ snprintf(buf, IFNAMSIZ, name, i);
+ if (!__dev_get_by_name(net, buf))
return i;
- }
/* It is possible to run out of possible slots
* when the name is long and there isn't enough space left
@@ -806,6 +825,34 @@ int dev_alloc_name(struct net_device *dev, const char *name)
return -ENFILE;
}
+/**
+ * dev_alloc_name - allocate a name for a device
+ * @dev: device
+ * @name: name format string
+ *
+ * Passed a format string - eg "lt%d" it will try and find a suitable
+ * id. It scans list of devices to build up a free map, then chooses
+ * the first empty slot. The caller must hold the dev_base or rtnl lock
+ * while allocating the name and adding the device in order to avoid
+ * duplicates.
+ * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
+ * Returns the number of the unit assigned or a negative errno code.
+ */
+
+int dev_alloc_name(struct net_device *dev, const char *name)
+{
+ char buf[IFNAMSIZ];
+ struct net *net;
+ int ret;
+
+ BUG_ON(!dev->nd_net);
+ net = dev->nd_net;
+ ret = __dev_alloc_name(net, name, buf);
+ if (ret >= 0)
+ strlcpy(dev->name, buf, IFNAMSIZ);
+ return ret;
+}
+
/**
* dev_change_name - change name of a device
@@ -820,9 +867,12 @@ int dev_change_name(struct net_device *dev, char *newname)
char oldname[IFNAMSIZ];
int err = 0;
int ret;
+ struct net *net;
ASSERT_RTNL();
+ BUG_ON(!dev->nd_net);
+ net = dev->nd_net;
if (dev->flags & IFF_UP)
return -EBUSY;
@@ -837,7 +887,7 @@ int dev_change_name(struct net_device *dev, char *newname)
return err;
strcpy(newname, dev->name);
}
- else if (__dev_get_by_name(newname))
+ else if (__dev_get_by_name(net, newname))
return -EEXIST;
else
strlcpy(dev->name, newname, IFNAMSIZ);
@@ -847,10 +897,10 @@ rollback:
write_lock_bh(&dev_base_lock);
hlist_del(&dev->name_hlist);
- hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
+ hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
write_unlock_bh(&dev_base_lock);
- ret = raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+ ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
ret = notifier_to_errno(ret);
if (ret) {
@@ -876,7 +926,7 @@ rollback:
*/
void netdev_features_change(struct net_device *dev)
{
- raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
+ call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);
@@ -891,8 +941,7 @@ EXPORT_SYMBOL(netdev_features_change);
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_CHANGE, dev);
+ call_netdevice_notifiers(NETDEV_CHANGE, dev);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
}
}
@@ -906,26 +955,18 @@ void netdev_state_change(struct net_device *dev)
* available in this kernel then it becomes a nop.
*/
-void dev_load(const char *name)
+void dev_load(struct net *net, const char *name)
{
struct net_device *dev;
read_lock(&dev_base_lock);
- dev = __dev_get_by_name(name);
+ dev = __dev_get_by_name(net, name);
read_unlock(&dev_base_lock);
if (!dev && capable(CAP_SYS_MODULE))
request_module("%s", name);
}
-static int default_rebuild_header(struct sk_buff *skb)
-{
- printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
- skb->dev ? skb->dev->name : "NULL!!!");
- kfree_skb(skb);
- return 1;
-}
-
/**
* dev_open - prepare an interface for use.
* @dev: device to open
@@ -988,7 +1029,7 @@ int dev_open(struct net_device *dev)
/*
* ... and announce new interface.
*/
- raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
+ call_netdevice_notifiers(NETDEV_UP, dev);
}
return ret;
}
@@ -1004,6 +1045,8 @@ int dev_open(struct net_device *dev)
*/
int dev_close(struct net_device *dev)
{
+ might_sleep();
+
if (!(dev->flags & IFF_UP))
return 0;
@@ -1011,23 +1054,19 @@ int dev_close(struct net_device *dev)
* Tell people we are going down, so that they can
* prepare to death, when device is still operating.
*/
- raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
+ call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
dev_deactivate(dev);
clear_bit(__LINK_STATE_START, &dev->state);
/* Synchronize to scheduled poll. We cannot touch poll list,
- * it can be even on different cpu. So just clear netif_running(),
- * and wait when poll really will happen. Actually, the best place
- * for this is inside dev->stop() after device stopped its irq
- * engine, but this requires more changes in devices. */
-
+ * it can be even on different cpu. So just clear netif_running().
+ *
+ * dev->stop() will invoke napi_disable() on all of it's
+ * napi_struct instances on this device.
+ */
smp_mb__after_clear_bit(); /* Commit netif_running(). */
- while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
- /* No hurry. */
- msleep(1);
- }
/*
* Call the device specific close. This cannot fail.
@@ -1048,12 +1087,14 @@ int dev_close(struct net_device *dev)
/*
* Tell people we are down
*/
- raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+ call_netdevice_notifiers(NETDEV_DOWN, dev);
return 0;
}
+static int dev_boot_phase = 1;
+
/*
* Device change register/unregister. These are not inline or static
* as we export them to the world.
@@ -1077,23 +1118,27 @@ int register_netdevice_notifier(struct notifier_block *nb)
{
struct net_device *dev;
struct net_device *last;
+ struct net *net;
int err;
rtnl_lock();
err = raw_notifier_chain_register(&netdev_chain, nb);
if (err)
goto unlock;
+ if (dev_boot_phase)
+ goto unlock;
+ for_each_net(net) {
+ for_each_netdev(net, dev) {
+ err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
+ err = notifier_to_errno(err);
+ if (err)
+ goto rollback;
+
+ if (!(dev->flags & IFF_UP))
+ continue;
- for_each_netdev(dev) {
- err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
- err = notifier_to_errno(err);
- if (err)
- goto rollback;
-
- if (!(dev->flags & IFF_UP))
- continue;
-
- nb->notifier_call(nb, NETDEV_UP, dev);
+ nb->notifier_call(nb, NETDEV_UP, dev);
+ }
}
unlock:
@@ -1102,15 +1147,17 @@ unlock:
rollback:
last = dev;
- for_each_netdev(dev) {
- if (dev == last)
- break;
+ for_each_net(net) {
+ for_each_netdev(net, dev) {
+ if (dev == last)
+ break;
- if (dev->flags & IFF_UP) {
- nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
- nb->notifier_call(nb, NETDEV_DOWN, dev);
+ if (dev->flags & IFF_UP) {
+ nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
+ nb->notifier_call(nb, NETDEV_DOWN, dev);
+ }
+ nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
}
- nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
}
goto unlock;
}
@@ -1144,9 +1191,9 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
* are as for raw_notifier_call_chain().
*/
-int call_netdevice_notifiers(unsigned long val, void *v)
+int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- return raw_notifier_call_chain(&netdev_chain, val, v);
+ return raw_notifier_call_chain(&netdev_chain, val, dev);
}
/* When > 0 there are consumers of rx skb time stamps */
@@ -1233,21 +1280,21 @@ void __netif_schedule(struct net_device *dev)
}
EXPORT_SYMBOL(__netif_schedule);
-void __netif_rx_schedule(struct net_device *dev)
+void dev_kfree_skb_irq(struct sk_buff *skb)
{
- unsigned long flags;
+ if (atomic_dec_and_test(&skb->users)) {
+ struct softnet_data *sd;
+ unsigned long flags;
- local_irq_save(flags);
- dev_hold(dev);
- list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
- if (dev->quota < 0)
- dev->quota += dev->weight;
- else
- dev->quota = dev->weight;
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
- local_irq_restore(flags);
+ local_irq_save(flags);
+ sd = &__get_cpu_var(softnet_data);
+ skb->next = sd->completion_queue;
+ sd->completion_queue = skb;
+ raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ local_irq_restore(flags);
+ }
}
-EXPORT_SYMBOL(__netif_rx_schedule);
+EXPORT_SYMBOL(dev_kfree_skb_irq);
void dev_kfree_skb_any(struct sk_buff *skb)
{
@@ -1259,7 +1306,12 @@ void dev_kfree_skb_any(struct sk_buff *skb)
EXPORT_SYMBOL(dev_kfree_skb_any);
-/* Hot-plugging. */
+/**
+ * netif_device_detach - mark device as removed
+ * @dev: network device
+ *
+ * Mark device as removed from system and therefore no longer available.
+ */
void netif_device_detach(struct net_device *dev)
{
if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
@@ -1269,6 +1321,12 @@ void netif_device_detach(struct net_device *dev)
}
EXPORT_SYMBOL(netif_device_detach);
+/**
+ * netif_device_attach - mark device as attached
+ * @dev: network device
+ *
+ * Mark device as attached from system and restart if needed.
+ */
void netif_device_attach(struct net_device *dev)
{
if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
@@ -1501,18 +1559,6 @@ out_kfree_skb:
return 0;
}
-#define HARD_TX_LOCK(dev, cpu) { \
- if ((dev->features & NETIF_F_LLTX) == 0) { \
- netif_tx_lock(dev); \
- } \
-}
-
-#define HARD_TX_UNLOCK(dev) { \
- if ((dev->features & NETIF_F_LLTX) == 0) { \
- netif_tx_unlock(dev); \
- } \
-}
-
/**
* dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit
@@ -1730,7 +1776,7 @@ enqueue:
return NET_RX_SUCCESS;
}
- netif_rx_schedule(&queue->backlog_dev);
+ napi_schedule(&queue->backlog);
goto enqueue;
}
@@ -1771,6 +1817,7 @@ static inline struct net_device *skb_bond(struct sk_buff *skb)
return dev;
}
+
static void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = &__get_cpu_var(softnet_data);
@@ -1927,7 +1974,7 @@ int netif_receive_skb(struct sk_buff *skb)
__be16 type;
/* if we've gotten here through NAPI, check netpoll */
- if (skb->dev->poll && netpoll_rx(skb))
+ if (netpoll_receive_skb(skb))
return NET_RX_DROP;
if (!skb->tstamp.tv64)
@@ -2017,22 +2064,25 @@ out:
return ret;
}
-static int process_backlog(struct net_device *backlog_dev, int *budget)
+static int process_backlog(struct napi_struct *napi, int quota)
{
int work = 0;
- int quota = min(backlog_dev->quota, *budget);
struct softnet_data *queue = &__get_cpu_var(softnet_data);
unsigned long start_time = jiffies;
- backlog_dev->weight = weight_p;
- for (;;) {
+ napi->weight = weight_p;
+ do {
struct sk_buff *skb;
struct net_device *dev;
local_irq_disable();
skb = __skb_dequeue(&queue->input_pkt_queue);
- if (!skb)
- goto job_done;
+ if (!skb) {
+ __napi_complete(napi);
+ local_irq_enable();
+ break;
+ }
+
local_irq_enable();
dev = skb->dev;
@@ -2040,67 +2090,86 @@ static int process_backlog(struct net_device *backlog_dev, int *budget)
netif_receive_skb(skb);
dev_put(dev);
+ } while (++work < quota && jiffies == start_time);
- work++;
-
- if (work >= quota || jiffies - start_time > 1)
- break;
-
- }
-
- backlog_dev->quota -= work;
- *budget -= work;
- return -1;
-
-job_done:
- backlog_dev->quota -= work;
- *budget -= work;
+ return work;
+}
- list_del(&backlog_dev->poll_list);
- smp_mb__before_clear_bit();
- netif_poll_enable(backlog_dev);
+/**
+ * __napi_schedule - schedule for receive
+ * @napi: entry to schedule
+ *
+ * The entry's receive function will be scheduled to run
+ */
+void fastcall __napi_schedule(struct napi_struct *n)
+{
+ unsigned long flags;
- local_irq_enable();
- return 0;
+ local_irq_save(flags);
+ list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ local_irq_restore(flags);
}
+EXPORT_SYMBOL(__napi_schedule);
+
static void net_rx_action(struct softirq_action *h)
{
- struct softnet_data *queue = &__get_cpu_var(softnet_data);
+ struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
unsigned long start_time = jiffies;
int budget = netdev_budget;
void *have;
local_irq_disable();
- while (!list_empty(&queue->poll_list)) {
- struct net_device *dev;
+ while (!list_empty(list)) {
+ struct napi_struct *n;
+ int work, weight;
- if (budget <= 0 || jiffies - start_time > 1)
+ /* If softirq window is exhuasted then punt.
+ *
+ * Note that this is a slight policy change from the
+ * previous NAPI code, which would allow up to 2
+ * jiffies to pass before breaking out. The test
+ * used to be "jiffies - start_time > 1".
+ */
+ if (unlikely(budget <= 0 || jiffies != start_time))
goto softnet_break;
local_irq_enable();
- dev = list_entry(queue->poll_list.next,
- struct net_device, poll_list);
- have = netpoll_poll_lock(dev);
+ /* Even though interrupts have been re-enabled, this
+ * access is safe because interrupts can only add new
+ * entries to the tail of this list, and only ->poll()
+ * calls can remove this head entry from the list.
+ */
+ n = list_entry(list->next, struct napi_struct, poll_list);
- if (dev->quota <= 0 || dev->poll(dev, &budget)) {
- netpoll_poll_unlock(have);
- local_irq_disable();
- list_move_tail(&dev->poll_list, &queue->poll_list);
- if (dev->quota < 0)
- dev->quota += dev->weight;
- else
- dev->quota = dev->weight;
- } else {
- netpoll_poll_unlock(have);
- dev_put(dev);
- local_irq_disable();
- }
+ have = netpoll_poll_lock(n);
+
+ weight = n->weight;
+
+ work = n->poll(n, weight);
+
+ WARN_ON_ONCE(work > weight);
+
+ budget -= work;
+
+ local_irq_disable();
+
+ /* Drivers must not modify the NAPI state if they
+ * consume the entire weight. In such cases this code
+ * still "owns" the NAPI instance and therefore can
+ * move the instance around on the list at-will.
+ */
+ if (unlikely(work == weight))
+ list_move_tail(&n->poll_list, list);
+
+ netpoll_poll_unlock(have);
}
out:
local_irq_enable();
+
#ifdef CONFIG_NET_DMA
/*
* There may not be any more sk_buffs coming right now, so push
@@ -2115,6 +2184,7 @@ out:
}
}
#endif
+
return;
softnet_break:
@@ -2154,7 +2224,7 @@ int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
* match. --pb
*/
-static int dev_ifname(struct ifreq __user *arg)
+static int dev_ifname(struct net *net, struct ifreq __user *arg)
{
struct net_device *dev;
struct ifreq ifr;
@@ -2167,7 +2237,7 @@ static int dev_ifname(struct ifreq __user *arg)
return -EFAULT;
read_lock(&dev_base_lock);
- dev = __dev_get_by_index(ifr.ifr_ifindex);
+ dev = __dev_get_by_index(net, ifr.ifr_ifindex);
if (!dev) {
read_unlock(&dev_base_lock);
return -ENODEV;
@@ -2187,7 +2257,7 @@ static int dev_ifname(struct ifreq __user *arg)
* Thus we will need a 'compatibility mode'.
*/
-static int dev_ifconf(char __user *arg)
+static int dev_ifconf(struct net *net, char __user *arg)
{
struct ifconf ifc;
struct net_device *dev;
@@ -2211,7 +2281,7 @@ static int dev_ifconf(char __user *arg)
*/
total = 0;
- for_each_netdev(dev) {
+ for_each_netdev(net, dev) {
for (i = 0; i < NPROTO; i++) {
if (gifconf_list[i]) {
int done;
@@ -2245,6 +2315,7 @@ static int dev_ifconf(char __user *arg)
*/
void *dev_seq_start(struct seq_file *seq, loff_t *pos)
{
+ struct net *net = seq->private;
loff_t off;
struct net_device *dev;
@@ -2253,7 +2324,7 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
return SEQ_START_TOKEN;
off = 1;
- for_each_netdev(dev)
+ for_each_netdev(net, dev)
if (off++ == *pos)
return dev;
@@ -2262,9 +2333,10 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net *net = seq->private;
++*pos;
return v == SEQ_START_TOKEN ?
- first_net_device() : next_net_device((struct net_device *)v);
+ first_net_device(net) : next_net_device((struct net_device *)v);
}
void dev_seq_stop(struct seq_file *seq, void *v)
@@ -2360,7 +2432,26 @@ static const struct seq_operations dev_seq_ops = {
static int dev_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &dev_seq_ops);
+ struct seq_file *seq;
+ int res;
+ res = seq_open(file, &dev_seq_ops);
+ if (!res) {
+ seq = file->private_data;
+ seq->private = get_proc_net(inode);
+ if (!seq->private) {
+ seq_release(inode, file);
+ res = -ENXIO;
+ }
+ }
+ return res;
+}
+
+static int dev_seq_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct net *net = seq->private;
+ put_net(net);
+ return seq_release(inode, file);
}
static const struct file_operations dev_seq_fops = {
@@ -2368,7 +2459,7 @@ static const struct file_operations dev_seq_fops = {
.open = dev_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = dev_seq_release,
};
static const struct seq_operations softnet_seq_ops = {
@@ -2520,30 +2611,49 @@ static const struct file_operations ptype_seq_fops = {
};
-static int __init dev_proc_init(void)
+static int __net_init dev_proc_net_init(struct net *net)
{
int rc = -ENOMEM;
- if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
+ if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
goto out;
- if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
+ if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
goto out_dev;
- if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
- goto out_dev2;
-
- if (wext_proc_init())
+ if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
goto out_softnet;
+
+ if (wext_proc_init(net))
+ goto out_ptype;
rc = 0;
out:
return rc;
+out_ptype:
+ proc_net_remove(net, "ptype");
out_softnet:
- proc_net_remove("ptype");
-out_dev2:
- proc_net_remove("softnet_stat");
+ proc_net_remove(net, "softnet_stat");
out_dev:
- proc_net_remove("dev");
+ proc_net_remove(net, "dev");
goto out;
}
+
+static void __net_exit dev_proc_net_exit(struct net *net)
+{
+ wext_proc_exit(net);
+
+ proc_net_remove(net, "ptype");
+ proc_net_remove(net, "softnet_stat");
+ proc_net_remove(net, "dev");
+}
+
+static struct pernet_operations __net_initdata dev_proc_ops = {
+ .init = dev_proc_net_init,
+ .exit = dev_proc_net_exit,
+};
+
+static int __init dev_proc_init(void)
+{
+ return register_pernet_subsys(&dev_proc_ops);
+}
#else
#define dev_proc_init() 0
#endif /* CONFIG_PROC_FS */
@@ -2906,8 +3016,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
if (dev->flags & IFF_UP &&
((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
IFF_VOLATILE)))
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_CHANGE, dev);
+ call_netdevice_notifiers(NETDEV_CHANGE, dev);
if ((flags ^ dev->gflags) & IFF_PROMISC) {
int inc = (flags & IFF_PROMISC) ? +1 : -1;
@@ -2953,8 +3062,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
else
dev->mtu = new_mtu;
if (!err && dev->flags & IFF_UP)
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_CHANGEMTU, dev);
+ call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
return err;
}
@@ -2970,18 +3078,17 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
return -ENODEV;
err = dev->set_mac_address(dev, sa);
if (!err)
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_CHANGEADDR, dev);
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
return err;
}
/*
- * Perform the SIOCxIFxxx calls.
+ * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
*/
-static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
+static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
int err;
- struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
+ struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
if (!dev)
return -ENODEV;
@@ -2991,25 +3098,15 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
ifr->ifr_flags = dev_get_flags(dev);
return 0;
- case SIOCSIFFLAGS: /* Set interface flags */
- return dev_change_flags(dev, ifr->ifr_flags);
-
case SIOCGIFMETRIC: /* Get the metric on the interface
(currently unused) */
ifr->ifr_metric = 0;
return 0;
- case SIOCSIFMETRIC: /* Set the metric on the interface
- (currently unused) */
- return -EOPNOTSUPP;
-
case SIOCGIFMTU: /* Get the MTU of a device */
ifr->ifr_mtu = dev->mtu;
return 0;
- case SIOCSIFMTU: /* Set the MTU of a device */
- return dev_set_mtu(dev, ifr->ifr_mtu);
-
case SIOCGIFHWADDR:
if (!dev->addr_len)
memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
@@ -3019,17 +3116,9 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
ifr->ifr_hwaddr.sa_family = dev->type;
return 0;
- case SIOCSIFHWADDR:
- return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
-
- case SIOCSIFHWBROADCAST:
- if (ifr->ifr_hwaddr.sa_family != dev->type)
- return -EINVAL;
- memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
- min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_CHANGEADDR, dev);
- return 0;
+ case SIOCGIFSLAVE:
+ err = -EINVAL;
+ break;
case SIOCGIFMAP:
ifr->ifr_map.mem_start = dev->mem_start;
@@ -3040,6 +3129,59 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
ifr->ifr_map.port = dev->if_port;
return 0;
+ case SIOCGIFINDEX:
+ ifr->ifr_ifindex = dev->ifindex;
+ return 0;
+
+ case SIOCGIFTXQLEN:
+ ifr->ifr_qlen = dev->tx_queue_len;
+ return 0;
+
+ default:
+ /* dev_ioctl() should ensure this case
+ * is never reached
+ */
+ WARN_ON(1);
+ err = -EINVAL;
+ break;
+
+ }
+ return err;
+}
+
+/*
+ * Perform the SIOCxIFxxx calls, inside rtnl_lock()
+ */
+static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
+{
+ int err;
+ struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+
+ if (!dev)
+ return -ENODEV;
+
+ switch (cmd) {
+ case SIOCSIFFLAGS: /* Set interface flags */
+ return dev_change_flags(dev, ifr->ifr_flags);
+
+ case SIOCSIFMETRIC: /* Set the metric on the interface
+ (currently unused) */
+ return -EOPNOTSUPP;
+
+ case SIOCSIFMTU: /* Set the MTU of a device */
+ return dev_set_mtu(dev, ifr->ifr_mtu);
+
+ case SIOCSIFHWADDR:
+ return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
+
+ case SIOCSIFHWBROADCAST:
+ if (ifr->ifr_hwaddr.sa_family != dev->type)
+ return -EINVAL;
+ memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
+ min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+ return 0;
+
case SIOCSIFMAP:
if (dev->set_config) {
if (!netif_device_present(dev))
@@ -3066,14 +3208,6 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
dev->addr_len, 1);
- case SIOCGIFINDEX:
- ifr->ifr_ifindex = dev->ifindex;
- return 0;
-
- case SIOCGIFTXQLEN:
- ifr->ifr_qlen = dev->tx_queue_len;
- return 0;
-
case SIOCSIFTXQLEN:
if (ifr->ifr_qlen < 0)
return -EINVAL;
@@ -3134,7 +3268,7 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
* positive or a negative errno code on error.
*/
-int dev_ioctl(unsigned int cmd, void __user *arg)
+int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
struct ifreq ifr;
int ret;
@@ -3147,12 +3281,12 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
if (cmd == SIOCGIFCONF) {
rtnl_lock();
- ret = dev_ifconf((char __user *) arg);
+ ret = dev_ifconf(net, (char __user *) arg);
rtnl_unlock();
return ret;
}
if (cmd == SIOCGIFNAME)
- return dev_ifname((struct ifreq __user *)arg);
+ return dev_ifname(net, (struct ifreq __user *)arg);
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
@@ -3182,9 +3316,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
case SIOCGIFMAP:
case SIOCGIFINDEX:
case SIOCGIFTXQLEN:
- dev_load(ifr.ifr_name);
+ dev_load(net, ifr.ifr_name);
read_lock(&dev_base_lock);
- ret = dev_ifsioc(&ifr, cmd);
+ ret = dev_ifsioc_locked(net, &ifr, cmd);
read_unlock(&dev_base_lock);
if (!ret) {
if (colon)
@@ -3196,9 +3330,