33 files changed, 8427 insertions, 4176 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 796f46eece5..8a04dd22cf7 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -6,9 +6,8 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
 	 gen_stats.o gen_estimator.o net_namespace.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
-obj-$(CONFIG_HAS_DMA) += skb_dma_map.o
 
-obj-y		     += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
+obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o
 
 obj-$(CONFIG_XFRM) += flow.o
@@ -19,4 +18,4 @@ obj-$(CONFIG_NET_DMA) += user_dma.o
 obj-$(CONFIG_FIB_RULES) += fib_rules.o
 obj-$(CONFIG_TRACEPOINTS) += net-traces.o
 obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
-
+obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 58abee1f1df..18ac112ea7a 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -48,6 +48,7 @@
 #include <linux/poll.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
+#include <linux/slab.h>
 
 #include <net/protocol.h>
 #include <linux/skbuff.h>
@@ -55,6 +56,7 @@
 #include <net/checksum.h>
 #include <net/sock.h>
 #include <net/tcp_states.h>
+#include <trace/events/skb.h>
 
 /*
  *	Is a socket 'connection oriented' ?
@@ -84,7 +86,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
 	int error;
 	DEFINE_WAIT_FUNC(wait, receiver_wake_function);
 
-	prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
 	/* Socket errors? */
 	error = sock_error(sk);
@@ -113,7 +115,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
 	error = 0;
 	*timeo_p = schedule_timeout(*timeo_p);
 out:
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return error;
 interrupted:
 	error = sock_intr_errno(*timeo_p);
@@ -175,7 +177,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
 		 * interrupt level will suddenly eat the receive_queue.
 		 *
 		 * Look at current nfs client by the way...
-		 * However, this function was corrent in any case. 8)
+		 * However, this function was correct in any case. 8)
 		 */
 		unsigned long cpu_flags;
 
@@ -217,12 +219,34 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
 	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
 				   &peeked, err);
 }
+EXPORT_SYMBOL(skb_recv_datagram);
 
 void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
 {
 	consume_skb(skb);
 	sk_mem_reclaim_partial(sk);
 }
+EXPORT_SYMBOL(skb_free_datagram);
+
+void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
+{
+	bool slow;
+
+	if (likely(atomic_read(&skb->users) == 1))
+		smp_rmb();
+	else if (likely(!atomic_dec_and_test(&skb->users)))
+		return;
+
+	slow = lock_sock_fast(sk);
+	skb_orphan(skb);
+	sk_mem_reclaim_partial(sk);
+	unlock_sock_fast(sk, slow);
+
+	/* skb is now orphaned, can be freed outside of locked section */
+	trace_kfree_skb(skb, skb_free_datagram_locked);
+	__kfree_skb(skb);
+}
+EXPORT_SYMBOL(skb_free_datagram_locked);
 
 /**
  *	skb_kill_datagram - Free a datagram skbuff forcibly
@@ -261,11 +285,11 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
 	}
 
 	kfree_skb(skb);
+	atomic_inc(&sk->sk_drops);
 	sk_mem_reclaim_partial(sk);
 
 	return err;
 }
-
 EXPORT_SYMBOL(skb_kill_datagram);
 
 /**
@@ -284,6 +308,8 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 	int i, copy = start - offset;
 	struct sk_buff *frag_iter;
 
+	trace_skb_copy_datagram_iovec(skb, len);
+
 	/* Copy header. */
 	if (copy > 0) {
 		if (copy > len)
@@ -348,6 +374,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 fault:
 	return -EFAULT;
 }
+EXPORT_SYMBOL(skb_copy_datagram_iovec);
 
 /**
  *	skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
@@ -691,6 +718,7 @@ csum_error:
 fault:
 	return -EFAULT;
 }
+EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
 
 /**
  * 	datagram_poll - generic datagram poll
@@ -712,20 +740,19 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
 	struct sock *sk = sock->sk;
 	unsigned int mask;
 
-	poll_wait(file, sk->sk_sleep, wait);
+	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
 
 	/* exceptional events? */
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
 		mask |= POLLERR;
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		mask |= POLLRDHUP;
+		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
 		mask |= POLLHUP;
 
 	/* readable? */
-	if (!skb_queue_empty(&sk->sk_receive_queue) ||
-	    (sk->sk_shutdown & RCV_SHUTDOWN))
+	if (!skb_queue_empty(&sk->sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;
 
 	/* Connection-based need to check for termination and startup */
@@ -745,9 +772,4 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
 
 	return mask;
 }
-
 EXPORT_SYMBOL(datagram_poll);
-EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
-EXPORT_SYMBOL(skb_copy_datagram_iovec);
-EXPORT_SYMBOL(skb_free_datagram);
-EXPORT_SYMBOL(skb_recv_datagram);
diff --git a/net/core/dev.c b/net/core/dev.c
index baf2dc13a34..a3ef808b5e3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -79,6 +79,8 @@
 #include <linux/cpu.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/hash.h>
+#include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/mutex.h>
 #include <linux/string.h>
@@ -99,11 +101,10 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/stat.h>
-#include <linux/if_bridge.h>
-#include <linux/if_macvlan.h>
 #include <net/dst.h>
 #include <net/pkt_sched.h>
 #include <net/checksum.h>
+#include <net/xfrm.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
@@ -127,6 +128,10 @@
 #include <linux/jhash.h>
 #include <linux/random.h>
 #include <trace/events/napi.h>
+#include <trace/events/net.h>
+#include <trace/events/skb.h>
+#include <linux/pci.h>
+#include <linux/inetdevice.h>
 
 #include "net-sysfs.h"
 
@@ -175,7 +180,7 @@ static struct list_head ptype_all __read_mostly;	/* Taps */
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
  * semaphore.
  *
- * Pure readers hold dev_base_lock for reading.
+ * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
  *
  * Writers must hold the rtnl semaphore while they loop through the
  * dev_base_head list, and hold dev_base_lock for writing when they do the
@@ -191,21 +196,31 @@ static struct list_head ptype_all __read_mostly;	/* Taps */
  * semaphore held.
  */
 DEFINE_RWLOCK(dev_base_lock);
-
 EXPORT_SYMBOL(dev_base_lock);
 
-#define NETDEV_HASHBITS	8
-#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
-
 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 {
 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
-	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
+	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
 }
 
 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 {
-	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
+	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
+}
+
+static inline void rps_lock(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	spin_lock(&sd->input_pkt_queue.lock);
+#endif
+}
+
+static inline void rps_unlock(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	spin_unlock(&sd->input_pkt_queue.lock);
+#endif
 }
 
 /* Device list insertion */
@@ -216,23 +231,26 @@ static int list_netdevice(struct net_device *dev)
 	ASSERT_RTNL();
 
 	write_lock_bh(&dev_base_lock);
-	list_add_tail(&dev->dev_list, &net->dev_base_head);
-	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
-	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
+	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
+	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+	hlist_add_head_rcu(&dev->index_hlist,
+			   dev_index_hash(net, dev->ifindex));
 	write_unlock_bh(&dev_base_lock);
 	return 0;
 }
 
-/* Device list removal */
+/* Device list removal
+ * caller must respect an RCU grace period before freeing/reusing dev
+ */
 static void unlist_netdevice(struct net_device *dev)
 {
 	ASSERT_RTNL();
 
 	/* Unlink dev from the device chain */
 	write_lock_bh(&dev_base_lock);
-	list_del(&dev->dev_list);
-	hlist_del(&dev->name_hlist);
-	hlist_del(&dev->index_hlist);
+	list_del_rcu(&dev->dev_list);
+	hlist_del_rcu(&dev->name_hlist);
+	hlist_del_rcu(&dev->index_hlist);
 	write_unlock_bh(&dev_base_lock);
 }
 
@@ -247,7 +265,8 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
  *	queue in the local softnet handler.
  */
 
-DEFINE_PER_CPU(struct softnet_data, softnet_data);
+DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
+EXPORT_PER_CPU_SYMBOL(softnet_data);
 
 #ifdef CONFIG_LOCKDEP
 /*
@@ -269,10 +288,10 @@ static const unsigned short netdev_lock_type[] =
 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
-	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_IEEE802154_PHY,
+	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
 	 ARPHRD_VOID, ARPHRD_NONE};
 
-static const char *netdev_lock_name[] =
+static const char *const netdev_lock_name[] =
 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
@@ -287,7 +306,7 @@ static const char *netdev_lock_name[] =
 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
-	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_IEEE802154_PHY",
+	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
 	 "_xmit_VOID", "_xmit_NONE"};
 
 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
@@ -355,6 +374,14 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
  *							--ANK (980803)
  */
 
+static inline struct list_head *ptype_head(const struct packet_type *pt)
+{
+	if (pt->type == htons(ETH_P_ALL))
+		return &ptype_all;
+	else
+		return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+}
+
 /**
  *	dev_add_pack - add packet handler
  *	@pt: packet type declaration
@@ -370,17 +397,13 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 
 void dev_add_pack(struct packet_type *pt)
 {
-	int hash;
+	struct list_head *head = ptype_head(pt);
 
-	spin_lock_bh(&ptype_lock);
-	if (pt->type == htons(ETH_P_ALL))
-		list_add_rcu(&pt->list, &ptype_all);
-	else {
-		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
-		list_add_rcu(&pt->list, &ptype_base[hash]);
-	}
-	spin_unlock_bh(&ptype_lock);
+	spin_lock(&ptype_lock);
+	list_add_rcu(&pt->list, head);
+	spin_unlock(&ptype_lock);
 }
+EXPORT_SYMBOL(dev_add_pack);
 
 /**
  *	__dev_remove_pack	 - remove packet handler
@@ -397,15 +420,10 @@ void dev_add_pack(struct packet_type *pt)
  */
 void __dev_remove_pack(struct packet_type *pt)
 {
-	struct list_head *head;
+	struct list_head *head = ptype_head(pt);
 	struct packet_type *pt1;
 
-	spin_lock_bh(&ptype_lock);
-
-	if (pt->type == htons(ETH_P_ALL))
-		head = &ptype_all;
-	else
-		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+	spin_lock(&ptype_lock);
 
 	list_for_each_entry(pt1, head, list) {
 		if (pt == pt1) {
@@ -416,8 +434,10 @@ void __dev_remove_pack(struct packet_type *pt)
 
 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 out:
-	spin_unlock_bh(&ptype_lock);
+	spin_unlock(&ptype_lock);
 }
+EXPORT_SYMBOL(__dev_remove_pack);
+
 /**
  *	dev_remove_pack	 - remove packet handler
  *	@pt: packet type declaration
@@ -436,6 +456,7 @@ void dev_remove_pack(struct packet_type *pt)
 
 	synchronize_net();
 }
+EXPORT_SYMBOL(dev_remove_pack);
 
 /******************************************************************************
 
@@ -499,6 +520,7 @@ int netdev_boot_setup_check(struct net_device *dev)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(netdev_boot_setup_check);
 
 
 /**
@@ -582,15 +604,42 @@ __setup("netdev=", netdev_boot_setup);
 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 {
 	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_name_hash(net, name);
 
-	hlist_for_each(p, dev_name_hash(net, name)) {
-		struct net_device *dev
-			= hlist_entry(p, struct net_device, name_hlist);
+	hlist_for_each_entry(dev, p, head, name_hlist)
 		if (!strncmp(dev->name, name, IFNAMSIZ))
 			return dev;
-	}
+
 	return NULL;
 }
+EXPORT_SYMBOL(__dev_get_by_name);
+
+/**
+ *	dev_get_by_name_rcu	- find a device by its name
+ *	@net: the applicable net namespace
+ *	@name: name to find
+ *
+ *	Find an interface by name.
+ *	If the name is found a pointer to the device is returned.
+ * 	If the name is not found then %NULL is returned.
+ *	The reference counters are not incremented so the caller must be
+ *	careful with locks. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
+{
+	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_name_hash(net, name);
+
+	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
+		if (!strncmp(dev->name, name, IFNAMSIZ))
+			return dev;
+
+	return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_name_rcu);
 
 /**
  *	dev_get_by_name		- find a device by its name
@@ -608,13 +657,14 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
 {
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_name(net, name);
+	rcu_read_lock();
+	dev = dev_get_by_name_rcu(net, name);
 	if (dev)
 		dev_hold(dev);
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 	return dev;
 }
+EXPORT_SYMBOL(dev_get_by_name);
 
 /**
  *	__dev_get_by_index - find a device by its ifindex
@@ -631,15 +681,41 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 {
 	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_index_hash(net, ifindex);
 
-	hlist_for_each(p, dev_index_hash(net, ifindex)) {
-		struct net_device *dev
-			= hlist_entry(p, struct net_device, index_hlist);
+	hlist_for_each_entry(dev, p, head, index_hlist)
 		if (dev->ifindex == ifindex)
 			return dev;
-	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(__dev_get_by_index);
+
+/**
+ *	dev_get_by_index_rcu - find a device by its ifindex
+ *	@net: the applicable net namespace
+ *	@ifindex: index of device
+ *
+ *	Search for an interface by index. Returns %NULL if the device
+ *	is not found or a pointer to the device. The device has not
+ *	had its reference counter increased so the caller must be careful
+ *	about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
+{
+	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_index_hash(net, ifindex);
+
+	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
+		if (dev->ifindex == ifindex)
+			return dev;
+
 	return NULL;
 }
+EXPORT_SYMBOL(dev_get_by_index_rcu);
 
 
 /**
@@ -657,44 +733,41 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 {
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_index(net, ifindex);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifindex);
 	if (dev)
 		dev_hold(dev);
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 	return dev;
 }
+EXPORT_SYMBOL(dev_get_by_index);
 
 /**
- *	dev_getbyhwaddr - find a device by its hardware address
+ *	dev_getbyhwaddr_rcu - find a device by its hardware address
  *	@net: the applicable net namespace
  *	@type: media type of device
  *	@ha: hardware address
  *
  *	Search for an interface by MAC address. Returns NULL if the device
- *	is not found or a pointer to the device. The caller must hold the
- *	rtnl semaphore. The returned device has not had its ref count increased
+ *	is not found or a pointer to the device. The caller must hold the
+ *	RCU read lock. The returned device has not had its ref count increased
  *	and the caller must therefore be careful about locking
  *
- *	BUGS:
- *	If the API was consistent this would be __dev_get_by_hwaddr
  */
 
-struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
+struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
+				       const char *ha)
 {
 	struct net_device *dev;
 
-	ASSERT_RTNL();
-
-	for_each_netdev(net, dev)
+	for_each_netdev_rcu(net, dev)
 		if (dev->type == type &&
 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
 			return dev;
 
 	return NULL;
 }
-
-EXPORT_SYMBOL(dev_getbyhwaddr);
+EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
 
 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 {
@@ -707,51 +780,50 @@ struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 
 	return NULL;
 }
-
 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 
 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 {
-	struct net_device *dev;
+	struct net_device *dev, *ret = NULL;
 
-	rtnl_lock();
-	dev = __dev_getfirstbyhwtype(net, type);
-	if (dev)
-		dev_hold(dev);
-	rtnl_unlock();
-	return dev;
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev)
+		if (dev->type == type) {
+			dev_hold(dev);
+			ret = dev;
+			break;
+		}
+	rcu_read_unlock();
+	return ret;
 }
-
 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 
 /**
- *	dev_get_by_flags - find any device with given flags
+ *	dev_get_by_flags_rcu - find any device with given flags
  *	@net: the applicable net namespace
  *	@if_flags: IFF_* values
  *	@mask: bitmask of bits in if_flags to check
  *
  *	Search for any interface with the given flags. Returns NULL if a device
- *	is not found or a pointer to the device. The device returned has
- *	had a reference added and the pointer is safe until the user calls
- *	dev_put to indicate they have finished with it.
+ *	is not found or a pointer to the device. Must be called inside
+ *	rcu_read_lock(), and result refcount is unchanged.
  */
 
-struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
+struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
+				    unsigned short mask)
 {
 	struct net_device *dev, *ret;
 
 	ret = NULL;
-	read_lock(&dev_base_lock);
-	for_each_netdev(net, dev) {
+	for_each_netdev_rcu(net, dev) {
 		if (((dev->flags ^ if_flags) & mask) == 0) {
-			dev_hold(dev);
 			ret = dev;
 			break;
 		}
 	}
-	read_unlock(&dev_base_lock);
 	return ret;
 }
+EXPORT_SYMBOL(dev_get_by_flags_rcu);
 
 /**
  *	dev_valid_name - check if name is okay for network device
@@ -777,6 +849,7 @@ int dev_valid_name(const char *name)
 	}
 	return 1;
 }
+EXPORT_SYMBOL(dev_valid_name);
 
 /**
  *	__dev_alloc_name - allocate a name for a device
@@ -832,7 +905,8 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 		free_page((unsigned long) inuse);
 	}
 
-	snprintf(buf, IFNAMSIZ, name, i);
+	if (buf != name)
+		snprintf(buf, IFNAMSIZ, name, i);
 	if (!__dev_get_by_name(net, buf))
 		return i;
 
@@ -870,7 +944,27 @@ int dev_alloc_name(struct net_device *dev, const char *name)
 		strlcpy(dev->name, buf, IFNAMSIZ);
 	return ret;
 }
+EXPORT_SYMBOL(dev_alloc_name);
 
+static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt)
+{
+	struct net *net;
+
+	BUG_ON(!dev_net(dev));
+	net = dev_net(dev);
+
+	if (!dev_valid_name(name))
+		return -EINVAL;
+
+	if (fmt && strchr(name, '%'))
+		return dev_alloc_name(dev, name);
+	else if (__dev_get_by_name(net, name))
+		return -EEXIST;
+	else if (dev->name != name)
+		strlcpy(dev->name, name, IFNAMSIZ);
+
+	return 0;
+}
 
 /**
  *	dev_change_name - change name of a device
@@ -894,53 +988,45 @@ int dev_change_name(struct net_device *dev, const char *newname)
 	if (dev->flags & IFF_UP)
 		return -EBUSY;
 
-	if (!dev_valid_name(newname))
-		return -EINVAL;
-
 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
 		return 0;
 
 	memcpy(oldname, dev->name, IFNAMSIZ);
 
-	if (strchr(newname, '%')) {
-		err = dev_alloc_name(dev, newname);
-		if (err < 0)
-			return err;
-	}
-	else if (__dev_get_by_name(net, newname))
-		return -EEXIST;
-	else
-		strlcpy(dev->name, newname, IFNAMSIZ);
+	err = dev_get_valid_name(dev, newname, 1);
+	if (err < 0)
+		return err;
 
 rollback:
-	/* For now only devices in the initial network namespace
-	 * are in sysfs.
-	 */
-	if (net == &init_net) {
-		ret = device_rename(&dev->dev, dev->name);
-		if (ret) {
-			memcpy(dev->name, oldname, IFNAMSIZ);
-			return ret;
-		}
+	ret = device_rename(&dev->dev, dev->name);
+	if (ret) {
+		memcpy(dev->name, oldname, IFNAMSIZ);
+		return ret;
 	}
 
 	write_lock_bh(&dev_base_lock);
-	hlist_del(&dev->name_hlist);
-	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
+	hlist_del_rcu(&dev->name_hlist);
+	write_unlock_bh(&dev_base_lock);
+
+	synchronize_rcu();
+
+	write_lock_bh(&dev_base_lock);
+	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
 	write_unlock_bh(&dev_base_lock);
 
 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
 	ret = notifier_to_errno(ret);
 
 	if (ret) {
-		if (err) {
-			printk(KERN_ERR
-			       "%s: name change rollback failed: %d.\n",
-			       dev->name, ret);
-		} else {
+		/* err >= 0 on first pass, else it holds the first errno */
+		if (err >= 0) {
 			err = ret;
 			memcpy(dev->name, oldname, IFNAMSIZ);
 			goto rollback;
+		} else {
+			printk(KERN_ERR
+			       "%s: name change rollback failed: %d.\n",
+			       dev->name, ret);
 		}
 	}
 
@@ -970,7 +1056,7 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 		return 0;
 	}
 
-	dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
+	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
 	if (!dev->ifalias)
 		return -ENOMEM;
 
@@ -1006,10 +1092,11 @@ void netdev_state_change(struct net_device *dev)
 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
 	}
 }
+EXPORT_SYMBOL(netdev_state_change);
 
-void netdev_bonding_change(struct net_device *dev)
+int netdev_bonding_change(struct net_device *dev, unsigned long event)
 {
-	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
+	return call_netdevice_notifiers(event, dev);
 }
 EXPORT_SYMBOL(netdev_bonding_change);
 
@@ -1027,27 +1114,16 @@ void dev_load(struct net *net, const char *name)
 {
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_name(net, name);
-	read_unlock(&dev_base_lock);
+	rcu_read_lock();
+	dev = dev_get_by_name_rcu(net, name);
+	rcu_read_unlock();
 
-	if (!dev && capable(CAP_SYS_MODULE))
+	if (!dev && capable(CAP_NET_ADMIN))
 		request_module("%s", name);
 }
+EXPORT_SYMBOL(dev_load);
 
-/**
- *	dev_open	- prepare an interface for use.
- *	@dev:	device to open
- *
- *	Takes a device from down to up state. The device's private open
- *	function is invoked and then the multicast lists are loaded. Finally
- *	the device is moved into the up state and a %NETDEV_UP message is
- *	sent to the netdev notifier chain.
- *
- *	Calling this function on an active interface is a nop. On a failure
- *	a negative errno code is returned.
- */
-int dev_open(struct net_device *dev)
+static int __dev_open(struct net_device *dev)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 	int ret;
@@ -1055,13 +1131,6 @@ int dev_open(struct net_device *dev)
 	ASSERT_RTNL();
 
 	/*
-	 *	Is it already up?
-	 */
-
-	if (dev->flags & IFF_UP)
-		return 0;
-
-	/*
 	 *	Is it even present?
 	 */
 	if (!netif_device_present(dev))
@@ -1109,81 +1178,156 @@ int dev_open(struct net_device *dev)
 		 *	Wakeup transmit queue engine
 		 */
 		dev_activate(dev);
-
-		/*
-		 *	... and announce new interface.
-		 */
-		call_netdevice_notifiers(NETDEV_UP, dev);
 	}
 
 	return ret;
 }
 
 /**
- *	dev_close - shutdown an interface.
- *	@dev: device to shutdown
+ *	dev_open	- prepare an interface for use.
+ *	@dev:	device to open
  *
- *	This function moves an active device into down state. A
- *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
- *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
- *	chain.
+ *	Takes a device from down to up state. The device's private open
+ *	function is invoked and then the multicast lists are loaded. Finally
+ *	the device is moved into the up state and a %NETDEV_UP message is
+ *	sent to the netdev notifier chain.
+ *
+ *	Calling this function on an active interface is a nop. On a failure
+ *	a negative errno code is returned.
  */
-int dev_close(struct net_device *dev)
+int dev_open(struct net_device *dev)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
-	ASSERT_RTNL();
-
-	might_sleep();
+	int ret;
 
-	if (!(dev->flags & IFF_UP))
+	/*
+	 *	Is it already up?
+	 */
+	if (dev->flags & IFF_UP)
 		return 0;
 
 	/*
-	 *	Tell people we are going down, so that they can
-	 *	prepare to death, when device is still operating.
+	 *	Open device
 	 */
-	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
-
-	clear_bit(__LINK_STATE_START, &dev->state);
+	ret = __dev_open(dev);
+	if (ret < 0)
+		return ret;
 
-	/* Synchronize to scheduled poll. We cannot touch poll list,
-	 * it can be even on different cpu. So just clear netif_running().
-	 *
-	 * dev->stop() will invoke napi_disable() on all of it's
-	 * napi_struct instances on this device.
+	/*
+	 *	... and announce new interface.
 	 */
-	smp_mb__after_clear_bit(); /* Commit netif_running(). */
+	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+	call_netdevice_notifiers(NETDEV_UP, dev);
 
-	dev_deactivate(dev);
+	return ret;
+}
+EXPORT_SYMBOL(dev_open);
 
-	/*
-	 *	Call the device specific close. This cannot fail.
-	 *	Only if device is UP
-	 *
-	 *	We allow it to be called even after a DETACH hot-plug
-	 *	event.
-	 */
-	if (ops->ndo_stop)
-		ops->ndo_stop(dev);
+static int __dev_close_many(struct list_head *head)
+{
+	struct net_device *dev;
 
-	/*
-	 *	Device is now down.
-	 */
+	ASSERT_RTNL();
+	might_sleep();
 
-	dev->flags &= ~IFF_UP;
+	list_for_each_entry(dev, head, unreg_list) {
+		/*
+		 *	Tell people we are going down, so that they can
+		 *	prepare for death while the device is still operating.
+		 */
+		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
+
+		clear_bit(__LINK_STATE_START, &dev->state);
+
+		/* Synchronize to scheduled poll. We cannot touch poll list, it
+		 * can be even on different cpu. So just clear netif_running().
+		 *
+		 * dev->stop() will invoke napi_disable() on all of its
+		 * napi_struct instances on this device.
+		 */
+		smp_mb__after_clear_bit(); /* Commit netif_running(). */
+	}
+
+	dev_deactivate_many(head);
+
+	list_for_each_entry(dev, head, unreg_list) {
+		const struct net_device_ops *ops = dev->netdev_ops;
+
+		/*
+		 *	Call the device specific close. This cannot fail.
+		 *	Only if device is UP
+		 *
+		 *	We allow it to be called even after a DETACH hot-plug
+		 *	event.
+		 */
+		if (ops->ndo_stop)
+			ops->ndo_stop(dev);
+
+		/*
+		 *	Device is now down.
+		 */
+
+		dev->flags &= ~IFF_UP;
+
+		/*
+		 *	Shutdown NET_DMA
+		 */
+		net_dmaengine_put();
+	}
+
+	return 0;
+}
+
+static int __dev_close(struct net_device *dev)
+{
+	LIST_HEAD(single);
+
+	list_add(&dev->unreg_list, &single);
+	return __dev_close_many(&single);
+}
+
+int dev_close_many(struct list_head *head)
+{
+	struct net_device *dev, *tmp;
+	LIST_HEAD(tmp_list);
+
+	list_for_each_entry_safe(dev, tmp, head, unreg_list)
+		if (!(dev->flags & IFF_UP))
+			list_move(&dev->unreg_list, &tmp_list);
+
+	__dev_close_many(head);
 
 	/*
 	 * Tell people we are down
 	 */
-	call_netdevice_notifiers(NETDEV_DOWN, dev);
+	list_for_each_entry(dev, head, unreg_list) {
+		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+		call_netdevice_notifiers(NETDEV_DOWN, dev);
+	}
 
-	/*
-	 *	Shutdown NET_DMA
-	 */
-	net_dmaengine_put();
+	/* rollback_registered_many needs the complete original list */
+	list_splice(&tmp_list, head);
+	return 0;
+}
+
+/**
+ *	dev_close - shutdown an interface.
+ *	@dev: device to shutdown
+ *
+ *	This function moves an active device into down state. A
+ *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
+ *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
+ *	chain.
+ */
+int dev_close(struct net_device *dev)
+{
+	LIST_HEAD(single);
+
+	list_add(&dev->unreg_list, &single);
+	dev_close_many(&single);
 
 	return 0;
 }
+EXPORT_SYMBOL(dev_close);
 
 
 /**
@@ -1273,12 +1417,14 @@ rollback:
 				nb->notifier_call(nb, NETDEV_DOWN, dev);
 			}
 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
 		}
 	}
 
 	raw_notifier_chain_unregister(&netdev_chain, nb);
 	goto unlock;
 }
+EXPORT_SYMBOL(register_netdevice_notifier);
 
 /**
  *	unregister_netdevice_notifier - unregister a network notifier block
@@ -1299,6 +1445,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
 	rtnl_unlock();
 	return err;
 }
+EXPORT_SYMBOL(unregister_netdevice_notifier);
 
 /**
  *	call_netdevice_notifiers - call all network notifier blocks
@@ -1311,6 +1458,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
 
 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 {
+	ASSERT_RTNL();
 	return raw_notifier_call_chain(&netdev_chain, val, dev);
 }
 
@@ -1321,13 +1469,15 @@ void net_enable_timestamp(void)
 {
 	atomic_inc(&netstamp_needed);
 }
+EXPORT_SYMBOL(net_enable_timestamp);
 
 void net_disable_timestamp(void)
 {
 	atomic_dec(&netstamp_needed);
 }
+EXPORT_SYMBOL(net_disable_timestamp);
 
-static inline void net_timestamp(struct sk_buff *skb)
+static inline void net_timestamp_set(struct sk_buff *skb)
 {
 	if (atomic_read(&netstamp_needed))
 		__net_timestamp(skb);
@@ -1335,6 +1485,57 @@ static inline void net_timestamp(struct sk_buff *skb)
 		skb->tstamp.tv64 = 0;
 }
 
+static inline void net_timestamp_check(struct sk_buff *skb)
+{
+	if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
+		__net_timestamp(skb);
+}
+
+/**
+ * dev_forward_skb - loopback an skb to another netif
+ *
+ * @dev: destination network device
+ * @skb: buffer to forward
+ *
+ * return values:
+ *	NET_RX_SUCCESS	(no congestion)
+ *	NET_RX_DROP     (packet was dropped, but freed)
+ *
+ * dev_forward_skb can be used for injecting an skb from the
+ * start_xmit function of one device into the receive queue
+ * of another device.
+ *
+ * The receiving device may be in another namespace, so
+ * we have to clear all information in the skb that could
+ * impact namespace isolation.
+ */
+int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+	skb_orphan(skb);
+	nf_reset(skb);
+
+	if (unlikely(!(dev->flags & IFF_UP) ||
+		     (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
+		atomic_long_inc(&dev->rx_dropped);
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+	skb_set_dev(skb, dev);
+	skb->tstamp.tv64 = 0;
+	skb->pkt_type = PACKET_HOST;
+	skb->protocol = eth_type_trans(skb, dev);
+	return netif_rx(skb);
+}
+EXPORT_SYMBOL_GPL(dev_forward_skb);
+
+static inline int deliver_skb(struct sk_buff *skb,
+			      struct packet_type *pt_prev,
+			      struct net_device *orig_dev)
+{
+	atomic_inc(&skb->users);
+	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+}
+
 /*
  *	Support routine. Sends outgoing frames to any network
  *	taps currently in use.
@@ -1343,13 +1544,8 @@ static inline void net_timestamp(struct sk_buff *skb)
 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct packet_type *ptype;
-
-#ifdef CONFIG_NET_CLS_ACT
-	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
-		net_timestamp(skb);
-#else
-	net_timestamp(skb);
-#endif
+	struct sk_buff *skb2 = NULL;
+	struct packet_type *pt_prev = NULL;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -1359,10 +1555,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 		if ((ptype->dev == dev || !ptype->dev) &&
 		    (ptype->af_packet_priv == NULL ||
 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
-			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
+			if (pt_prev) {
+				deliver_skb(skb2, pt_prev, skb->dev);
+				pt_prev = ptype;
+				continue;
+			}
+
+			skb2 = skb_clone(skb, GFP_ATOMIC);
 			if (!skb2)
 				break;
 
+			net_timestamp_set(skb2);
+
 			/* skb->nh should be correctly
 			   set by sender, so that the second statement is
 			   just protection against buggy protocols.
@@ -1374,18 +1578,81 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 				if (net_ratelimit())
 					printk(KERN_CRIT "protocol %04x is "
 					       "buggy, dev %s\n",
-					       skb2->protocol, dev->name);
+					       ntohs(skb2->protocol),
+					       dev->name);
 				skb_reset_network_header(skb2);
 			}
 
 			skb2->transport_header = skb2->network_header;
 			skb2->pkt_type = PACKET_OUTGOING;
-			ptype->func(skb2, skb->dev, ptype, skb->dev);
+			pt_prev = ptype;
 		}
 	}
+	if (pt_prev)
+		pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
 	rcu_read_unlock();
 }
 
+/*
+ * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
+ * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
+ */
+int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+{
+	int rc;
+
+	if (txq < 1 || txq > dev->num_tx_queues)
+		return -EINVAL;
+
+	if (dev->reg_state == NETREG_REGISTERED) {
+		ASSERT_RTNL();
+
+		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
+						  txq);
+		if (rc)
+			return rc;
+
+		if (txq < dev->real_num_tx_queues)
+			qdisc_reset_all_tx_gt(dev, txq);
+	}
+
+	dev->real_num_tx_queues = txq;
+	return 0;
+}
+EXPORT_SYMBOL(netif_set_real_num_tx_queues);
+
+#ifdef CONFIG_RPS
+/**
+ *	netif_set_real_num_rx_queues - set actual number of RX queues used
+ *	@dev: Network device
+ *	@rxq: Actual number of RX queues
+ *
+ *	This must be called either with the rtnl_lock held or before
+ *	registration of the net device.  Returns 0 on success, or a
+ *	negative error code.  If called before registration, it always
+ *	succeeds.
+ */
+int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
+{
+	int rc;
+
+	if (rxq < 1 || rxq > dev->num_rx_queues)
+		return -EINVAL;
+
+	if (dev->reg_state == NETREG_REGISTERED) {
+		ASSERT_RTNL();
+
+		rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
+						  rxq);
+		if (rc)
+			return rc;
+	}
+
+	dev->real_num_rx_queues = rxq;
+	return 0;
+}
+EXPORT_SYMBOL(netif_set_real_num_rx_queues);
+#endif
 
 static inline void __netif_reschedule(struct Qdisc *q)
 {
@@ -1394,8 +1661,9 @@ static inline void __netif_reschedule(struct Qdisc *q)
 
 	local_irq_save(flags);
 	sd = &__get_cpu_var(softnet_data);
-	q->next_sched = sd->output_queue;
-	sd->output_queue = q;
+	q->next_sched = NULL;
+	*sd->output_queue_tailp = q;
+	sd->output_queue_tailp = &q->next_sched;
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_restore(flags);
 }
@@ -1464,31 +1732,35 @@ void netif_device_attach(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_device_attach);
 
-static bool can_checksum_protocol(unsigned long features, __be16 protocol)
-{
-	return ((features & NETIF_F_GEN_CSUM) ||
-		((features & NETIF_F_IP_CSUM) &&
-		 protocol == htons(ETH_P_IP)) ||
-		((features & NETIF_F_IPV6_CSUM) &&
-		 protocol == htons(ETH_P_IPV6)) ||
-		((features & NETIF_F_FCOE_CRC) &&
-		 protocol == htons(ETH_P_FCOE)));
-}
-
-static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
-{
-	if (can_checksum_protocol(dev->features, skb->protocol))
-		return true;
-
-	if (skb->protocol == htons(ETH_P_8021Q)) {
-		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
-		if (can_checksum_protocol(dev->features & dev->vlan_features,
-					  veh->h_vlan_encapsulated_proto))
-			return true;
+/**
+ * skb_set_dev - assign a new device to a buffer
+ * @skb: buffer for the new device
+ * @dev: network device
+ *
+ * If an skb is owned by a device already, we have to reset
+ * all data private to the namespace a device belongs to
+ * before assigning it a new device.
+ */
+#ifdef CONFIG_NET_NS
+void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
+{
+	skb_dst_drop(skb);
+	if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
+		secpath_reset(skb);
+		nf_reset(skb);
+		skb_init_secmark(skb);
+		skb->mark = 0;
+		skb->priority = 0;
+		skb->nf_trace = 0;
+		skb->ipvs_property = 0;
+#ifdef CONFIG_NET_SCHED
+		skb->tc_index = 0;
+#endif
 	}
-
-	return false;
+	skb->dev = dev;
 }
+EXPORT_SYMBOL(skb_set_dev);
+#endif /* CONFIG_NET_NS */
 
 /*
  * Invalidate hardware checksum when packet is to be mangled, and
@@ -1507,7 +1779,7 @@ int skb_checksum_help(struct sk_buff *skb)
 		goto out_set_summed;
 	}
 
-	offset = skb->csum_start - skb_headroom(skb);
+	offset = skb_checksum_start_offset(skb);
 	BUG_ON(offset >= skb_headlen(skb));
 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
 
@@ -1527,6 +1799,7 @@ out_set_summed:
 out:
 	return ret;
 }
+EXPORT_SYMBOL(skb_checksum_help);
 
 /**
  *	skb_gso_segment - Perform segmentation on skb.
@@ -1543,8 +1816,20 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_type *ptype;
 	__be16 type = skb->protocol;
+	int vlan_depth = ETH_HLEN;
 	int err;
 
+	while (type == htons(ETH_P_8021Q)) {
+		struct vlan_hdr *vh;
+
+		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
+			return ERR_PTR(-EINVAL);
+
+		vh = (struct vlan_hdr *)(skb->data + vlan_depth);
+		type = vh->h_vlan_encapsulated_proto;
+		vlan_depth += VLAN_HLEN;
+	}
+
 	skb_reset_mac_header(skb);
 	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
@@ -1556,8 +1841,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
 			dev->ethtool_ops->get_drvinfo(dev, &info);
 
-		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
-			"ip_summed=%d",
+		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n",
 		     info.driver, dev ? dev->features : 0L,
 		     skb->sk ? skb->sk->sk_route_caps : 0L,
 		     skb->len, skb->data_len, skb->ip_summed);
@@ -1589,7 +1873,6 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 
 	return segs;
 }
-
 EXPORT_SYMBOL(skb_gso_segment);
 
 /* Take action when hardware reception checksum errors are detected. */
@@ -1610,18 +1893,27 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
  * 2. No high memory really exists on this machine.
  */
 
-static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_HIGHMEM
 	int i;
+	if (!(dev->features & NETIF_F_HIGHDMA)) {
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+			if (PageHighMem(skb_shinfo(skb)->frags[i].page))
+				return 1;
+	}
 
-	if (dev->features & NETIF_F_HIGHDMA)
-		return 0;
-
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
-			return 1;
+	if (PCI_DMA_BUS_IS_PHYS) {
+		struct device *pdev = dev->dev.parent;
 
+		if (!pdev)
+			return 0;
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
+			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
+				return 1;
+		}
+	}
 #endif
 	return 0;
 }
@@ -1652,16 +1944,14 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
 /**
  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
  *	@skb: buffer to segment
+ *	@features: device features as applicable to this skb
  *
  *	This function segments the given skb and stores the list of segments
  *	in skb->next.
  */
-static int dev_gso_segment(struct sk_buff *skb)
+static int dev_gso_segment(struct sk_buff *skb, int features)
 {
-	struct net_device *dev = skb->dev;
 	struct sk_buff *segs;
-	int features = dev->features & ~(illegal_highdma(dev, skb) ?
-					 NETIF_F_SG : 0);
 
 	segs = skb_gso_segment(skb, features);
 
@@ -1679,22 +1969,97 @@ static int dev_gso_segment(struct sk_buff *skb)
 	return 0;
 }
 
+/*
+ * Try to orphan skb early, right before transmission by the device.
+ * We cannot orphan skb if tx timestamp is requested or the sk-reference
+ * is needed on driver level for other reasons, e.g. see net/can/raw.c
+ */
+static inline void skb_orphan_try(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+
+	if (sk && !skb_shinfo(skb)->tx_flags) {
+		/* skb_tx_hash() wont be able to get sk.
+		 * We copy sk_hash into skb->rxhash
+		 */
+		if (!skb->rxhash)
+			skb->rxhash = sk->sk_hash;
+		skb_orphan(skb);
+	}
+}
+
+static bool can_checksum_protocol(unsigned long features, __be16 protocol)
+{
+	return ((features & NETIF_F_GEN_CSUM) ||
+		((features & NETIF_F_V4_CSUM) &&
+		 protocol == htons(ETH_P_IP)) ||
+		((features & NETIF_F_V6_CSUM) &&
+		 protocol == htons(ETH_P_IPV6)) ||
+		((features & NETIF_F_FCOE_CRC) &&
+		 protocol == htons(ETH_P_FCOE)));
+}
+
+static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features)
+{
+	/* Device cannot checksum this protocol: clear csum offload and SG. */
+	if (!can_checksum_protocol(features, protocol)) {
+		features &= ~(NETIF_F_ALL_CSUM | NETIF_F_SG);
+	} else if (illegal_highdma(skb->dev, skb)) {
+		features &= ~NETIF_F_SG;
+	}
+
+	return features;
+}
+
+int netif_skb_features(struct sk_buff *skb)
+{
+	__be16 protocol = skb->protocol;
+	int features = skb->dev->features;
+
+	if (protocol == htons(ETH_P_8021Q)) {
+		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+		protocol = veh->h_vlan_encapsulated_proto;
+	} else if (!vlan_tx_tag_present(skb)) {
+		return harmonize_features(skb, protocol, features);
+	}
+
+	features &= skb->dev->vlan_features;
+
+	if (protocol != htons(ETH_P_8021Q)) {
+		return harmonize_features(skb, protocol, features);
+	} else {
+		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
+				NETIF_F_GEN_CSUM;
+		return harmonize_features(skb, protocol, features);
+	}
+}
+EXPORT_SYMBOL(netif_skb_features);
+
+/*
+ * Returns true if either:
+ *	1. skb has frag_list and the device doesn't support FRAGLIST, or
+ *	2. skb is fragmented and the device does not support SG, or if
+ *	   at least one of fragments is in highmem and device does not
+ *	   support DMA from it.
+ */
+static inline int skb_needs_linearize(struct sk_buff *skb,
+				      int features)
+{
+	return skb_is_nonlinear(skb) &&
+			((skb_has_frag_list(skb) &&
+				!(features & NETIF_F_FRAGLIST)) ||
+			(skb_shinfo(skb)->nr_frags &&
+				!(features & NETIF_F_SG)));
+}
+
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
-	int rc;
+	int rc = NETDEV_TX_OK;
 
 	if (likely(!skb->next)) {
-		if (!list_empty(&ptype_all))
-			dev_queue_xmit_nit(skb, dev);
-
-		if (netif_needs_gso(dev, skb)) {
-			if (unlikely(dev_gso_segment(skb)))
-				goto out_kfree_skb;
-			if (skb->next)
-				goto gso;
-		}
+		int features;
 
 		/*
 		 * If device doesnt need skb->dst, release it right now while
@@ -1703,23 +2068,49 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
 			skb_dst_drop(skb);
 
+		if (!list_empty(&ptype_all))
+			dev_queue_xmit_nit(skb, dev);
+
+		skb_orphan_try(skb);
+
+		features = netif_skb_features(skb);
+
+		if (vlan_tx_tag_present(skb) &&
+		    !(features & NETIF_F_HW_VLAN_TX)) {
+			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+			if (unlikely(!skb))
+				goto out;
+
+			skb->vlan_tci = 0;
+		}
+
+		if (netif_needs_gso(skb, features)) {
+			if (unlikely(dev_gso_segment(skb, features)))
+				goto out_kfree_skb;
+			if (skb->next)
+				goto gso;
+		} else {
+			if (skb_needs_linearize(skb, features) &&
+			    __skb_linearize(skb))
+				goto out_kfree_skb;
+
+			/* If packet is not checksummed and device does not
+			 * support checksumming for this protocol, complete
+			 * checksumming here.
+			 */
+			if (skb->ip_summed == CHECKSUM_PARTIAL) {
+				skb_set_transport_header(skb,
+					skb_checksum_start_offset(skb));
+				if (!(features & NETIF_F_ALL_CSUM) &&
+				     skb_checksum_help(skb))
+					goto out_kfree_skb;
+			}
+		}
+
 		rc = ops->ndo_start_xmit(skb, dev);
-		if (rc == 0)
+		trace_net_dev_xmit(skb, rc);
+		if (rc == NETDEV_TX_OK)
 			txq_trans_update(txq);
-		/*
-		 * TODO: if skb_orphan() was called by
-		 * dev->hard_start_xmit() (for example, the unmodified
-		 * igb driver does that; bnx2 doesn't), then
-		 * skb_tx_software_timestamp() will be unable to send
-		 * back the time stamp.
-		 *
-		 * How can this be prevented? Always create another
-		 * reference to the socket before calling
-		 * dev->hard_start_xmit()? Prevent that skb_orphan()
-		 * does anything in dev->hard_start_xmit() by clearing
-		 * the skb destructor before the call and restoring it
-		 * afterwards, then doing the skb_orphan() ourselves?
-		 */
 		return rc;
 	}
 
@@ -1729,8 +2120,19 @@ gso:
 
 		skb->next = nskb->next;
 		nskb->next = NULL;
+
+		/*
+		 * If device doesnt need nskb->dst, release it right now while
+		 * its hot in this cpu cache
+		 */
+		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
+			skb_dst_drop(nskb);
+
 		rc = ops->ndo_start_xmit(nskb, dev);
-		if (unlikely(rc)) {
+		trace_net_dev_xmit(nskb, rc);
+		if (unlikely(rc != NETDEV_TX_OK)) {
+			if (rc & ~NETDEV_TX_MASK)
+				goto out_kfree_gso_skb;
 			nskb->next = skb->next;
 			skb->next = nskb;
 			return rc;
@@ -1740,52 +2142,195 @@ gso:
 			return NETDEV_TX_BUSY;
 	} while (skb->next);
 
-	skb->destructor = DEV_GSO_CB(skb)->destructor;
-
+out_kfree_gso_skb:
+	if (likely(skb->next == NULL))
+		skb->destructor = DEV_GSO_CB(skb)->destructor;
 out_kfree_skb:
 	kfree_skb(skb);
-	return 0;
+out:
+	return rc;
 }
 
-static u32 skb_tx_hashrnd;
+static u32 hashrnd __read_mostly;
 
-u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
+/*
+ * Returns a Tx hash based on the given packet descriptor and a Tx queues'
+ * number to be used as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+		  unsigned int num_tx_queues)
 {
 	u32 hash;
 
 	if (skb_rx_queue_recorded(skb)) {
 		hash = skb_get_rx_queue(skb);
-		while (unlikely (hash >= dev->real_num_tx_queues))
-			hash -= dev->real_num_tx_queues;
+		while (unlikely(hash >= num_tx_queues))
+			hash -= num_tx_queues;
 		return hash;
 	}
 
 	if (skb->sk && skb->sk->sk_hash)
 		hash = skb->sk->sk_hash;
 	else
-		hash = skb->protocol;
+		hash = (__force u16) skb->protocol ^ skb->rxhash;
+	hash = jhash_1word(hash, hashrnd);
+
+	return (u16) (((u64) hash * num_tx_queues) >> 32);
+}
+EXPORT_SYMBOL(__skb_tx_hash);
+
+static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
+{
+	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
+		if (net_ratelimit()) {
+			pr_warning("%s selects TX queue %d, but "
+				"real number of TX queues is %d\n",
+				dev->name, queue_index, dev->real_num_tx_queues);
+		}
+		return 0;
+	}
+	return queue_index;
+}
 
-	hash = jhash_1word(hash, skb_tx_hashrnd);
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+	struct xps_dev_maps *dev_maps;
+	struct xps_map *map;
+	int queue_index = -1;
+
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_maps);
+	if (dev_maps) {
+		map = rcu_dereference(
+		    dev_maps->cpu_map[raw_smp_processor_id()]);
+		if (map) {
+			if (map->len == 1)
+				queue_index = map->queues[0];
+			else {
+				u32 hash;
+				if (skb->sk && skb->sk->sk_hash)
+					hash = skb->sk->sk_hash;
+				else
+					hash = (__force u16) skb->protocol ^
+					    skb->rxhash;
+				hash = jhash_1word(hash, hashrnd);
+				queue_index = map->queues[
+				    ((u64)hash * map->len) >> 32];
+			}
+			if (unlikely(queue_index >= dev->real_num_tx_queues))
+				queue_index = -1;
+		}
+	}
+	rcu_read_unlock();
 
-	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
+	return queue_index;
+#else
+	return -1;
+#endif
 }
-EXPORT_SYMBOL(skb_tx_hash);
 
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 					struct sk_buff *skb)
 {
+	int queue_index;
 	const struct net_device_ops *ops = dev->netdev_ops;
-	u16 queue_index = 0;
 
-	if (ops->ndo_select_queue)
+	if (dev->real_num_tx_queues == 1)
+		queue_index = 0;
+	else if (ops->ndo_select_queue) {
 		queue_index = ops->ndo_select_queue(dev, skb);
-	else if (dev->real_num_tx_queues > 1)
-		queue_index = skb_tx_hash(dev, skb);
+		queue_index = dev_cap_txqueue(dev, queue_index);
+	} else {
+		struct sock *sk = skb->sk;
+		queue_index = sk_tx_queue_get(sk);
+
+		if (queue_index < 0 || skb->ooo_okay ||
+		    queue_index >= dev->real_num_tx_queues) {
+			int old_index = queue_index;
+
+			queue_index = get_xps_queue(dev, skb);
+			if (queue_index < 0)
+				queue_index = skb_tx_hash(dev, skb);
+
+			if (queue_index != old_index && sk) {
+				struct dst_entry *dst =
+				    rcu_dereference_check(sk->sk_dst_cache, 1);
+
+				if (dst && skb_dst(skb) == dst)
+					sk_tx_queue_set(sk, queue_index);
+			}
+		}
+	}
 
 	skb_set_queue_mapping(skb, queue_index);
 	return netdev_get_tx_queue(dev, queue_index);
 }
 
+static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
+				 struct net_device *dev,
+				 struct netdev_queue *txq)
+{
+	spinlock_t *root_lock = qdisc_lock(q);
+	bool contended = qdisc_is_running(q);
+	int rc;
+
+	/*
+	 * Heuristic to force contended enqueues to serialize on a
+	 * separate lock before trying to get qdisc main lock.
+	 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
+	 * and dequeue packets faster.
+	 */
+	if (unlikely(contended))
+		spin_lock(&q->busylock);
+
+	spin_lock(root_lock);
+	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+		kfree_skb(skb);
+		rc = NET_XMIT_DROP;
+	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
+		   qdisc_run_begin(q)) {
+		/*
+		 * This is a work-conserving queue; there are no old skbs
+		 * waiting to be sent out; and the qdisc is not running -
+		 * xmit the skb directly.
+		 */
+		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
+			skb_dst_force(skb);
+
+		qdisc_skb_cb(skb)->pkt_len = skb->len;
+		qdisc_bstats_update(q, skb);
+
+		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
+			if (unlikely(contended)) {
+				spin_unlock(&q->busylock);
+				contended = false;
+			}
+			__qdisc_run(q);
+		} else
+			qdisc_run_end(q);
+
+		rc = NET_XMIT_SUCCESS;
+	} else {
+		skb_dst_force(skb);
+		rc = qdisc_enqueue_root(skb, q);
+		if (qdisc_run_begin(q)) {
+			if (unlikely(contended)) {
+				spin_unlock(&q->busylock);
+				contended = false;
+			}
+			__qdisc_run(q);
+		}
+	}
+	spin_unlock(root_lock);
+	if (unlikely(contended))
+		spin_unlock(&q->busylock);
+	return rc;
+}
+
+static DEFINE_PER_CPU(int, xmit_recursion);
+#define RECURSION_LIMIT 10
+
 /**
  *	dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
@@ -1818,60 +2363,20 @@ int dev_queue_xmit(struct sk_buff *skb)
 	struct Qdisc *q;
 	int rc = -ENOMEM;
 
-	/* GSO will handle the following emulations directly. */
-	if (netif_needs_gso(dev, skb))
-		goto gso;
-
-	if (skb_has_frags(skb) &&
-	    !(dev->features & NETIF_F_FRAGLIST) &&
-	    __skb_linearize(skb))
-		goto out_kfree_skb;
-
-	/* Fragmented skb is linearized if device does not support SG,
-	 * or if at least one of fragments is in highmem and device
-	 * does not support DMA from it.
-	 */
-	if (skb_shinfo(skb)->nr_frags &&
-	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
-	    __skb_linearize(skb))
-		goto out_kfree_skb;
-
-	/* If packet is not checksummed and device does not support
-	 * checksumming for this protocol, complete checksumming here.
-	 */
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		skb_set_transport_header(skb, skb->csum_start -
-					      skb_headroom(skb));
-		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
-			goto out_kfree_skb;
-	}
-
-gso:
 	/* Disable soft irqs for various locks below. Also
 	 * stops preemption for RCU.
 	 */
 	rcu_read_lock_bh();
 
 	txq = dev_pick_tx(dev, skb);
-	q = rcu_dereference(txq->qdisc);
+	q = rcu_dereference_bh(txq->qdisc);
 
 #ifdef CONFIG_NET_CLS_ACT
-	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
+	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
 #endif
+	trace_net_dev_queue(skb);
 	if (q->enqueue) {
-		spinlock_t *root_lock = qdisc_lock(q);
-
-		spin_lock(root_lock);
-
-		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
-			kfree_skb(skb);
-			rc = NET_XMIT_DROP;
-		} else {
-			rc = qdisc_enqueue_root(skb, q);
-			qdisc_run(q);
-		}
-		spin_unlock(root_lock);
-
+		rc = __dev_xmit_skb(skb, q, dev, txq);
 		goto out;
 	}
 
@@ -1892,11 +2397,16 @@ gso:
 
 		if (txq->xmit_lock_owner != cpu) {
 
+			if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
+				goto recursion_alert;
+
 			HARD_TX_LOCK(dev, txq, cpu);
 
 			if (!netif_tx_queue_stopped(txq)) {
-				rc = 0;
-				if (!dev_hard_start_xmit(skb, dev, txq)) {
+				__this_cpu_inc(xmit_recursion);
+				rc = dev_hard_start_xmit(skb, dev, txq);
+				__this_cpu_dec(xmit_recursion);
+				if (dev_xmit_complete(rc)) {
 					HARD_TX_UNLOCK(dev, txq);
 					goto out;
 				}
@@ -1907,7 +2417,9 @@ gso:
 				       "queue packet!\n", dev->name);
 		} else {
 			/* Recursion is detected! It is possible,
-			 * unfortunately */
+			 * unfortunately
+			 */
+recursion_alert:
 			if (net_ratelimit())
 				printk(KERN_CRIT "Dead loop on virtual device "
 				       "%s, fix it urgently!\n", dev->name);
@@ -1917,13 +2429,13 @@ gso:
 	rc = -ENETDOWN;
 	rcu_read_unlock_bh();
 
-out_kfree_skb:
 	kfree_skb(skb);
 	return rc;
 out:
 	rcu_read_unlock_bh();
 	return rc;
 }
+EXPORT_SYMBOL(dev_queue_xmit);
 
 
 /*=======================================================================
@@ -1931,11 +2443,267 @@ out:
   =======================================================================*/
 
 int netdev_max_backlog __read_mostly = 1000;
+int netdev_tstamp_prequeue __read_mostly = 1;
 int netdev_budget __read_mostly = 300;
 int weight_p __read_mostly = 64;            /* old backlog weight */
 
-DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
+/* Called with irq disabled */
+static inline void ____napi_schedule(struct softnet_data *sd,
+				     struct napi_struct *napi)
+{
+	list_add_tail(&napi->poll_list, &sd->poll_list);
+	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+}
+
+/*
+ * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
+ * and src/dst port numbers. Returns a non-zero hash number on success
+ * and 0 on failure.
+ */
+__u32 __skb_get_rxhash(struct sk_buff *skb)
+{
+	int nhoff, hash = 0, poff;
+	struct ipv6hdr *ip6;
+	struct iphdr *ip;
+	u8 ip_proto;
+	u32 addr1, addr2, ihl;
+	union {
+		u32 v32;
+		u16 v16[2];
+	} ports;
+
+	nhoff = skb_network_offset(skb);
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
+			goto done;
+
+		ip = (struct iphdr *) (skb->data + nhoff);
+		if (ip->frag_off & htons(IP_MF | IP_OFFSET))
+			ip_proto = 0;
+		else
+			ip_proto = ip->protocol;
+		addr1 = (__force u32) ip->saddr;
+		addr2 = (__force u32) ip->daddr;
+		ihl = ip->ihl;
+		break;
+	case __constant_htons(ETH_P_IPV6):
+		if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
+			goto done;
+
+		ip6 = (struct ipv6hdr *) (skb->data + nhoff);
+		ip_proto = ip6->nexthdr;
+		addr1 = (__force u32) ip6->saddr.s6_addr32[3];
+		addr2 = (__force u32) ip6->daddr.s6_addr32[3];
+		ihl = (40 >> 2);
+		break;
+	default:
+		goto done;
+	}
+
+	ports.v32 = 0;
+	poff = proto_ports_offset(ip_proto);
+	if (poff >= 0) {
+		nhoff += ihl * 4 + poff;
+		if (pskb_may_pull(skb, nhoff + 4)) {
+			ports.v32 = * (__force u32 *) (skb->data + nhoff);
+			if (ports.v16[1] < ports.v16[0])
+				swap(ports.v16[0], ports.v16[1]);
+		}
+	}
+
+	/* get a consistent hash (same value on both flow directions) */
+	if (addr2 < addr1)
+		swap(addr1, addr2);
+
+	hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
+	if (!hash)
+		hash = 1;
+
+done:
+	return hash;
+}
+EXPORT_SYMBOL(__skb_get_rxhash);
+
+#ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
+/*
+ * get_rps_cpu is called from netif_receive_skb and returns the target
+ * CPU from the RPS map of the receiving queue for a given skb.
+ * rcu_read_lock must be held on entry.
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+		       struct rps_dev_flow **rflowp)
+{
+	struct netdev_rx_queue *rxqueue;
+	struct rps_map *map;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_sock_flow_table *sock_flow_table;
+	int cpu = -1;
+	u16 tcpu;
+
+	if (skb_rx_queue_recorded(skb)) {
+		u16 index = skb_get_rx_queue(skb);
+		if (unlikely(index >= dev->real_num_rx_queues)) {
+			WARN_ONCE(dev->real_num_rx_queues > 1,
+				  "%s received packet on queue %u, but number "
+				  "of RX queues is %u\n",
+				  dev->name, index, dev->real_num_rx_queues);
+			goto done;
+		}
+		rxqueue = dev->_rx + index;
+	} else
+		rxqueue = dev->_rx;
+
+	map = rcu_dereference(rxqueue->rps_map);
+	if (map) {
+		if (map->len == 1) {
+			tcpu = map->cpus[0];
+			if (cpu_online(tcpu))
+				cpu = tcpu;
+			goto done;
+		}
+	} else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) {
+		goto done;
+	}
+
+	skb_reset_network_header(skb);
+	if (!skb_get_rxhash(skb))
+		goto done;
+
+	flow_table = rcu_dereference(rxqueue->rps_flow_table);
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	if (flow_table && sock_flow_table) {
+		u16 next_cpu;
+		struct rps_dev_flow *rflow;
+
+		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+		tcpu = rflow->cpu;
+
+		next_cpu = sock_flow_table->ents[skb->rxhash &
+		    sock_flow_table->mask];
 
+		/*
+		 * If the desired CPU (where last recvmsg was done) is
+		 * different from current CPU (one in the rx-queue flow
+		 * table entry), switch if one of the following holds:
+		 *   - Current CPU is unset (equal to RPS_NO_CPU).
+		 *   - Current CPU is offline.
+		 *   - The current CPU's queue tail has advanced beyond the
+		 *     last packet that was enqueued using this table entry.
+		 *     This guarantees that all previous packets for the flow
+		 *     have been dequeued, thus preserving in order delivery.
+		 */
+		if (unlikely(tcpu != next_cpu) &&
+		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
+		      rflow->last_qtail)) >= 0)) {
+			tcpu = rflow->cpu = next_cpu;
+			if (tcpu != RPS_NO_CPU)
+				rflow->last_qtail = per_cpu(softnet_data,
+				    tcpu).input_queue_head;
+		}
+		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+			*rflowp = rflow;
+			cpu = tcpu;
+			goto done;
+		}
+	}
+
+	if (map) {
+		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+
+		if (cpu_online(tcpu)) {
+			cpu = tcpu;
+			goto done;
+		}
+	}
+
+done:
+	return cpu;
+}
+
+/* Called from hardirq (IPI) context */
+static void rps_trigger_softirq(void *data)
+{
+	struct softnet_data *sd = data;
+
+	____napi_schedule(sd, &sd->backlog);
+	sd->received_rps++;
+}
+
+#endif /* CONFIG_RPS */
+
+/*
+ * Check if this softnet_data structure belongs to another CPU.
+ * If yes, queue it to our IPI list and return 1.
+ * If no, return 0.
+ */
+static int rps_ipi_queued(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	struct softnet_data *mysd = &__get_cpu_var(softnet_data);
+
+	if (sd != mysd) {
+		sd->rps_ipi_next = mysd->rps_ipi_list;
+		mysd->rps_ipi_list = sd;
+
+		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+		return 1;
+	}
+#endif /* CONFIG_RPS */
+	return 0;
+}
+
+/*
+ * enqueue_to_backlog is called to queue an skb to a per CPU backlog
+ * queue (may be a remote CPU queue).
+ */
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+			      unsigned int *qtail)
+{
+	struct softnet_data *sd;
+	unsigned long flags;
+
+	sd = &per_cpu(softnet_data, cpu);
+
+	local_irq_save(flags);
+
+	rps_lock(sd);
+	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
+		if (skb_queue_len(&sd->input_pkt_queue)) {
+enqueue:
+			__skb_queue_tail(&sd->input_pkt_queue, skb);
+			input_queue_tail_incr_save(sd, qtail);
+			rps_unlock(sd);
+			local_irq_restore(flags);
+			return NET_RX_SUCCESS;
+		}
+
+		/* Schedule NAPI for backlog device
+		 * We can use non atomic operation since we own the queue lock
+		 */
+		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
+			if (!rps_ipi_queued(sd))
+				____napi_schedule(sd, &sd->backlog);
+		}
+		goto enqueue;
+	}
+
+	sd->dropped++;
+	rps_unlock(sd);
+
+	local_irq_restore(flags);
+
+	atomic_long_inc(&skb->dev->rx_dropped);
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
 
 /**
  *	netif_rx	-	post buffer to the network code
@@ -1954,42 +2722,43 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
 int netif_rx(struct sk_buff *skb)
 {
-	struct softnet_data *queue;
-	unsigned long flags;
+	int ret;
 
 	/* if netpoll wants it, pretend we never saw it */
 	if (netpoll_rx(skb))
 		return NET_RX_DROP;
 
-	if (!skb->tstamp.tv64)
-		net_timestamp(skb);
+	if (netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
 
-	/*
-	 * The code is rearranged so that the path is the most
-	 * short when CPU is congested, but is still operating.
-	 */
-	local_irq_save(flags);
-	queue = &__get_cpu_var(softnet_data);
+	trace_netif_rx(skb);
+#ifdef CONFIG_RPS
+	{
+		struct rps_dev_flow voidflow, *rflow = &voidflow;
+		int cpu;
 
-	__get_cpu_var(netdev_rx_stat).total++;
-	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
-		if (queue->input_pkt_queue.qlen) {
-enqueue:
-			__skb_queue_tail(&queue->input_pkt_queue, skb);
-			local_irq_restore(flags);
-			return NET_RX_SUCCESS;
-		}
+		preempt_disable();
+		rcu_read_lock();
 
-		napi_schedule(&queue->backlog);
-		goto enqueue;
-	}
+		cpu = get_rps_cpu(skb->dev, skb, &rflow);
+		if (cpu < 0)
+			cpu = smp_processor_id();
 
-	__get_cpu_var(netdev_rx_stat).dropped++;
-	local_irq_restore(flags);
+		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
 
-	kfree_skb(skb);
-	return NET_RX_DROP;
+		rcu_read_unlock();
+		preempt_enable();
+	}
+#else
+	{
+		unsigned int qtail;
+		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+		put_cpu();
+	}
+#endif
+	return ret;
 }
+EXPORT_SYMBOL(netif_rx);
 
 int netif_rx_ni(struct sk_buff *skb)
 {
@@ -2003,7 +2772,6 @@ int netif_rx_ni(struct sk_buff *skb)
 
 	return err;
 }
-
 EXPORT_SYMBOL(netif_rx_ni);
 
 static void net_tx_action(struct softirq_action *h)
@@ -2023,6 +2791,7 @@ static void net_tx_action(struct softirq_action *h)
 			clist = clist->next;
 
 			WARN_ON(atomic_read(&skb->users));
+			trace_kfree_skb(skb, net_tx_action);
 			__kfree_skb(skb);
 		}
 	}
@@ -2033,6 +2802,7 @@ static void net_tx_action(struct softirq_action *h)
 		local_irq_disable();
 		head = sd->output_queue;
 		sd->output_queue = NULL;
+		sd->output_queue_tailp = &sd->output_queue;
 		local_irq_enable();
 
 		while (head) {
@@ -2062,72 +2832,12 @@ static void net_tx_action(struct softirq_action *h)
 	}
 }
 
-static inline int deliver_skb(struct sk_buff *skb,
-			      struct packet_type *pt_prev,
-			      struct net_device *orig_dev)
-{
-	atomic_inc(&skb->users);
-	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
-}
-
-#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-
-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
+    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
 /* This hook is defined here for ATM LANE */
 int (*br_fdb_test_addr_hook)(struct net_device *dev,
 			     unsigned char *addr) __read_mostly;
-EXPORT_SYMBOL(br_fdb_test_addr_hook);
-#endif
-
-/*
- * If bridge module is loaded call bridging hook.
- *  returns NULL if packet was consumed.
- */
-struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL(br_handle_frame_hook);
-
-static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
-					    struct packet_type **pt_prev, int *ret,
-					    struct net_device *orig_dev)
-{
-	struct net_bridge_port *port;
-
-	if (skb->pkt_type == PACKET_LOOPBACK ||
-	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-
-	return br_handle_frame_hook(port, skb);
-}
-#else
-#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
-#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
-struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
-
-static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
-					     struct packet_type **pt_prev,
-					     int *ret,
-					     struct net_device *orig_dev)
-{
-	if (skb->dev->macvlan_port == NULL)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-	return macvlan_handle_frame_hook(skb);
-}
-#else
-#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
+EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
 #ifdef CONFIG_NET_CLS_ACT
@@ -2139,26 +2849,23 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
  * the ingress scheduler, you just cant add policies on ingress.
  *
  */
-static int ing_filter(struct sk_buff *skb)
+static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
 {
 	struct net_device *dev = skb->dev;
 	u32 ttl = G_TC_RTTL(skb->tc_verd);
-	struct netdev_queue *rxq;
 	int result = TC_ACT_OK;
 	struct Qdisc *q;
 
-	if (MAX_RED_LOOP < ttl++) {
-		printk(KERN_WARNING
-		       "Redir loop detected Dropping packet (%d->%d)\n",
-		       skb->iif, dev->ifindex);
+	if (unlikely(MAX_RED_LOOP < ttl++)) {
+		if (net_ratelimit())
+			pr_warning( "Redir loop detected Dropping packet (%d->%d)\n",
+			       skb->skb_iif, dev->ifindex);
 		return TC_ACT_SHOT;
 	}
 
 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
 
-	rxq = &dev->rx_queue;
-
 	q = rxq->qdisc;
 	if (q != &noop_qdisc) {
 		spin_lock(qdisc_lock(q));
@@ -2174,18 +2881,17 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 					 struct packet_type **pt_prev,
 					 int *ret, struct net_device *orig_dev)
 {
-	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
+	struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
+
+	if (!rxq || rxq->qdisc == &noop_qdisc)
 		goto out;
 
 	if (*pt_prev) {
 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
 		*pt_prev = NULL;
-	} else {
-		/* Huh? Why does turning on AF_PACKET affect this? */
-		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
 	}
 
-	switch (ing_filter(skb)) {
+	switch (ing_filter(skb, rxq)) {
 	case TC_ACT_SHOT:
 	case TC_ACT_STOLEN:
 		kfree_skb(skb);
@@ -2198,80 +2904,145 @@ out:
 }
 #endif
 
-/*
- * 	netif_nit_deliver - deliver received packets to network taps
- * 	@skb: buffer
+/**
+ *	netdev_rx_handler_register - register receive handler
+ *	@dev: device to register a handler for
+ *	@rx_handler: receive handler to register
+ *	@rx_handler_data: data pointer that is used by rx handler
  *
- * 	This function is used to deliver incoming packets to network
- * 	taps. It should be used when the normal netif_receive_skb path
- * 	is bypassed, for example because of VLAN acceleration.
+ *	Register a receive handler for a device. This handler will then be
+ *	called from __netif_receive_skb. A negative errno code is returned
+ *	on a failure.
+ *
+ *	The caller must hold the rtnl_mutex.
  */
-void netif_nit_deliver(struct sk_buff *skb)
+int netdev_rx_handler_register(struct net_device *dev,
+			       rx_handler_func_t *rx_handler,
+			       void *rx_handler_data)
 {
-	struct packet_type *ptype;
+	ASSERT_RTNL();
 
-	if (list_empty(&ptype_all))
-		return;
+	if (dev->rx_handler)
+		return -EBUSY;
 
-	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
-	skb->mac_len = skb->network_header - skb->mac_header;
+	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
+	rcu_assign_pointer(dev->rx_handler, rx_handler);
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(ptype, &ptype_all, list) {
-		if (!ptype->dev || ptype->dev == skb->dev)
-			deliver_skb(skb, ptype, skb->dev);
-	}
-	rcu_read_unlock();
+	return 0;
 }
+EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
 
 /**
- *	netif_receive_skb - process receive buffer from network
- *	@skb: buffer to process
- *
- *	netif_receive_skb() is the main receive data processing function.
- *	It always succeeds. The buffer may be dropped during processing
- *	for congestion control or by the protocol layers.
+ *	netdev_rx_handler_unregister - unregister receive handler
+ *	@dev: device to unregister a handler from
  *
- *	This function may only be called from softirq context and interrupts
- *	should be enabled.
+ *	Unregister a receive handler from a device.
  *
- *	Return values (usually ignored):
- *	NET_RX_SUCCESS: no congestion
- *	NET_RX_DROP: packet was dropped
+ *	The caller must hold the rtnl_mutex.
  */
-int netif_receive_skb(struct sk_buff *skb)
+void netdev_rx_handler_unregister(struct net_device *dev)
+{
+
+	ASSERT_RTNL();
+	rcu_assign_pointer(dev->rx_handler, NULL);
+	rcu_assign_pointer(dev->rx_handler_data, NULL);
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+
+static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
+					      struct net_device *master)
+{
+	if (skb->pkt_type == PACKET_HOST) {
+		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
+
+		memcpy(dest, master->dev_addr, ETH_ALEN);
+	}
+}
+
+/* On bonding slaves other than the currently active slave, suppress
+ * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
+ * ARP on active-backup slaves with arp_validate enabled.
+ */
+int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
+{
+	struct net_device *dev = skb->dev;
+
+	if (master->priv_flags & IFF_MASTER_ARPMON)
+		dev->last_rx = jiffies;
+
+	if ((master->priv_flags & IFF_MASTER_ALB) &&
+	    (master->priv_flags & IFF_BRIDGE_PORT)) {
+		/* Do address unmangle. The local destination address
+		 * will be always the one master has. Provides the right
+		 * functionality in a bridge.
+		 */
+		skb_bond_set_mac_by_master(skb, master);
+	}
+
+	if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
+		if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
+		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
+			return 0;
+
+		if (master->priv_flags & IFF_MASTER_ALB) {
+			if (skb->pkt_type != PACKET_BROADCAST &&
+			    skb->pkt_type != PACKET_MULTICAST)
+				return 0;
+		}
+		if (master->priv_flags & IFF_MASTER_8023AD &&
+		    skb->protocol == __cpu_to_be16(ETH_P_SLOW))
+			return 0;
+
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(__skb_bond_should_drop);
+
+static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
+	rx_handler_func_t *rx_handler;
 	struct net_device *orig_dev;
+	struct net_device *master;
 	struct net_device *null_or_orig;
+	struct net_device *orig_or_bond;
 	int ret = NET_RX_DROP;
 	__be16 type;
 
-	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
-		return NET_RX_SUCCESS;
+	if (!netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
+
+	trace_netif_receive_skb(skb);
 
 	/* if we've gotten here through NAPI, check netpoll */
 	if (netpoll_receive_skb(skb))
 		return NET_RX_DROP;
 
-	if (!skb->tstamp.tv64)
-		net_timestamp(skb);
-
-	if (!skb->iif)
-		skb->iif = skb->dev->ifindex;
+	if (!skb->skb_iif)
+		skb->skb_iif = skb->dev->ifindex;
 
+	/*
+	 * bonding note: skbs received on inactive slaves should only
+	 * be delivered to pkt handlers that are exact matches.  Also
+	 * the deliver_no_wcard flag will be set.  If packet handlers
+	 * are sensitive to duplicate packets these skbs will need to
+	 * be dropped at the handler.
+	 */
 	null_or_orig = NULL;
 	orig_dev = skb->dev;
-	if (orig_dev->master) {
-		if (skb_bond_should_drop(skb))
+	master = ACCESS_ONCE(orig_dev->master);
+	if (skb->deliver_no_wcard)
+		null_or_orig = orig_dev;
+	else if (master) {
+		if (skb_bond_should_drop(skb, master)) {
+			skb->deliver_no_wcard = 1;
 			null_or_orig = orig_dev; /* deliver only exact match */
-		else
-			skb->dev = orig_dev->master;
+		} else
+			skb->dev = master;
 	}
 
-	__get_cpu_var(netdev_rx_stat).total++;
-
+	__this_cpu_inc(softnet_data.processed);
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
 	skb->mac_len = skb->network_header - skb->mac_header;
@@ -2303,21 +3074,48 @@ int netif_receive_skb(struct sk_buff *skb)
 ncls:
 #endif
 
-	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
-	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
+	/* Handle special case of bridge or macvlan */
+	rx_handler = rcu_dereference(skb->dev->rx_handler);
+	if (rx_handler) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		skb = rx_handler(skb);
+		if (!skb)
+			goto out;
+	}
 
-	skb_orphan(skb);
+	if (vlan_tx_tag_present(skb)) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		if (vlan_hwaccel_do_receive(&skb)) {
+			ret = __netif_receive_skb(skb);
+			goto out;
+		} else if (unlikely(!skb))
+			goto out;
+	}
+
+	/*
+	 * Make sure frames received on VLAN interfaces stacked on
+	 * bonding interfaces still make their way to any base bonding
+	 * device that may have registered for a specific ptype.  The
+	 * handler may have to adjust skb->dev and orig_dev.
+	 */
+	orig_or_bond = orig_dev;
+	if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
+	    (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
+		orig_or_bond = vlan_dev_real_dev(skb->dev);
+	}
 
 	type = skb->protocol;
 	list_for_each_entry_rcu(ptype,
 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-		if (ptype->type == type &&
-		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
-		     ptype->dev == orig_dev)) {
+		if (ptype->type == type && (ptype->dev == null_or_orig ||
+		     ptype->dev == skb->dev || ptype->dev == orig_dev ||
+		     ptype->dev == orig_or_bond)) {
 			if (pt_prev)
 				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
@@ -2327,6 +3125,7 @@ ncls:
 	if (pt_prev) {
 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 	} else {
+		atomic_long_inc(&skb->dev->rx_dropped);
 		kfree_skb(skb);
 		/* Jamal, now you will not able to escape explaining
 		 * me how you were going to use this. :-)
@@ -2339,18 +3138,80 @@ out:
 	return ret;
 }
 
-/* Network device is going away, flush any packets still pending  */
+/**
+ *	netif_receive_skb - process receive buffer from network
+ *	@skb: buffer to process
+ *
+ *	netif_receive_skb() is the main receive data processing function.
+ *	It always succeeds. The buffer may be dropped during processing
+ *	for congestion control or by the protocol layers.
+ *
+ *	This function may only be called from softirq context and interrupts
+ *	should be enabled.
+ *
+ *	Return values (usually ignored):
+ *	NET_RX_SUCCESS: no congestion
+ *	NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb(struct sk_buff *skb)
+{
+	if (netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
+
+	if (skb_defer_rx_timestamp(skb))
+		return NET_RX_SUCCESS;
+
+#ifdef CONFIG_RPS
+	{
+		struct rps_dev_flow voidflow, *rflow = &voidflow;
+		int cpu, ret;
+
+		rcu_read_lock();
+
+		cpu = get_rps_cpu(skb->dev, skb, &rflow);
+
+		if (cpu >= 0) {
+			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+			rcu_read_unlock();
+		} else {
+			rcu_read_unlock();
+			ret = __netif_receive_skb(skb);
+		}
+
+		return ret;
+	}
+#else
+	return __netif_receive_skb(skb);
+#endif
+}
+EXPORT_SYMBOL(netif_receive_skb);
+
+/* Network device is going away, flush any packets still pending
+ * Called with irqs disabled.
+ */
 static void flush_backlog(void *arg)
 {
 	struct net_device *dev = arg;
-	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
 	struct sk_buff *skb, *tmp;
 
-	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
+	rps_lock(sd);
+	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
+		if (skb->dev == dev) {
+			__skb_unlink(skb, &sd->input_pkt_queue);
+			kfree_skb(skb);
+			input_queue_head_incr(sd);
+		}
+	}
+	rps_unlock(sd);
+
+	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
 		if (skb->dev == dev) {
-			__skb_unlink(skb, &queue->input_pkt_queue);
+			__skb_unlink(skb, &sd->process_queue);
 			kfree_skb(skb);
+			input_queue_head_incr(sd);
 		}
+	}
 }
 
 static int napi_gro_complete(struct sk_buff *skb)
@@ -2385,7 +3246,7 @@ out:
 	return netif_receive_skb(skb);
 }
 
-void napi_gro_flush(struct napi_struct *napi)
+inline void napi_gro_flush(struct napi_struct *napi)
 {
 	struct sk_buff *skb, *next;
 
@@ -2400,7 +3261,7 @@ void napi_gro_flush(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
 	struct packet_type *ptype;
@@ -2408,12 +3269,12 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
 	int same_flow;
 	int mac_len;
-	int ret;
+	enum gro_result ret;
 
-	if (!(skb->dev->features & NETIF_F_GRO))
+	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
 		goto normal;
 
-	if (skb_is_gso(skb) || skb_has_frags(skb))
+	if (skb_is_gso(skb) || skb_has_frag_list(skb))
 		goto normal;
 
 	rcu_read_lock();
@@ -2479,7 +3340,7 @@ pull:
 			put_page(skb_shinfo(skb)->frags[0].page);
 			memmove(skb_shinfo(skb)->frags,
 				skb_shinfo(skb)->frags + 1,
-				--skb_shinfo(skb)->nr_frags);
+				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
 		}
 	}
 
@@ -2492,41 +3353,44 @@ normal:
 }
 EXPORT_SYMBOL(dev_gro_receive);
 
-static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+static inline gro_result_t
+__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff *p;
 
-	if (netpoll_rx_on(skb))
-		return GRO_NORMAL;
-
 	for (p = napi->gro_list; p; p = p->next) {
-		NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
-			&& !compare_ether_header(skb_mac_header(p),
-						 skb_gro_mac_header(skb));
+		unsigned long diffs;
+
+		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
+		diffs |= p->vlan_tci ^ skb->vlan_tci;
+		diffs |= compare_ether_header(skb_mac_header(p),
+					      skb_gro_mac_header(skb));
+		NAPI_GRO_CB(p)->same_flow = !diffs;
 		NAPI_GRO_CB(p)->flush = 0;
 	}
 
 	return dev_gro_receive(napi, skb);
 }
 
-int napi_skb_finish(int ret, struct sk_buff *skb)
+gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
-	int err = NET_RX_SUCCESS;
-
 	switch (ret) {
 	case GRO_NORMAL:
-		return netif_receive_skb(skb);
+		if (netif_receive_skb(skb))
+			ret = GRO_DROP;
+		break;
 
 	case GRO_DROP:
-		err = NET_RX_DROP;
-		/* fall through */
-
 	case GRO_MERGED_FREE:
 		kfree_skb(skb);
 		break;
+
+	case GRO_HELD:
+	case GRO_MERGED:
+		break;
 	}
 
-	return err;
+	return ret;
 }
 EXPORT_SYMBOL(napi_skb_finish);
 
@@ -2546,7 +3410,7 @@ void skb_gro_reset_offset(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(skb_gro_reset_offset);
 
-int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	skb_gro_reset_offset(skb);
 
@@ -2554,60 +3418,52 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(napi_gro_receive);
 
-void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
+static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 {
 	__skb_pull(skb, skb_headlen(skb));
 	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
+	skb->vlan_tci = 0;
 
 	napi->skb = skb;
 }
-EXPORT_SYMBOL(napi_reuse_skb);
 
 struct sk_buff *napi_get_frags(struct napi_struct *napi)
 {
-	struct net_device *dev = napi->dev;
 	struct sk_buff *skb = napi->skb;
 
 	if (!skb) {
-		skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
-		if (!skb)
-			goto out;
-
-		skb_reserve(skb, NET_IP_ALIGN);
-
-		napi->skb = skb;
+		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
+		if (skb)
+			napi->skb = skb;
 	}
-
-out:
 	return skb;
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
+gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
+			       gro_result_t ret)
 {
-	int err = NET_RX_SUCCESS;
-
 	switch (ret) {
 	case GRO_NORMAL:
 	case GRO_HELD:
-		skb->protocol = eth_type_trans(skb, napi->dev);
-
-		if (ret == GRO_NORMAL)
-			return netif_receive_skb(skb);
+		skb->protocol = eth_type_trans(skb, skb->dev);
 
-		skb_gro_pull(skb, -ETH_HLEN);
+		if (ret == GRO_HELD)
+			skb_gro_pull(skb, -ETH_HLEN);
+		else if (netif_receive_skb(skb))
+			ret = GRO_DROP;
 		break;
 
 	case GRO_DROP:
-		err = NET_RX_DROP;
-		/* fall through */
-
 	case GRO_MERGED_FREE:
 		napi_reuse_skb(napi, skb);
 		break;
+
+	case GRO_MERGED:
+		break;
 	}
 
-	return err;
+	return ret;
 }
 EXPORT_SYMBOL(napi_frags_finish);
 
@@ -2648,38 +3504,98 @@ out:
 }
 EXPORT_SYMBOL(napi_frags_skb);
 
-int napi_gro_frags(struct napi_struct *napi)
+gro_result_t napi_gro_frags(struct napi_struct *napi)
 {
 	struct sk_buff *skb = napi_frags_skb(napi);
 
 	if (!skb)
-		return NET_RX_DROP;
+		return GRO_DROP;
 
 	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
 }
 EXPORT_SYMBOL(napi_gro_frags);
 
+/*
+ * net_rps_action_and_irq_enable sends any pending IPIs for RPS.
+ * Note: called with local irq disabled, but exits with local irq enabled.
+ */
+static void net_rps_action_and_irq_enable(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	struct softnet_data *remsd = sd->rps_ipi_list;
+
+	if (remsd) {
+		sd->rps_ipi_list = NULL;
+
+		local_irq_enable();
+
+		/* Send pending IPI's to kick RPS processing on remote cpus. */
+		while (remsd) {
+			struct softnet_data *next = remsd->rps_ipi_next;
+
+			if (cpu_online(remsd->cpu))
+				__smp_call_function_single(remsd->cpu,
+							   &remsd->csd, 0);
+			remsd = next;
+		}
+	} else
+#endif
+		local_irq_enable();
+}
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
-	struct softnet_data *queue = &__get_cpu_var(softnet_data);
-	unsigned long start_time = jiffies;
+	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
 
+#ifdef CONFIG_RPS
+	/* Check if we have pending IPIs; it's better to send them now
+	 * rather than waiting for net_rx_action() to end.
+	 */
+	if (sd->rps_ipi_list) {
+		local_irq_disable();
+		net_rps_action_and_irq_enable(sd);
+	}
+#endif
 	napi->weight = weight_p;
-	do {
+	local_irq_disable();
+	while (work < quota) {
 		struct sk_buff *skb;
+		unsigned int qlen;
 
-		local_irq_disable();
-		skb = __skb_dequeue(&queue->input_pkt_queue);
-		if (!skb) {
-			__napi_complete(napi);
+		while ((skb = __skb_dequeue(&sd->process_queue))) {
 			local_irq_enable();
-			break;
+			__netif_receive_skb(skb);
+			local_irq_disable();
+			input_queue_head_incr(sd);
+			if (++work >= quota) {
+				local_irq_enable();
+				return work;
+			}
 		}
-		local_irq_enable();
 
-		netif_receive_skb(skb);
-	} while (++work < quota && jiffies == start_time);
+		rps_lock(sd);
+		qlen = skb_queue_len(&sd->input_pkt_queue);
+		if (qlen)
+			skb_queue_splice_tail_init(&sd->input_pkt_queue,
+						   &sd->process_queue);
+
+		if (qlen < quota - work) {
+			/*
+			 * Inline a custom version of __napi_complete().
+			 * only current cpu owns and manipulates this napi,
+			 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
+			 * we can use a plain write instead of clear_bit(),
+			 * and we dont need an smp_mb() memory barrier.
+			 */
+			list_del(&napi->poll_list);
+			napi->state = 0;
+
+			quota = work + qlen;
+		}
+		rps_unlock(sd);
+	}
+	local_irq_enable();
 
 	return work;
 }
@@ -2695,8 +3611,7 @@ void __napi_schedule(struct napi_struct *n)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
-	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	____napi_schedule(&__get_cpu_var(softnet_data), n);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(__napi_schedule);
@@ -2767,17 +3682,16 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
-
 static void net_rx_action(struct softirq_action *h)
 {
-	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
 	unsigned long time_limit = jiffies + 2;
 	int budget = netdev_budget;
 	void *have;
 
 	local_irq_disable();
 
-	while (!list_empty(list)) {
+	while (!list_empty(&sd->poll_list)) {
 		struct napi_struct *n;
 		int work, weight;
 
@@ -2795,7 +3709,7 @@ static void net_rx_action(struct softirq_action *h)
 		 * entries to the tail of this list, and only ->poll()
 		 * calls can remove this head entry from the list.
 		 */
-		n = list_entry(list->next, struct napi_struct, poll_list);
+		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
 
 		have = netpoll_poll_lock(n);
 
@@ -2825,16 +3739,18 @@ static void net_rx_action(struct softirq_action *h)
 		 * move the instance around on the list at-will.
 		 */
 		if (unlikely(work == weight)) {
-			if (unlikely(napi_disable_pending(n)))
-				__napi_complete(n);
-			else
-				list_move_tail(&n->poll_list, list);
+			if (unlikely(napi_disable_pending(n))) {
+				local_irq_enable();
+				napi_complete(n);
+				local_irq_disable();
+			} else
+				list_move_tail(&n->poll_list, &sd->poll_list);
 		}
 
 		netpoll_poll_unlock(have);
 	}
 out:
-	local_irq_enable();
+	net_rps_action_and_irq_enable(sd);
 
 #ifdef CONFIG_NET_DMA
 	/*
@@ -2847,12 +3763,12 @@ out:
 	return;
 
 softnet_break:
-	__get_cpu_var(netdev_rx_stat).time_squeeze++;
+	sd->time_squeeze++;
 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 	goto out;
 }
 
-static gifconf_func_t * gifconf_list [NPROTO];
+static gifconf_func_t *gifconf_list[NPROTO];
 
 /**
  *	register_gifconf	-	register a SIOCGIF handler
@@ -2863,13 +3779,14 @@ static gifconf_func_t * gifconf_list [NPROTO];
  *	that is passed must not be freed or reused until it has been replaced
  *	by another handler.
  */
-int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
+int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
 {
 	if (family >= NPROTO)
 		return -EINVAL;
 	gifconf_list[family] = gifconf;
 	return 0;
 }
+EXPORT_SYMBOL(register_gifconf);
 
 
 /*
@@ -2895,15 +3812,15 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 		return -EFAULT;
 
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
 	if (!dev) {
-		read_unlock(&dev_base_lock);
+		rcu_read_unlock();
 		return -ENODEV;
 	}
 
 	strcpy(ifr.ifr_name, dev->name);
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 
 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
 		return -EFAULT;
@@ -2973,18 +3890,18 @@ static int dev_ifconf(struct net *net, char __user *arg)
  *	in detail.
  */
 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(dev_base_lock)
+	__acquires(RCU)
 {
 	struct net *net = seq_file_net(seq);
 	loff_t off;
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
+	rcu_read_lock();
 	if (!*pos)
 		return SEQ_START_TOKEN;
 
 	off = 1;
-	for_each_netdev(net, dev)
+	for_each_netdev_rcu(net, dev)
 		if (off++ == *pos)
 			return dev;
 
@@ -2993,24 +3910,27 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct net *net = seq_file_net(seq);
+	struct net_device *dev = (v == SEQ_START_TOKEN) ?
+				  first_net_device(seq_file_net(seq)) :
+				  next_net_device((struct net_device *)v);
+
 	++*pos;
-	return v == SEQ_START_TOKEN ?
-		first_net_device(net) : next_net_device((struct net_device *)v);
+	return rcu_dereference(dev);
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
-	__releases(dev_base_lock)
+	__releases(RCU)
 {
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
-	const struct net_device_stats *stats = dev_get_stats(dev);
+	struct rtnl_link_stats64 temp;
+	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
 
-	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
-		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
+	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
+		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
 		   dev->name, stats->rx_bytes, stats->rx_packets,
 		   stats->rx_errors,
 		   stats->rx_dropped + stats->rx_missed_errors,
@@ -3045,17 +3965,17 @@ static int dev_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct netif_rx_stats *softnet_get_online(loff_t *pos)
+static struct softnet_data *softnet_get_online(loff_t *pos)
 {
-	struct netif_rx_stats *rc = NULL;
+	struct softnet_data *sd = NULL;
 
 	while (*pos < nr_cpu_ids)
 		if (cpu_online(*pos)) {
-			rc = &per_cpu(netdev_rx_stat, *pos);
+			sd = &per_cpu(softnet_data, *pos);
 			break;
 		} else
 			++*pos;
-	return rc;
+	return sd;
 }
 
 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
@@ -3075,12 +3995,12 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
 
 static int softnet_seq_show(struct seq_file *seq, void *v)
 {
-	struct netif_rx_stats *s = v;
+	struct softnet_data *sd = v;
 
-	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
-		   s->total, s->dropped, s->time_squeeze, 0,
+	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+		   sd->processed, sd->dropped, sd->time_squeeze, 0,
 		   0, 0, 0, 0, /* was fastroute */
-		   s->cpu_collision );
+		   sd->cpu_collision, sd->received_rps);
 	return 0;
 }
 
@@ -3303,11 +4223,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 
 	slave->master = master;
 
-	synchronize_net();
-
-	if (old)
+	if (old) {
+		synchronize_net();
 		dev_put(old);
-
+	}
 	if (master)
 		slave->flags |= IFF_SLAVE;
 	else
@@ -3316,6 +4235,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
 	return 0;
 }
+EXPORT_SYMBOL(netdev_set_master);
 
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
@@ -3394,6 +4314,7 @@ int dev_set_promiscuity(struct net_device *dev, int inc)
 		dev_set_rx_mode(dev);
 	return err;
 }
+EXPORT_SYMBOL(dev_set_promiscuity);
 
 /**
  *	dev_set_allmulti	- update allmulti count on a device
@@ -3437,6 +4358,7 @@ int dev_set_allmulti(struct net_device *dev, int inc)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(dev_set_allmulti);
 
 /*
  *	Upload unicast and multicast address lists to device and
@@ -3461,10 +4383,10 @@ void __dev_set_rx_mode(struct net_device *dev)
 		/* Unicast addresses changes may only happen under the rtnl,
 		 * therefore calling __dev_set_promiscuity here is safe.
 		 */
-		if (dev->uc.count > 0 && !dev->uc_promisc) {
+		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
 			__dev_set_promiscuity(dev, 1);
 			dev->uc_promisc = 1;
-		} else if (dev->uc.count == 0 && dev->uc_promisc) {
+		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
 			__dev_set_promiscuity(dev, -1);
 			dev->uc_promisc = 0;
 		}
@@ -3481,555 +4403,6 @@ void dev_set_rx_mode(struct net_device *dev)
 	netif_addr_unlock_bh(dev);
 }
 
-/* hw addresses list handling functions */
-
-static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
-			 int addr_len, unsigned char addr_type)
-{
-	struct netdev_hw_addr *ha;
-	int alloc_size;
-
-	if (addr_len > MAX_ADDR_LEN)
-		return -EINVAL;
-
-	list_for_each_entry(ha, &list->list, list) {
-		if (!memcmp(ha->addr, addr, addr_len) &&
-		    ha->type == addr_type) {
-			ha->refcount++;
-			return 0;
-		}
-	}
-
-
-	alloc_size = sizeof(*ha);
-	if (alloc_size < L1_CACHE_BYTES)
-		alloc_size = L1_CACHE_BYTES;
-	ha = kmalloc(alloc_size, GFP_ATOMIC);
-	if (!ha)
-		return -ENOMEM;
-	memcpy(ha->addr, addr, addr_len);
-	ha->type = addr_type;
-	ha->refcount = 1;
-	ha->synced = false;
-	list_add_tail_rcu(&ha->list, &list->list);
-	list->count++;
-	return 0;
-}
-
-static void ha_rcu_free(struct rcu_head *head)
-{
-	struct netdev_hw_addr *ha;
-
-	ha = container_of(head, struct netdev_hw_addr, rcu_head);
-	kfree(ha);
-}
-
-static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
-			 int addr_len, unsigned char addr_type)
-{
-	struct netdev_hw_addr *ha;
-
-	list_for_each_entry(ha, &list->list, list) {
-		if (!memcmp(ha->addr, addr, addr_len) &&
-		    (ha->type == addr_type || !addr_type)) {
-			if (--ha->refcount)
-				return 0;
-			list_del_rcu(&ha->list);
-			call_rcu(&ha->rcu_head, ha_rcu_free);
-			list->count--;
-			return 0;
-		}
-	}
-	return -ENOENT;
-}
-
-static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
-				  struct netdev_hw_addr_list *from_list,
-				  int addr_len,
-				  unsigned char addr_type)
-{
-	int err;
-	struct netdev_hw_addr *ha, *ha2;
-	unsigned char type;
-
-	list_for_each_entry(ha, &from_list->list, list) {
-		type = addr_type ? addr_type : ha->type;
-		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
-		if (err)
-			goto unroll;
-	}
-	return 0;
-
-unroll:
-	list_for_each_entry(ha2, &from_list->list, list) {
-		if (ha2 == ha)
-			break;
-		type = addr_type ? addr_type : ha2->type;
-		__hw_addr_del(to_list, ha2->addr, addr_len, type);
-	}
-	return err;
-}
-
-static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
-				   struct netdev_hw_addr_list *from_list,
-				   int addr_len,
-				   unsigned char addr_type)
-{
-	struct netdev_hw_addr *ha;
-	unsigned char type;
-
-	list_for_each_entry(ha, &from_list->list, list) {
-		type = addr_type ? addr_type : ha->type;
-		__hw_addr_del(to_list, ha->addr, addr_len, addr_type);
-	}
-}
-
-static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
-			  struct netdev_hw_addr_list *from_list,
-			  int addr_len)
-{
-	int err = 0;
-	struct netdev_hw_addr *ha, *tmp;
-
-	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
-		if (!ha->synced) {
-			err = __hw_addr_add(to_list, ha->addr,
-					    addr_len, ha->type);
-			if (err)
-				break;
-			ha->synced = true;
-			ha->refcount++;
-		} else if (ha->refcount == 1) {
-			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
-			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
-		}
-	}
-	return err;
-}
-
-static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
-			     struct netdev_hw_addr_list *from_list,
-			     int addr_len)
-{
-	struct netdev_hw_addr *ha, *tmp;
-
-	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
-		if (ha->synced) {
-			__hw_addr_del(to_list, ha->addr,
-				      addr_len, ha->type);
-			ha->synced = false;
-			__hw_addr_del(from_list, ha->addr,
-				      addr_len, ha->type);
-		}
-	}
-}
-
-static void __hw_addr_flush(struct netdev_hw_addr_list *list)
-{
-	struct netdev_hw_addr *ha, *tmp;
-
-	list_for_each_entry_safe(ha, tmp, &list->list, list) {
-		list_del_rcu(&ha->list);
-		call_rcu(&ha->rcu_head, ha_rcu_free);
-	}
-	list->count = 0;
-}
-
-static void __hw_addr_init(struct netdev_hw_addr_list *list)
-{
-	INIT_LIST_HEAD(&list->list);
-	list->count = 0;
-}
-
-/* Device addresses handling functions */
-
-static void dev_addr_flush(struct net_device *dev)
-{
-	/* rtnl_mutex must be held here */
-
-	__hw_addr_flush(&dev->dev_addrs);
-	dev->dev_addr = NULL;
-}
-
-static int dev_addr_init(struct net_device *dev)
-{
-	unsigned char addr[MAX_ADDR_LEN];
-	struct netdev_hw_addr *ha;
-	int err;
-
-	/* rtnl_mutex must be held here */
-
-	__hw_addr_init(&dev->dev_addrs);
-	memset(addr, 0, sizeof(addr));
-	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
-			    NETDEV_HW_ADDR_T_LAN);
-	if (!err) {
-		/*
-		 * Get the first (previously created) address from the list
-		 * and set dev_addr pointer to this location.
-		 */
-		ha = list_first_entry(&dev->dev_addrs.list,
-				      struct netdev_hw_addr, list);
-		dev->dev_addr = ha->addr;
-	}
-	return err;
-}
-
-/**
- *	dev_addr_add	- Add a device address
- *	@dev: device
- *	@addr: address to add
- *	@addr_type: address type
- *
- *	Add a device address to the device or increase the reference count if
- *	it already exists.
- *
- *	The caller must hold the rtnl_mutex.
- */
-int dev_addr_add(struct net_device *dev, unsigned char *addr,
-		 unsigned char addr_type)
-{
-	int err;
-
-	ASSERT_RTNL();
-
-	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
-	if (!err)
-		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
-	return err;
-}
-EXPORT_SYMBOL(dev_addr_add);
-
-/**
- *	dev_addr_del	- Release a device address.
- *	@dev: device
- *	@addr: address to delete
- *	@addr_type: address type
- *
- *	Release reference to a device address and remove it from the device
- *	if the reference count drops to zero.
- *
- *	The caller must hold the rtnl_mutex.
- */
-int dev_addr_del(struct net_device *dev, unsigned char *addr,
-		 unsigned char addr_type)
-{
-	int err;
-	struct netdev_hw_addr *ha;
-
-	ASSERT_RTNL();
-
-	/*
-	 * We can not remove the first address from the list because
-	 * dev->dev_addr points to that.
-	 */
-	ha = list_first_entry(&dev->dev_addrs.list,
-			      struct netdev_hw_addr, list);
-	if (ha->addr == dev->dev_addr && ha->refcount == 1)
-		return -ENOENT;
-
-	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
-			    addr_type);
-	if (!err)
-		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
-	return err;
-}
-EXPORT_SYMBOL(dev_addr_del);
-
-/**
- *	dev_addr_add_multiple	- Add device addresses from another device
- *	@to_dev: device to which addresses will be added
- *	@from_dev: device from which addresses will be added
- *	@addr_type: address type - 0 means type will be used from from_dev
- *
- *	Add device addresses of the one device to another.
- **
- *	The caller must hold the rtnl_mutex.
- */
-int dev_addr_add_multiple(struct net_device *to_dev,
-			  struct net_device *from_dev,
-			  unsigned char addr_type)
-{
-	int err;
-
-	ASSERT_RTNL();
-
-	if (from_dev->addr_len != to_dev->addr_len)
-		return -EINVAL;
-	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
-				     to_dev->addr_len, addr_type);
-	if (!err)
-		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
-	return err;
-}
-EXPORT_SYMBOL(dev_addr_add_multiple);
-
-/**
- *	dev_addr_del_multiple	- Delete device addresses by another device
- *	@to_dev: device where the addresses will be deleted
- *	@from_dev: device by which addresses the addresses will be deleted
- *	@addr_type: address type - 0 means type will used from from_dev
- *
- *	Deletes addresses in to device by the list of addresses in from device.
- *
- *	The caller must hold the rtnl_mutex.
- */
-int dev_addr_del_multiple(struct net_device *to_dev,
-			  struct net_device *from_dev,
-			  unsigned char addr_type)
-{
-	ASSERT_RTNL();
-
-	if (from_dev->addr_len != to_dev->addr_len)
-		return -EINVAL;
-	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
-			       to_dev->addr_len, addr_type);
-	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
-	return 0;
-}
-EXPORT_SYMBOL(dev_addr_del_multiple);
-
-/* multicast addresses handling functions */
-
-int __dev_addr_delete(struct dev_addr_list **list, int *count,
-		      void *addr, int alen, int glbl)
-{
-	struct dev_addr_list *da;
-
-	for (; (da = *list) != NULL; list = &da->next) {
-		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
-		    alen == da->da_addrlen) {
-			if (glbl) {
-				int old_glbl = da->da_gusers;
-				da->da_gusers = 0;
-				if (old_glbl == 0)
-					break;
-			}
-			if (--da->da_users)
-				return 0;
-
-			*list = da->next;
-			kfree(da);
-			(*count)--;
-			return 0;
-		}
-	}
-	return -ENOENT;
-}
-
-int __dev_addr_add(struct dev_addr_list **list, int *count,
-		   void *addr, int alen, int glbl)
-{
-	struct dev_addr_list *da;
-
-	for (da = *list; da != NULL; da = da->next) {
-		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
-		    da->da_addrlen == alen) {
-			if (glbl) {
-				int old_glbl = da->da_gusers;
-				da->da_gusers = 1;
-				if (old_glbl)
-					return 0;
-			}
-			da->da_users++;
-			return 0;
-		}
-	}
-
-	da = kzalloc(sizeof(*da), GFP_ATOMIC);
-	if (da == NULL)
-		return -ENOMEM;
-	memcpy(da->da_addr, addr, alen);
-	da->da_addrlen = alen;
-	da->da_users = 1;
-	da->da_gusers = glbl ? 1 : 0;
-	da->next = *list;
-	*list = da;
-	(*count)++;
-	return 0;
-}
-
-/**
- *	dev_unicast_delete	- Release secondary unicast address.
- *	@dev: device
- *	@addr: address to delete
- *
- *	Release reference to a secondary unicast address and remove it
- *	from the device if the reference count drops to zero.
- *
- * 	The caller must hold the rtnl_mutex.
- */
-int dev_unicast_delete(struct net_device *dev, void *addr)
-{
-	int err;
-
-	ASSERT_RTNL();
-
-	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
-			    NETDEV_HW_ADDR_T_UNICAST);
-	if (!err)
-		__dev_set_rx_mode(dev);
-	return err;
-}
-EXPORT_SYMBOL(dev_unicast_delete);
-
-/**
- *	dev_unicast_add		- add a secondary unicast address
- *	@dev: device
- *	@addr: address to add
- *
- *	Add a secondary unicast address to the device or increase
- *	the reference count if it already exists.
- *
- *	The caller must hold the rtnl_mutex.
- */
-int dev_unicast_add(struct net_device *dev, void *addr)
-{
-	int err;
-
-	ASSERT_RTNL();
-
-	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
-			    NETDEV_HW_ADDR_T_UNICAST);
-	if (!err)
-		__dev_set_rx_mode(dev);
-	return err;
-}
-EXPORT_SYMBOL(dev_unicast_add);
-
-int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
-		    struct dev_addr_list **from, int *from_count)
-{
-	struct dev_addr_list *da, *next;
-	int err = 0;
-
-	da = *from;
-	while (da != NULL) {
-		next = da->next;
-		if (!da->da_synced) {
-			err = __dev_addr_add(to, to_count,
-					     da->da_addr, da->da_addrlen, 0);
-			if (err < 0)
-				break;
-			da->da_synced = 1;
-			da->da_users++;
-		} else if (da->da_users == 1) {
-			__dev_addr_delete(to, to_count,
-					  da->da_addr, da->da_addrlen, 0);
-			__dev_addr_delete(from, from_count,
-					  da->da_addr, da->da_addrlen, 0);
-		}
-		da = next;
-	}
-	return err;
-}
-
-void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
-		       struct dev_addr_list **from, int *from_count)
-{
-	struct dev_addr_list *da, *next;
-
-	da = *from;
-	while (da != NULL) {
-		next = da->next;
-		if (da->da_synced) {
-			__dev_addr_delete(to, to_count,
-					  da->da_addr, da->da_addrlen, 0);
-			da->da_synced = 0;
-			__dev_addr_delete(from, from_count,
-					  da->da_addr, da->da_addrlen, 0);
-		}
-		da = next;
-	}
-}
-
-/**
- *	dev_unicast_sync - Synchronize device's unicast list to another device
- *	@to: destination device
- *	@from: source device
- *
- *	Add newly added addresses to the destination device and release
- *	addresses that have no users left.
- *
- *	This function is intended to be called from the dev->set_rx_mode
- *	function of layered software devices.
- */
-int dev_unicast_sync(struct net_device *to, struct net_device *from)
-{
-	int err = 0;
-
-	ASSERT_RTNL();
-
-	if (to->addr_len != from->addr_len)
-		return -EINVAL;
-
-	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
-	if (!err)
-		__dev_set_rx_mode(to);
-	return err;
-}
-EXPORT_SYMBOL(dev_unicast_sync);
-
-/**
- *	dev_unicast_unsync - Remove synchronized addresses from the destination device
- *	@to: destination device
- *	@from: source device
- *
- *	Remove all addresses that were added to the destination device by
- *	dev_unicast_sync(). This function is intended to be called from the
- *	dev->stop function of layered software devices.
- */
-void dev_unicast_unsync(struct net_device *to, struct net_device *from)
-{
-	ASSERT_RTNL();
-
-	if (to->addr_len != from->addr_len)
-		return;
-
-	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
-	__dev_set_rx_mode(to);
-}
-EXPORT_SYMBOL(dev_unicast_unsync);
-
-static void dev_unicast_flush(struct net_device *dev)
-{
-	/* rtnl_mutex must be held here */
-
-	__hw_addr_flush(&dev->uc);
-}
-
-static void dev_unicast_init(struct net_device *dev)
-{
-	/* rtnl_mutex must be held here */
-
-	__hw_addr_init(&dev->uc);
-}
-
-
-static void __dev_addr_discard(struct dev_addr_list **list)
-{
-	struct dev_addr_list *tmp;
-
-	while (*list != NULL) {
-		tmp = *list;
-		*list = tmp->next;
-		if (tmp->da_users > tmp->da_gusers)
-			printk("__dev_addr_discard: address leakage! "
-			       "da_users=%d\n", tmp->da_users);
-		kfree(tmp);
-	}
-}
-
-static void dev_addr_discard(struct net_device *dev)
-{
-	netif_addr_lock_bh(dev);
-
-	__dev_addr_discard(&dev->mc_list);
-	dev->mc_count = 0;
-
-	netif_addr_unlock_bh(dev);
-}
-
 /**
  *	dev_get_flags - get flags reported to userspace
  *	@dev: device
@@ -4059,19 +4432,12 @@ unsigned dev_get_flags(const struct net_device *dev)
 
 	return flags;
 }
+EXPORT_SYMBOL(dev_get_flags);
 
-/**
- *	dev_change_flags - change device settings
- *	@dev: device
- *	@flags: device state flags
- *
- *	Change settings on device based state flags. The flags are
- *	in the userspace exported format.
- */
-int dev_change_flags(struct net_device *dev, unsigned flags)
+int __dev_change_flags(struct net_device *dev, unsigned int flags)
 {
-	int ret, changes;
 	int old_flags = dev->flags;
+	int ret;
 
 	ASSERT_RTNL();
 
@@ -4102,19 +4468,15 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
 
 	ret = 0;
 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
-		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
+		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
 
 		if (!ret)
 			dev_set_rx_mode(dev);
 	}
 
-	if (dev->flags & IFF_UP &&
-	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
-					  IFF_VOLATILE)))
-		call_netdevice_notifiers(NETDEV_CHANGE, dev);
-
 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
-		int inc = (flags & IFF_PROMISC) ? +1 : -1;
+		int inc = (flags & IFF_PROMISC) ? 1 : -1;
+
 		dev->gflags ^= IFF_PROMISC;
 		dev_set_promiscuity(dev, inc);
 	}
@@ -4124,18 +4486,56 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
 	   IFF_ALLMULTI is requested not asking us and not reporting.
 	 */
 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
-		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
+		int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
+
 		dev->gflags ^= IFF_ALLMULTI;
 		dev_set_allmulti(dev, inc);
 	}
 
-	/* Exclude state transition flags, already notified */
-	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
+	return ret;
+}
+
+void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
+{
+	unsigned int changes = dev->flags ^ old_flags;
+
+	if (changes & IFF_UP) {
+		if (dev->flags & IFF_UP)
+			call_netdevice_notifiers(NETDEV_UP, dev);
+		else
+			call_netdevice_notifiers(NETDEV_DOWN, dev);
+	}
+
+	if (dev->flags & IFF_UP &&
+	    (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
+		call_netdevice_notifiers(NETDEV_CHANGE, dev);
+}
+
+/**
+ *	dev_change_flags - change device settings
+ *	@dev: device
+ *	@flags: device state flags
+ *
+ *	Change settings on device based on state flags. The flags are
+ *	in the userspace exported format.
+ */
+int dev_change_flags(struct net_device *dev, unsigned flags)
+{
+	int ret, changes;
+	int old_flags = dev->flags;
+
+	ret = __dev_change_flags(dev, flags);
+	if (ret < 0)
+		return ret;
+
+	changes = old_flags ^ dev->flags;
 	if (changes)
 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
 
+	__dev_notify_flags(dev, old_flags);
 	return ret;
 }
+EXPORT_SYMBOL(dev_change_flags);
 
 /**
  *	dev_set_mtu - Change maximum transfer unit
@@ -4169,6 +4569,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
 	return err;
 }
+EXPORT_SYMBOL(dev_set_mtu);
 
 /**
  *	dev_set_mac_address - Change Media Access Control Address
@@ -4193,69 +4594,70 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
 	return err;
 }
+EXPORT_SYMBOL(dev_set_mac_address);
 
 /*
- *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
+ *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
  */
 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
 {
 	int err;
-	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+	struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
 
 	if (!dev)
 		return -ENODEV;
 
 	switch (cmd) {
-		case SIOCGIFFLAGS:	/* Get interface flags */
-			ifr->ifr_flags = (short) dev_get_flags(dev);
-			return 0;
+	case SIOCGIFFLAGS:	/* Get interface flags */
+		ifr->ifr_flags = (short) dev_get_flags(dev);
+		return 0;
 
-		case SIOCGIFMETRIC:	/* Get the metric on the interface
-					   (currently unused) */
-			ifr->ifr_metric = 0;
-			return 0;
+	case SIOCGIFMETRIC:	/* Get the metric on the interface
+				   (currently unused) */
+		ifr->ifr_metric = 0;
+		return 0;
 
-		case SIOCGIFMTU:	/* Get the MTU of a device */
-			ifr->ifr_mtu = dev->mtu;
-			return 0;
+	case SIOCGIFMTU:	/* Get the MTU of a device */
+		ifr->ifr_mtu = dev->mtu;
+		return 0;
 
-		case SIOCGIFHWADDR:
-			if (!dev->addr_len)
-				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
-			else
-				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
-				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
-			ifr->ifr_hwaddr.sa_family = dev->type;
-			return 0;
+	case SIOCGIFHWADDR:
+		if (!dev->addr_len)
+			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
+		else
+			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
+			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+		ifr->ifr_hwaddr.sa_family = dev->type;
+		return 0;
 
-		case SIOCGIFSLAVE:
-			err = -EINVAL;
-			break;
+	case SIOCGIFSLAVE:
+		err = -EINVAL;
+		break;
 
-		case SIOCGIFMAP:
-			ifr->ifr_map.mem_start = dev->mem_start;
-			ifr->ifr_map.mem_end   = dev->mem_end;
-			ifr->ifr_map.base_addr = dev->base_addr;
-			ifr->ifr_map.irq       = dev->irq;
-			ifr->ifr_map.dma       = dev->dma;
-			ifr->ifr_map.port      = dev->if_port;
-			return 0;
+	case SIOCGIFMAP:
+		ifr->ifr_map.mem_start = dev->mem_start;
+		ifr->ifr_map.mem_end   = dev->mem_end;
+		ifr->ifr_map.base_addr = dev->base_addr;
+		ifr->ifr_map.irq       = dev->irq;
+		ifr->ifr_map.dma       = dev->dma;
+		ifr->ifr_map.port      = dev->if_port;
+		return 0;
 
-		case SIOCGIFINDEX:
-			ifr->ifr_ifindex = dev->ifindex;
-			return 0;
+	case SIOCGIFINDEX:
+		ifr->ifr_ifindex = dev->ifindex;
+		return 0;
 
-		case SIOCGIFTXQLEN:
-			ifr->ifr_qlen = dev->tx_queue_len;
-			return 0;
+	case SIOCGIFTXQLEN:
+		ifr->ifr_qlen = dev->tx_queue_len;
+		return 0;
 
-		default:
-			/* dev_ioctl() should ensure this case
-			 * is never reached
-			 */
-			WARN_ON(1);
-			err = -EINVAL;
-			break;
+	default:
+		/* dev_ioctl() should ensure this case
+		 * is never reached
+		 */
+		WARN_ON(1);
+		err = -EINVAL;
+		break;
 
 	}
 	return err;
@@ -4276,92 +4678,89 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 	ops = dev->netdev_ops;
 
 	switch (cmd) {
-		case SIOCSIFFLAGS:	/* Set interface flags */
-			return dev_change_flags(dev, ifr->ifr_flags);
-
-		case SIOCSIFMETRIC:	/* Set the metric on the interface
-					   (currently unused) */
-			return -EOPNOTSUPP;
+	case SIOCSIFFLAGS:	/* Set interface flags */
+		return dev_change_flags(dev, ifr->ifr_flags);
 
-		case SIOCSIFMTU:	/* Set the MTU of a device */
-			return dev_set_mtu(dev, ifr->ifr_mtu);
+	case SIOCSIFMETRIC:	/* Set the metric on the interface
+				   (currently unused) */
+		return -EOPNOTSUPP;
 
-		case SIOCSIFHWADDR:
-			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
+	case SIOCSIFMTU:	/* Set the MTU of a device */
+		return dev_set_mtu(dev, ifr->ifr_mtu);
 
-		case SIOCSIFHWBROADCAST:
-			if (ifr->ifr_hwaddr.sa_family != dev->type)
-				return -EINVAL;
-			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
-			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
-			call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
-			return 0;
+	case SIOCSIFHWADDR:
+		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
 
-		case SIOCSIFMAP:
-			if (ops->ndo_set_config) {
-				if (!netif_device_present(dev))
-					return -ENODEV;
-				return ops->ndo_set_config(dev, &ifr->ifr_map);
-			}
-			return -EOPNOTSUPP;
+	case SIOCSIFHWBROADCAST:
+		if (ifr->ifr_hwaddr.sa_family != dev->type)
+			return -EINVAL;
+		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
+		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+		return 0;
 
-		case SIOCADDMULTI:
-			if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
-			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
-				return -EINVAL;
+	case SIOCSIFMAP:
+		if (ops->ndo_set_config) {
 			if (!netif_device_present(dev))
 				return -ENODEV;
-			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
-					  dev->addr_len, 1);
+			return ops->ndo_set_config(dev, &ifr->ifr_map);
+		}
+		return -EOPNOTSUPP;
 
-		case SIOCDELMULTI:
-			if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
-			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
-				return -EINVAL;
-			if (!netif_device_present(dev))
-				return -ENODEV;
-			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
-					     dev->addr_len, 1);
+	case SIOCADDMULTI:
+		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
+		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+			return -EINVAL;
+		if (!netif_device_present(dev))
+			return -ENODEV;
+		return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
 
-		case SIOCSIFTXQLEN:
-			if (ifr->ifr_qlen < 0)
-				return -EINVAL;
-			dev->tx_queue_len = ifr->ifr_qlen;
-			return 0;
+	case SIOCDELMULTI:
+		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
+		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+			return -EINVAL;
+		if (!netif_device_present(dev))
+			return -ENODEV;
+		return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
 
-		case SIOCSIFNAME:
-			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
-			return dev_change_name(dev, ifr->ifr_newname);
+	case SIOCSIFTXQLEN:
+		if (ifr->ifr_qlen < 0)
+			return -EINVAL;
+		dev->tx_queue_len = ifr->ifr_qlen;
+		return 0;
 
-		/*
-		 *	Unknown or private ioctl
-		 */
+	case SIOCSIFNAME:
+		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
+		return dev_change_name(dev, ifr->ifr_newname);
 
-		default:
-			if ((cmd >= SIOCDEVPRIVATE &&
-			    cmd <= SIOCDEVPRIVATE + 15) ||
-			    cmd == SIOCBONDENSLAVE ||
-			    cmd == SIOCBONDRELEASE ||
-			    cmd == SIOCBONDSETHWADDR ||
-			    cmd == SIOCBONDSLAVEINFOQUERY ||
-			    cmd == SIOCBONDINFOQUERY ||
-			    cmd == SIOCBONDCHANGEACTIVE ||
-			    cmd == SIOCGMIIPHY ||
-			    cmd == SIOCGMIIREG ||
-			    cmd == SIOCSMIIREG ||
-			    cmd == SIOCBRADDIF ||
-			    cmd == SIOCBRDELIF ||
-			    cmd == SIOCSHWTSTAMP ||
-			    cmd == SIOCWANDEV) {
-				err = -EOPNOTSUPP;
-				if (ops->ndo_do_ioctl) {
-					if (netif_device_present(dev))
-						err = ops->ndo_do_ioctl(dev, ifr, cmd);
-					else
-						err = -ENODEV;
-				}
-			} else
-				err = -EINVAL;
+	/*
+	 *	Unknown or private ioctl
+	 */
+	default:
+		if ((cmd >= SIOCDEVPRIVATE &&
+		    cmd <= SIOCDEVPRIVATE + 15) ||
+		    cmd == SIOCBONDENSLAVE ||
+		    cmd == SIOCBONDRELEASE ||
+		    cmd == SIOCBONDSETHWADDR ||
+		    cmd == SIOCBONDSLAVEINFOQUERY ||
+		    cmd == SIOCBONDINFOQUERY ||
+		    cmd == SIOCBONDCHANGEACTIVE ||
+		    cmd == SIOCGMIIPHY ||
+		    cmd == SIOCGMIIREG ||
+		    cmd == SIOCSMIIREG ||
+		    cmd == SIOCBRADDIF ||
+		    cmd == SIOCBRDELIF ||
+		    cmd == SIOCSHWTSTAMP ||
+		    cmd == SIOCWANDEV) {
+			err = -EOPNOTSUPP;
+			if (ops->ndo_do_ioctl) {
+				if (netif_device_present(dev))
+					err = ops->ndo_do_ioctl(dev, ifr, cmd);
+				else
+					err = -ENODEV;
+			}
+		} else
+			err = -EINVAL;
 
 	}
 	return err;
@@ -4418,135 +4817,135 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	 */
 
 	switch (cmd) {
-		/*
-		 *	These ioctl calls:
-		 *	- can be done by all.
-		 *	- atomic and do not require locking.
-		 *	- return a value
-		 */
-		case SIOCGIFFLAGS:
-		case SIOCGIFMETRIC:
-		case SIOCGIFMTU:
-		case SIOCGIFHWADDR:
-		case SIOCGIFSLAVE:
-		case SIOCGIFMAP:
-		case SIOCGIFINDEX:
-		case SIOCGIFTXQLEN:
-			dev_load(net, ifr.ifr_name);
-			read_lock(&dev_base_lock);
-			ret = dev_ifsioc_locked(net, &ifr, cmd);
-			read_unlock(&dev_base_lock);
-			if (!ret) {
-				if (colon)
-					*colon = ':';
-				if (copy_to_user(arg, &ifr,
-						 sizeof(struct ifreq)))
-					ret = -EFAULT;
-			}
-			return ret;
+	/*
+	 *	These ioctl calls:
+	 *	- can be done by all.
+	 *	- atomic and do not require locking.
+	 *	- return a value
+	 */
+	case SIOCGIFFLAGS:
+	case SIOCGIFMETRIC:
+	case SIOCGIFMTU:
+	case SIOCGIFHWADDR:
+	case SIOCGIFSLAVE:
+	case SIOCGIFMAP:
+	case SIOCGIFINDEX:
+	case SIOCGIFTXQLEN:
+		dev_load(net, ifr.ifr_name);
+		rcu_read_lock();
+		ret = dev_ifsioc_locked(net, &ifr, cmd);
+		rcu_read_unlock();
+		if (!ret) {
+			if (colon)
+				*colon = ':';
+			if (copy_to_user(arg, &ifr,
+					 sizeof(struct ifreq)))
+				ret = -EFAULT;
+		}
+		return ret;
 
-		case SIOCETHTOOL:
-			dev_load(net, ifr.ifr_name);
-			rtnl_lock();
-			ret = dev_ethtool(net, &ifr);
-			rtnl_unlock();
-			if (!ret) {
-				if (colon)
-					*colon = ':';
-				if (copy_to_user(arg, &ifr,
-						 sizeof(struct ifreq)))
-					ret = -EFAULT;
-			}
-			return ret;
+	case SIOCETHTOOL:
+		dev_load(net, ifr.ifr_name);
+		rtnl_lock();
+		ret = dev_ethtool(net, &ifr);
+		rtnl_unlock();
+		if (!ret) {
+			if (colon)
+				*colon = ':';
+			if (copy_to_user(arg, &ifr,
+					 sizeof(struct ifreq)))
+				ret = -EFAULT;
+		}
+		return ret;
 
-		/*
-		 *	These ioctl calls:
-		 *	- require superuser power.
-		 *	- require strict serialization.
-		 *	- return a value
-		 */
-		case SIOCGMIIPHY:
-		case SIOCGMIIREG:
-		case SIOCSIFNAME:
-			if (!capable(CAP_NET_ADMIN))
-				return -EPERM;
-			dev_load(net, ifr.ifr_name);
-			rtnl_lock();
-			ret = dev_ifsioc(net, &ifr, cmd);
-			rtnl_unlock();
-			if (!ret) {
-				if (colon)
-					*colon = ':';
-				if (copy_to_user(arg, &ifr,
-						 sizeof(struct ifreq)))
-					ret = -EFAULT;
-			}
-			return ret;
+	/*
+	 *	These ioctl calls:
+	 *	- require superuser power.
+	 *	- require strict serialization.
+	 *	- return a value
+	 */
+	case SIOCGMIIPHY:
+	case SIOCGMIIREG:
+	case SIOCSIFNAME:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		dev_load(net, ifr.ifr_name);
+		rtnl_lock();
+		ret = dev_ifsioc(net, &ifr, cmd);
+		rtnl_unlock();
+		if (!ret) {
+			if (colon)
+				*colon = ':';
+			if (copy_to_user(arg, &ifr,
+					 sizeof(struct ifreq)))
+				ret = -EFAULT;
+		}
+		return ret;
 
-		/*
-		 *	These ioctl calls:
-		 *	- require superuser power.
-		 *	- require strict serialization.
-		 *	- do not return a value
-		 */
-		case SIOCSIFFLAGS:
-		case SIOCSIFMETRIC:
-		case SIOCSIFMTU:
-		case SIOCSIFMAP:
-		case SIOCSIFHWADDR:
-		case SIOCSIFSLAVE:
-		case SIOCADDMULTI:
-		case SIOCDELMULTI:
-		case SIOCSIFHWBROADCAST:
-		case SIOCSIFTXQLEN:
-		case SIOCSMIIREG:
-		case SIOCBONDENSLAVE:
-		case SIOCBONDRELEASE:
-		case SIOCBONDSETHWADDR:
-		case SIOCBONDCHANGEACTIVE:
-		case SIOCBRADDIF:
-		case SIOCBRDELIF:
-		case SIOCSHWTSTAMP:
-			if (!capable(CAP_NET_ADMIN))
-				return -EPERM;
-			/* fall through */
-		case SIOCBONDSLAVEINFOQUERY:
-		case SIOCBONDINFOQUERY:
+	/*
+	 *	These ioctl calls:
+	 *	- require superuser power.
+	 *	- require strict serialization.
+	 *	- do not return a value
+	 */
+	case SIOCSIFFLAGS:
+	case SIOCSIFMETRIC:
+	case SIOCSIFMTU:
+	case SIOCSIFMAP:
+	case SIOCSIFHWADDR:
+	case SIOCSIFSLAVE:
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+	case SIOCSIFHWBROADCAST:
+	case SIOCSIFTXQLEN:
+	case SIOCSMIIREG:
+	case SIOCBONDENSLAVE:
+	case SIOCBONDRELEASE:
+	case SIOCBONDSETHWADDR:
+	case SIOCBONDCHANGEACTIVE:
+	case SIOCBRADDIF:
+	case SIOCBRDELIF:
+	case SIOCSHWTSTAMP:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		/* fall through */
+	case SIOCBONDSLAVEINFOQUERY:
+	case SIOCBONDINFOQUERY:
+		dev_load(net, ifr.ifr_name);
+		rtnl_lock();
+		ret = dev_ifsioc(net, &ifr, cmd);
+		rtnl_unlock();
+		return ret;
+
+	case SIOCGIFMEM:
+		/* Get the per device memory space. We can add this but
+		 * currently do not support it */
+	case SIOCSIFMEM:
+		/* Set the per device memory buffer space.
+		 * Not applicable in our case */
+	case SIOCSIFLINK:
+		return -EINVAL;
+
+	/*
+	 *	Unknown or private ioctl.
+	 */
+	default:
+		if (cmd == SIOCWANDEV ||
+		    (cmd >= SIOCDEVPRIVATE &&
+		     cmd <= SIOCDEVPRIVATE + 15)) {
 			dev_load(net, ifr.ifr_name);
 			rtnl_lock();
 			ret = dev_ifsioc(net, &ifr, cmd);
 			rtnl_unlock();
+			if (!ret && copy_to_user(arg, &ifr,
+						 sizeof(struct ifreq)))
+				ret = -EFAULT;
 			return ret;
-
-		case SIOCGIFMEM:
-			/* Get the per device memory space. We can add this but
-			 * currently do not support it */
-		case SIOCSIFMEM:
-			/* Set the per device memory buffer space.
-			 * Not applicable in our case */
-		case SIOCSIFLINK:
-			return -EINVAL;
-
-		/*
-		 *	Unknown or private ioctl.
-		 */
-		default:
-			if (cmd == SIOCWANDEV ||
-			    (cmd >= SIOCDEVPRIVATE &&
-			     cmd <= SIOCDEVPRIVATE + 15)) {
-				dev_load(net, ifr.ifr_name);
-				rtnl_lock();
-				ret = dev_ifsioc(net, &ifr, cmd);
-				rtnl_unlock();
-				if (!ret && copy_to_user(arg, &ifr,
-							 sizeof(struct ifreq)))
-					ret = -EFAULT;
-				return ret;
-			}
-			/* Take care of Wireless Extensions */
-			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
-				return wext_handle_ioctl(net, &ifr, cmd, arg);
-			return -EINVAL;
+		}
+		/* Take care of Wireless Extensions */
+		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
+			return wext_handle_ioctl(net, &ifr, cmd, arg);
+		return -EINVAL;
 	}
 }
 
@@ -4578,74 +4977,88 @@ static void net_set_todo(struct net_device *dev)
 	list_add_tail(&dev->todo_list, &net_todo_list);
 }
 
-static void rollback_registered(struct net_device *dev)
+/* Unregister every device on @head (linked through unreg_list) as one batch.
+ * Caller must hold RTNL; each device's reference is dropped at the end. */
+static void rollback_registered_many(struct list_head *head)
 {
+	struct net_device *dev, *tmp;
+
 	BUG_ON(dev_boot_phase);
 	ASSERT_RTNL();
 
-	/* Some devices call without registering for initialization unwind. */
-	if (dev->reg_state == NETREG_UNINITIALIZED) {
-		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
-				  "was registered\n", dev->name, dev);
+	list_for_each_entry_safe(dev, tmp, head, unreg_list) {
+		/* Some devices call without registering for initialization
+		 * unwind. Remove those devices and proceed with the remaining.
+		 */
+		if (dev->reg_state == NETREG_UNINITIALIZED) {
+			pr_debug("unregister_netdevice: device %s/%p never "
+				 "was registered\n", dev->name, dev);
 
-		WARN_ON(1);
-		return;
-	}
+			WARN_ON(1);
+			list_del(&dev->unreg_list);
+			continue;
+		}
 
-	BUG_ON(dev->reg_state != NETREG_REGISTERED);
+		BUG_ON(dev->reg_state != NETREG_REGISTERED);
+	}
+	if (list_empty(head))	/* every entry was dropped above */
+		return;
 
 	/* If device is running, close it first. */
-	dev_close(dev);
+	dev_close_many(head);
 
-	/* And unlink it from device chain. */
-	unlist_netdevice(dev);
+	list_for_each_entry(dev, head, unreg_list) {
+		/* And unlink it from device chain. */
+		unlist_netdevice(dev);
 
-	dev->reg_state = NETREG_UNREGISTERING;
+		dev->reg_state = NETREG_UNREGISTERING;
+	}
 
 	synchronize_net();
 
-	/* Shutdown queueing discipline. */
-	dev_shutdown(dev);
+	list_for_each_entry(dev, head, unreg_list) {
+		/* Shutdown queueing discipline. */
+		dev_shutdown(dev);
 
 
-	/* Notify protocols, that we are about to destroy
-	   this device. They should clean all the things.
-	*/
-	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+		/* Notify protocols that we are about to destroy this
+		 * device. They should clean all the things. */
+		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 
-	/*
-	 *	Flush the unicast and multicast chains
-	 */
-	dev_unicast_flush(dev);
-	dev_addr_discard(dev);
+		if (!dev->rtnl_link_ops ||
+		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
 
-	if (dev->netdev_ops->ndo_uninit)
-		dev->netdev_ops->ndo_uninit(dev);
+		/* Flush the unicast and multicast chains */
+		dev_uc_flush(dev);
+		dev_mc_flush(dev);
 
-	/* Notifier chain MUST detach us from master device. */
-	WARN_ON(dev->master);
+		if (dev->netdev_ops->ndo_uninit)
+			dev->netdev_ops->ndo_uninit(dev);
 
-	/* Remove entries from kobject tree */
-	netdev_unregister_kobject(dev);
+		/* Notifier chain MUST detach us from master device. */
+		WARN_ON(dev->master);
 
-	synchronize_net();
+		/* Remove entries from kobject tree */
+		netdev_unregister_kobject(dev);
+	}
 
-	dev_put(dev);
-}
+	/* Process any work delayed until the end of the batch */
+	dev = list_first_entry(head, struct net_device, unreg_list);
+	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
-static void __netdev_init_queue_locks_one(struct net_device *dev,
-					  struct netdev_queue *dev_queue,
-					  void *_unused)
-{
-	spin_lock_init(&dev_queue->_xmit_lock);
-	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
-	dev_queue->xmit_lock_owner = -1;
+	rcu_barrier();
+
+	list_for_each_entry(dev, head, unreg_list)
+		dev_put(dev);
 }
 
-static void netdev_init_queue_locks(struct net_device *dev)
+static void rollback_registered(struct net_device *dev)
 {
-	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
-	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
+	LIST_HEAD(single);	/* on-stack head for a one-device batch */
+	list_add(&dev->unreg_list, &single);
+	rollback_registered_many(&single);
+	list_del(&single);	/* unhook dev->unreg_list before we return */
 }
 
 unsigned long netdev_fix_features(unsigned long features, const char *name)
@@ -4668,10 +5081,13 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 	}
 
 	if (features & NETIF_F_UFO) {
-		if (!(features & NETIF_F_GEN_CSUM)) {
+		/* maybe split UFO into V4 and V6? */
+		if (!((features & NETIF_F_GEN_CSUM) ||
+		    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
+			    == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
 			if (name)
 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
-				       "since no NETIF_F_HW_CSUM feature.\n",
+				       "since no checksum offload features.\n",
 				       name);
 			features &= ~NETIF_F_UFO;
 		}
@@ -4689,6 +5105,86 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 EXPORT_SYMBOL(netdev_fix_features);
 
 /**
+ *	netif_stacked_transfer_operstate -	transfer operstate
+ *	@rootdev: the root or lower level device to transfer state from
+ *	@dev: the device to transfer operstate to
+ *
+ *	Transfer operational state from root to device. This is normally
+ *	called when a stacking relationship exists between the root
+ *	device and the device(a leaf device).
+ */
+void netif_stacked_transfer_operstate(const struct net_device *rootdev,
+					struct net_device *dev)
+{
+	if (rootdev->operstate != IF_OPER_DORMANT)
+		netif_dormant_off(dev);
+	else
+		netif_dormant_on(dev);
+
+	/* Carrier is only touched when it differs from the root's. */
+	if (!netif_carrier_ok(rootdev)) {
+		if (netif_carrier_ok(dev))
+			netif_carrier_off(dev);
+	} else if (!netif_carrier_ok(dev)) {
+		netif_carrier_on(dev);
+	}
+}
+EXPORT_SYMBOL(netif_stacked_transfer_operstate);
+
+#ifdef CONFIG_RPS
+static int netif_alloc_rx_queues(struct net_device *dev)
+{
+	struct netdev_rx_queue *queues;
+	unsigned int idx, nr = dev->num_rx_queues;
+
+	BUG_ON(!nr);
+	queues = kcalloc(nr, sizeof(*queues), GFP_KERNEL);
+	if (!queues) {
+		pr_err("netdev: Unable to allocate %u rx queues.\n", nr);
+		return -ENOMEM;
+	}
+
+	/* Point every queue back at its device, then install the array. */
+	for (idx = 0; idx < nr; idx++)
+		queues[idx].dev = dev;
+	dev->_rx = queues;
+	return 0;
+}
+#endif
+
+static void netdev_init_one_queue(struct net_device *dev,
+				  struct netdev_queue *queue, void *_unused)
+{
+	queue->dev = dev;
+	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
+	/* Initialize the queue's xmit lock state */
+	spin_lock_init(&queue->_xmit_lock);
+	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+	queue->xmit_lock_owner = -1;
+}
+
+static int netif_alloc_netdev_queues(struct net_device *dev)
+{
+	struct netdev_queue *txqs;
+	unsigned int nr = dev->num_tx_queues;
+
+	BUG_ON(!nr);
+
+	txqs = kcalloc(nr, sizeof(*txqs), GFP_KERNEL);
+	if (!txqs) {
+		pr_err("netdev: Unable to allocate %u tx queues.\n", nr);
+		return -ENOMEM;
+	}
+
+	/* Install the array, then set up each queue and the global lock */
+	dev->_tx = txqs;
+	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+	spin_lock_init(&dev->tx_global_lock);
+
+	return 0;
+}
+
+/**
  *	register_netdevice	- register a network device
  *	@dev: device to register
  *
@@ -4707,8 +5203,6 @@ EXPORT_SYMBOL(netdev_fix_features);
 
 int register_netdevice(struct net_device *dev)
 {
-	struct hlist_head *head;
-	struct hlist_node *p;
 	int ret;
 	struct net *net = dev_net(dev);
 
@@ -4723,7 +5217,6 @@ int register_netdevice(struct net_device *dev)
 
 	spin_lock_init(&dev->addr_list_lock);
 	netdev_set_addr_lockdep_class(dev);
-	netdev_init_queue_locks(dev);
 
 	dev->iflink = -1;
 
@@ -4737,26 +5230,14 @@ int register_netdevice(struct net_device *dev)
 		}
 	}
 
-	if (!dev_valid_name(dev->name)) {
-		ret = -EINVAL;
+	ret = dev_get_valid_name(dev, dev->name, 0);
+	if (ret)
 		goto err_uninit;
-	}
 
 	dev->ifindex = dev_new_index(net);
 	if (dev->iflink == -1)
 		dev->iflink = dev->ifindex;
 
-	/* Check for existence of name */
-	head = dev_name_hash(net, dev->name);
-	hlist_for_each(p, head) {
-		struct net_device *d
-			= hlist_entry(p, struct net_device, name_hlist);
-		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
-			ret = -EEXIST;
-			goto err_uninit;
-		}
-	}
-
 	/* Fix illegal checksum combinations */
 	if ((dev->features & NETIF_F_HW_CSUM) &&
 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
@@ -4778,7 +5259,17 @@ int register_netdevice(struct net_device *dev)
 	if (dev->features & NETIF_F_SG)
 		dev->features |= NETIF_F_GSO;
 
-	netdev_initialize_kobject(dev);
+	/* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
+	 * vlan_dev_init() will do the dev->features check, so these features
+	 * are enabled only if supported by underlying device.
+	 */
+	dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA);
+
+	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
+	ret = notifier_to_errno(ret);
+	if (ret)
+		goto err_uninit;
+
 	ret = netdev_register_kobject(dev);
 	if (ret)
 		goto err_uninit;
@@ -4802,6 +5293,13 @@ int register_netdevice(struct net_device *dev)
 		rollback_registered(dev);
 		dev->reg_state = NETREG_UNREGISTERED;
 	}
+	/*
+	 *	Prevent userspace races by waiting until the network
+	 *	device is fully setup before sending notifications.
+	 */
+	if (!dev->rtnl_link_ops ||
+	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
 
 out:
 	return ret;
@@ -4811,6 +5309,7 @@ err_uninit:
 		dev->netdev_ops->ndo_uninit(dev);
 	goto out;
 }
+EXPORT_SYMBOL(register_netdevice);
 
 /**
  *	init_dummy_netdev	- init a dummy network device for NAPI
@@ -4836,9 +5335,6 @@ int init_dummy_netdev(struct net_device *dev)
 	 */
 	dev->reg_state = NETREG_DUMMY;
 
-	/* initialize the ref count */
-	atomic_set(&dev->refcnt, 1);
-
 	/* NAPI wants this */
 	INIT_LIST_HEAD(&dev->napi_list);
 
@@ -4846,6 +5342,11 @@ int init_dummy_netdev(struct net_device *dev)
 	set_bit(__LINK_STATE_PRESENT, &dev->state);
 	set_bit(__LINK_STATE_START, &dev->state);
 
+	/* Note: We don't allocate pcpu_refcnt for dummy devices,
+	 * because users of this 'device' don't need to change
+	 * its refcount.
+	 */
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(init_dummy_netdev);
@@ -4887,6 +5388,16 @@ out:
 }
 EXPORT_SYMBOL(register_netdev);
 
+int netdev_refcnt_read(const struct net_device *dev)
+{
+	int cpu, total = 0;
+
+	for_each_possible_cpu(cpu)	/* sum every CPU's private counter */
+		total += *per_cpu_ptr(dev->pcpu_refcnt, cpu);
+	return total;
+}
+EXPORT_SYMBOL(netdev_refcnt_read);
+
 /*
  * netdev_wait_allrefs - wait until all references are gone.
  *
@@ -4901,14 +5412,21 @@ EXPORT_SYMBOL(register_netdev);
 static void netdev_wait_allrefs(struct net_device *dev)
 {
 	unsigned long rebroadcast_time, warning_time;
+	int refcnt;
+
+	linkwatch_forget_dev(dev);
 
 	rebroadcast_time = warning_time = jiffies;
-	while (atomic_read(&dev->refcnt) != 0) {
+	refcnt = netdev_refcnt_read(dev);
+
+	while (refcnt != 0) {
 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
 			rtnl_lock();
 
 			/* Rebroadcast unregister notification */
 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+			/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
+			 * should have already handled it the first time */
 
 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
 				     &dev->state)) {
@@ -4928,11 +5446,13 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
 		msleep(250);
 
+		refcnt = netdev_refcnt_read(dev);
+
 		if (time_after(jiffies, warning_time + 10 * HZ)) {
 			printk(KERN_EMERG "unregister_netdevice: "
 			       "waiting for %s to become free. Usage "
 			       "count = %d\n",
-			       dev->name, atomic_read(&dev->refcnt));
+			       dev->name, refcnt);
 			warning_time = jiffies;
 		}
 	}
@@ -4973,7 +5493,7 @@ void netdev_run_todo(void)
 
 	while (!list_empty(&list)) {
 		struct net_device *dev
-			= list_entry(list.next, struct net_device, todo_list);
+			= list_first_entry(&list, struct net_device, todo_list);
 		list_del(&dev->todo_list);
 
 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
@@ -4990,9 +5510,9 @@ void netdev_run_todo(void)
 		netdev_wait_allrefs(dev);
 
 		/* paranoia */
-		BUG_ON(atomic_read(&dev->refcnt));
-		WARN_ON(dev->ip_ptr);
-		WARN_ON(dev->ip6_ptr);
+		BUG_ON(netdev_refcnt_read(dev));
+		WARN_ON(rcu_dereference_raw(dev->ip_ptr));
+		WARN_ON(rcu_dereference_raw(dev->ip6_ptr));
 		WARN_ON(dev->dn_ptr);
 
 		if (dev->destructor)
@@ -5004,76 +5524,137 @@ void netdev_run_todo(void)
 }
 
 /**
+ *	dev_txq_stats_fold - fold tx_queues stats
+ *	@dev: device to get statistics from
+ *	@stats: struct rtnl_link_stats64 to hold results
+ */
+void dev_txq_stats_fold(const struct net_device *dev,
+			struct rtnl_link_stats64 *stats)
+{
+	struct netdev_queue *q;
+	u64 bytes = 0, packets = 0, dropped = 0;
+	unsigned int qidx;
+
+	for (qidx = 0; qidx < dev->num_tx_queues; qidx++) {
+		q = netdev_get_tx_queue(dev, qidx);
+		spin_lock_bh(&q->_xmit_lock);
+		bytes += q->tx_bytes;
+		packets += q->tx_packets;
+		dropped += q->tx_dropped;
+		spin_unlock_bh(&q->_xmit_lock);
+	}
+	if (!bytes && !packets && !dropped)
+		return;		/* nothing counted: leave @stats alone */
+	stats->tx_bytes = bytes;
+	stats->tx_packets = packets;
+	stats->tx_dropped = dropped;
+}
+EXPORT_SYMBOL(dev_txq_stats_fold);
+
+/* Widen a net_device_stats into an rtnl_link_stats64.  The two are handled
+ * as same-length arrays of counters: unsigned long in, u64 out.
+ */
+static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
+				    const struct net_device_stats *netdev_stats)
+{
+#if BITS_PER_LONG == 64
+	BUILD_BUG_ON(sizeof(*netdev_stats) != sizeof(*stats64));
+	memcpy(stats64, netdev_stats, sizeof(*stats64));
+#else
+	const unsigned long *from = (const unsigned long *)netdev_stats;
+	u64 *to = (u64 *)stats64;
+	size_t k, nr = sizeof(*stats64) / sizeof(u64);
+
+	BUILD_BUG_ON(sizeof(*stats64) / sizeof(u64) !=
+		     sizeof(*netdev_stats) / sizeof(unsigned long));
+	for (k = 0; k < nr; k++)
+		to[k] = from[k];
+#endif
+}
+
+/**
  *	dev_get_stats	- get network device statistics
  *	@dev: device to get statistics from
+ *	@storage: place to store stats
  *
- *	Get network statistics from device. The device driver may provide
- *	its own method by setting dev->netdev_ops->get_stats; otherwise
- *	the internal statistics structure is used.
+ *	Get network statistics from device. Return @storage.
+ *	The device driver may provide its own method by setting
+ *	dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
+ *	otherwise the internal statistics structure is used.
  */
-const struct net_device_stats *dev_get_stats(struct net_device *dev)
+struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
+					struct rtnl_link_stats64 *storage)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 
-	if (ops->ndo_get_stats)
-		return ops->ndo_get_stats(dev);
-	else {
-		unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
-		struct net_device_stats *stats = &dev->stats;
-		unsigned int i;
-		struct netdev_queue *txq;
-
-		for (i = 0; i < dev->num_tx_queues; i++) {
-			txq = netdev_get_tx_queue(dev, i);
-			tx_bytes   += txq->tx_bytes;
-			tx_packets += txq->tx_packets;
-			tx_dropped += txq->tx_dropped;
-		}
-		if (tx_bytes || tx_packets || tx_dropped) {
-			stats->tx_bytes   = tx_bytes;
-			stats->tx_packets = tx_packets;
-			stats->tx_dropped = tx_dropped;
-		}
-		return stats;
+	if (ops->ndo_get_stats64) {
+		memset(storage, 0, sizeof(*storage));
+		ops->ndo_get_stats64(dev, storage);
+	} else if (ops->ndo_get_stats) {
+		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
+	} else {
+		netdev_stats_to_stats64(storage, &dev->stats);
+		dev_txq_stats_fold(dev, storage);
 	}
+	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
+	return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
 
-static void netdev_init_one_queue(struct net_device *dev,
-				  struct netdev_queue *queue,
-				  void *_unused)
+struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
 {
-	queue->dev = dev;
-}
+	struct netdev_queue *queue = dev_ingress_queue(dev);
 
-static void netdev_init_queues(struct net_device *dev)
-{
-	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
-	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
-	spin_lock_init(&dev->tx_global_lock);
+#ifdef CONFIG_NET_CLS_ACT
+	if (queue)		/* already created: hand back the same one */
+		return queue;
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue)
+		return NULL;
+	netdev_init_one_queue(dev, queue, NULL);
+	queue->qdisc = &noop_qdisc;	/* both qdisc slots start as noop */
+	queue->qdisc_sleeping = &noop_qdisc;
+	rcu_assign_pointer(dev->ingress_queue, queue);	/* set up first */
+#endif
+	return queue;
 }
 
 /**
- *	alloc_netdev_mq - allocate network device
+ *	alloc_netdev_mqs - allocate network device
  *	@sizeof_priv:	size of private data to allocate space for
  *	@name:		device name format string
  *	@setup:		callback to initialize device
- *	@queue_count:	the number of subqueues to allocate
+ *	@txqs:		the number of TX subqueues to allocate
+ *	@rxqs:		the number of RX subqueues to allocate
  *
  *	Allocates a struct net_device with private data area for driver use
  *	and performs basic initialization.  Also allocates subquue structs
- *	for each queue on the device at the end of the netdevice.
+ *	for each queue on the device.
  */
-struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
-		void (*setup)(struct net_device *), unsigned int queue_count)
+struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+		void (*setup)(struct net_device *),
+		unsigned int txqs, unsigned int rxqs)
 {
-	struct netdev_queue *tx;
 	struct net_device *dev;
 	size_t alloc_size;
 	struct net_device *p;
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
+	if (txqs < 1) {
+		pr_err("alloc_netdev: Unable to allocate device "
+		       "with zero queues.\n");
+		return NULL;
+	}
+
+#ifdef CONFIG_RPS
+	if (rxqs < 1) {
+		pr_err("alloc_netdev: Unable to allocate device "
+		       "with zero RX queues.\n");
+		return NULL;
+	}
+#endif
+
 	alloc_size = sizeof(struct net_device);
 	if (sizeof_priv) {
 		/* ensure 32-byte alignment of private area */
@@ -5089,45 +5670,57 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		return NULL;
 	}
 
-	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
-	if (!tx) {
-		printk(KERN_ERR "alloc_netdev: Unable to allocate "
-		       "tx qdiscs.\n");
-		goto free_p;
-	}
-
 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
 
+	dev->pcpu_refcnt = alloc_percpu(int);
+	if (!dev->pcpu_refcnt)
+		goto free_p;
+
 	if (dev_addr_init(dev))
-		goto free_tx;
+		goto free_pcpu;
 
-	dev_unicast_init(dev);
+	dev_mc_init(dev);
+	dev_uc_init(dev);
 
 	dev_net_set(dev, &init_net);
 
-	dev->_tx = tx;
-	dev->num_tx_queues = queue_count;
-	dev->real_num_tx_queues = queue_count;
+	dev->num_tx_queues = txqs;
+	dev->real_num_tx_queues = txqs;
+	if (netif_alloc_netdev_queues(dev))
+		goto free_pcpu;
 
-	dev->gso_max_size = GSO_MAX_SIZE;
+#ifdef CONFIG_RPS
+	dev->num_rx_queues = rxqs;
+	dev->real_num_rx_queues = rxqs;
+	if (netif_alloc_rx_queues(dev))
+		goto free_pcpu;
+#endif
 
-	netdev_init_queues(dev);
+	dev->gso_max_size = GSO_MAX_SIZE;
 
+	INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
+	dev->ethtool_ntuple_list.count = 0;
 	INIT_LIST_HEAD(&dev->napi_list);
+	INIT_LIST_HEAD(&dev->unreg_list);
+	INIT_LIST_HEAD(&dev->link_watch_list);
 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
 	strcpy(dev->name, name);
 	return dev;
 
-free_tx:
-	kfree(tx);
+free_pcpu:
+	free_percpu(dev->pcpu_refcnt);
+	kfree(dev->_tx);
+#ifdef CONFIG_RPS
+	kfree(dev->_rx);
+#endif
 
 free_p:
 	kfree(p);
 	return NULL;
 }
-EXPORT_SYMBOL(alloc_netdev_mq);
+EXPORT_SYMBOL(alloc_netdev_mqs);
 
 /**
  *	free_netdev - free network device
@@ -5144,13 +5737,24 @@ void free_netdev(struct net_device *dev)
 	release_net(dev_net(dev));
 
 	kfree(dev->_tx);
+#ifdef CONFIG_RPS
+	kfree(dev->_rx);
+#endif
+
+	kfree(rcu_dereference_raw(dev->ingress_queue));
 
 	/* Flush device addresses */
 	dev_addr_flush(dev);
 
+	/* Clear ethtool n-tuple list */
+	ethtool_ntuple_flush(dev);
+
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
 
+	free_percpu(dev->pcpu_refcnt);
+	dev->pcpu_refcnt = NULL;
+
 	/*  Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
 		kfree((char *)dev - dev->padded);
@@ -5163,6 +5767,7 @@ void free_netdev(struct net_device *dev)
 	/* will free via device release */
 	put_device(&dev->dev);
 }
+EXPORT_SYMBOL(free_netdev);
 
 /**
  *	synchronize_net -  Synchronize with packet receive processing
@@ -5175,26 +5780,50 @@ void synchronize_net(void)
 	might_sleep();
 	synchronize_rcu();
 }
+EXPORT_SYMBOL(synchronize_net);
 
 /**
- *	unregister_netdevice - remove device from the kernel
+ *	unregister_netdevice_queue - remove device from the kernel
  *	@dev: device
+ *	@head: list to queue @dev on for a later unregister, or NULL
  *
  *	This function shuts down a device interface and removes it
  *	from the kernel tables.
+ *	If @head is not NULL, the device is only queued on it for later.
  *
  *	Callers must hold the rtnl semaphore.  You may want
  *	unregister_netdev() instead of this.
  */
 
-void unregister_netdevice(struct net_device *dev)
+void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
 {
 	ASSERT_RTNL();
 
-	rollback_registered(dev);
-	/* Finish processing unregister after unlock */
-	net_set_todo(dev);
+	if (head) {
+		list_move_tail(&dev->unreg_list, head);
+	} else {
+		rollback_registered(dev);
+		/* Finish processing unregister after unlock */
+		net_set_todo(dev);
+	}
+}
+EXPORT_SYMBOL(unregister_netdevice_queue);
+
+/**
+ *	unregister_netdevice_many - unregister many devices
+ *	@head: list of devices, linked through their unreg_list member
+ */
+void unregister_netdevice_many(struct list_head *head)
+{
+	struct net_device *cur;
+
+	if (list_empty(head))
+		return;
+	rollback_registered_many(head);
+	list_for_each_entry(cur, head, unreg_list)
+		net_set_todo(cur);
+}
+EXPORT_SYMBOL(unregister_netdevice_many);
 
 /**
  *	unregister_netdev - remove device from the kernel
@@ -5213,7 +5842,6 @@ void unregister_netdev(struct net_device *dev)
 	unregister_netdevice(dev);
 	rtnl_unlock();
 }
-
 EXPORT_SYMBOL(unregister_netdev);
 
 /**
@@ -5232,8 +5860,6 @@ EXPORT_SYMBOL(unregister_netdev);
 
 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
 {
-	char buf[IFNAMSIZ];
-	const char *destname;
 	int err;
 
 	ASSERT_RTNL();
@@ -5243,15 +5869,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	if (dev->features & NETIF_F_NETNS_LOCAL)
 		goto out;
 
-#ifdef CONFIG_SYSFS
-	/* Don't allow real devices to be moved when sysfs
-	 * is enabled.
-	 */
-	err = -EINVAL;
-	if (dev->dev.parent)
-		goto out;
-#endif
-
 	/* Ensure the device has been registrered */
 	err = -EINVAL;
 	if (dev->reg_state != NETREG_REGISTERED)
@@ -5266,20 +5883,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	 * we can use it in the destination network namespace.
 	 */
 	err = -EEXIST;
-	destname = dev->name;
-	if (__dev_get_by_name(net, destname)) {
+	if (__dev_get_by_name(net, dev->name)) {
 		/* We get here if we can't use the current device name */
 		if (!pat)
 			goto out;
-		if (!dev_valid_name(pat))
-			goto out;
-		if (strchr(pat, '%')) {
-			if (__dev_alloc_name(net, pat, buf) < 0)
-				goto out;
-			destname = buf;
-		} else
-			destname = pat;
-		if (__dev_get_by_name(net, destname))
+		if (dev_get_valid_name(dev, pat, 1))
 			goto out;
 	}
 
@@ -5301,24 +5909,23 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 
 	/* Notify protocols, that we are about to destroy
 	   this device. They should clean all the things.
+
+	   Note that dev->reg_state stays at NETREG_REGISTERED.
+	   This is wanted because this way 8021q and macvlan know
+	   the device is just moving and can keep their slaves up.
 	*/
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
 	/*
 	 *	Flush the unicast and multicast chains
 	 */
-	dev_unicast_flush(dev);
-	dev_addr_discard(dev);
-
-	netdev_unregister_kobject(dev);
+	dev_uc_flush(dev);
+	dev_mc_flush(dev);
 
 	/* Actually switch the network namespace */
 	dev_net_set(dev, net);
 
-	/* Assign the new device name */
-	if (destname != dev->name)
-		strcpy(dev->name, destname);
-
 	/* If there is an ifindex conflict assign a new one */
 	if (__dev_get_by_index(net, dev->ifindex)) {
 		int iflink = (dev->iflink == dev->ifindex);
@@ -5328,7 +5935,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	}
 
 	/* Fixup kobjects */
-	err = netdev_register_kobject(dev);
+	err = device_rename(&dev->dev, dev->name);
 	WARN_ON(err);
 
 	/* Add the device back in the hashes */
@@ -5337,18 +5944,24 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	/* Notify protocols, that a new device appeared. */
 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
 
+	/*
+	 *	Prevent userspace races by waiting until the network
+	 *	device is fully setup before sending notifications.
+	 */
+	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+
 	synchronize_net();
 	err = 0;
 out:
 	return err;
 }
+EXPORT_SYMBOL_GPL(dev_change_net_namespace);
 
 static int dev_cpu_callback(struct notifier_block *nfb,
 			    unsigned long action,
 			    void *ocpu)
 {
 	struct sk_buff **list_skb;
-	struct Qdisc **list_net;
 	struct sk_buff *skb;
 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
 	struct softnet_data *sd, *oldsd;
@@ -5369,20 +5982,26 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 	*list_skb = oldsd->completion_queue;
 	oldsd->completion_queue = NULL;
 
-	/* Find end of our output_queue. */
-	list_net = &sd->output_queue;
-	while (*list_net)
-		list_net = &(*list_net)->next_sched;
 	/* Append output queue from offline CPU. */
-	*list_net = oldsd->output_queue;
-	oldsd->output_queue = NULL;
+	if (oldsd->output_queue) {
+		*sd->output_queue_tailp = oldsd->output_queue;
+		sd->output_queue_tailp = oldsd->output_queue_tailp;
+		oldsd->output_queue = NULL;
+		oldsd->output_queue_tailp = &oldsd->output_queue;
+	}
 
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_enable();
 
 	/* Process offline CPU's input_pkt_queue */
-	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
+	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
+		netif_rx(skb);
+		input_queue_head_incr(oldsd);
+	}
+	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
 		netif_rx(skb);
+		input_queue_head_incr(oldsd);
+	}
 
 	return NOTIFY_OK;
 }
@@ -5402,7 +6021,7 @@ unsigned long netdev_increment_features(unsigned long all, unsigned long one,
 					unsigned long mask)
 {
 	/* If device needs checksumming, downgrade to it. */
-        if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
+	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
 		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
 	else if (mask & NETIF_F_ALL_CSUM) {
 		/* If one device supports v4/v6 checksumming, set for all. */
@@ -5422,7 +6041,7 @@ unsigned long netdev_increment_features(unsigned long all, unsigned long one,
 	one |= NETIF_F_ALL_CSUM;
 
 	one |= all & NETIF_F_ONE_FOR_ALL;
-	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
+	all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
 
 	return all;
@@ -5491,6 +6110,68 @@ char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
 	return buffer;
 }
 
+static int __netdev_printk(const char *level, const struct net_device *dev,
+			   struct va_format *vaf)
+{
+	/* No device at all: still emit the message, tagged as such */
+	if (!dev)
+		return printk("%s(NULL net_device): %pV", level, vaf);
+
+	/* Without a parent device, fall back to a bare printk */
+	if (!dev->dev.parent)
+		return printk("%s%s: %pV", level, netdev_name(dev), vaf);
+
+	/* Otherwise route the message through the parent device */
+	return dev_printk(level, dev->dev.parent, "%s: %pV",
+			  netdev_name(dev), vaf);
+}
+
+int netdev_printk(const char *level, const struct net_device *dev,
+		  const char *format, ...)
+{
+	va_list ap;
+	struct va_format packed;
+	int ret;
+
+	/* Wrap format + varargs so they can be passed on via %pV */
+	va_start(ap, format);
+	packed.fmt = format;
+	packed.va = &ap;
+
+	ret = __netdev_printk(level, dev, &packed);
+
+	va_end(ap);
+	return ret;
+}
+EXPORT_SYMBOL(netdev_printk);
+
+#define define_netdev_printk_level(func, level)			\
+int func(const struct net_device *dev, const char *fmt, ...)	\
+{								\
+	int r;							\
+	struct va_format vaf;					\
+	va_list args;						\
+	/* same body as netdev_printk(), with @level fixed */	\
+	va_start(args, fmt);					\
+								\
+	vaf.fmt = fmt;						\
+	vaf.va = &args;						\
+								\
+	r = __netdev_printk(level, dev, &vaf);			\
+	va_end(args);						\
+								\
+	return r;						\
+}								\
+EXPORT_SYMBOL(func);
+/* Instantiate one exported helper per log level listed below. */
+define_netdev_printk_level(netdev_emerg, KERN_EMERG);
+define_netdev_printk_level(netdev_alert, KERN_ALERT);
+define_netdev_printk_level(netdev_crit, KERN_CRIT);
+define_netdev_printk_level(netdev_err, KERN_ERR);
+define_netdev_printk_level(netdev_warn, KERN_WARNING);
+define_netdev_printk_level(netdev_notice, KERN_NOTICE);
+define_netdev_printk_level(netdev_info, KERN_INFO);
+
 static void __net_exit netdev_exit(struct net *net)
 {
 	kfree(net->dev_name_head);
@@ -5504,14 +6185,13 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
 
 static void __net_exit default_device_exit(struct net *net)
 {
-	struct net_device *dev;
+	struct net_device *dev, *aux;
 	/*
-	 * Push all migratable of the network devices back to the
+	 * Push all migratable network devices back to the
 	 * initial network namespace
 	 */
 	rtnl_lock();
-restart:
-	for_each_netdev(net, dev) {
+	for_each_netdev_safe(net, dev, aux) {
 		int err;
 		char fb_name[IFNAMSIZ];
 
@@ -5519,11 +6199,9 @@ restart:
 		if (dev->features & NETIF_F_NETNS_LOCAL)
 			continue;
 
-		/* Delete virtual devices */
-		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
-			dev->rtnl_link_ops->dellink(dev);
-			goto restart;
-		}
+		/* Leave virtual devices for the generic cleanup */
+		if (dev->rtnl_link_ops)
+			continue;
 
 		/* Push remaing network devices to init_net */
 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
@@ -5533,13 +6211,37 @@ restart:
 				__func__, dev->name, err);
 			BUG();
 		}
-		goto restart;
 	}
 	rtnl_unlock();
 }
 
+static void __net_exit default_device_exit_batch(struct list_head *net_list)
+{
+	/* At exit all network devices must be removed from a network
+	 * namespace, in the reverse order of registration.  Batch across
+	 * as many network namespaces as possible to improve efficiency.
+	 */
+	struct net_device *dev;
+	struct net *net;
+	LIST_HEAD(dev_kill_list);
+
+	rtnl_lock();
+	list_for_each_entry(net, net_list, exit_list) {
+		for_each_netdev_reverse(net, dev) {
+			if (dev->rtnl_link_ops)
+				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
+			else
+				unregister_netdevice_queue(dev, &dev_kill_list);
+		}
+	}
+	unregister_netdevice_many(&dev_kill_list);
+	list_del(&dev_kill_list);	/* head is on our stack: unhook it */
+	rtnl_unlock();
+}
+
 static struct pernet_operations __net_initdata default_device_ops = {
 	.exit = default_device_exit,
+	.exit_batch = default_device_exit_batch,
 };
 
 /*
@@ -5577,17 +6279,26 @@ static int __init net_dev_init(void)
 	 */
 
 	for_each_possible_cpu(i) {
-		struct softnet_data *queue;
+		struct softnet_data *sd = &per_cpu(softnet_data, i);
 
-		queue = &per_cpu(softnet_data, i);
-		skb_queue_head_init(&queue->input_pkt_queue);
-		queue->completion_queue = NULL;
-		INIT_LIST_HEAD(&queue->poll_list);
+		memset(sd, 0, sizeof(*sd));
+		skb_queue_head_init(&sd->input_pkt_queue);
+		skb_queue_head_init(&sd->process_queue);
+		sd->completion_queue = NULL;
+		INIT_LIST_HEAD(&sd->poll_list);
+		sd->output_queue = NULL;
+		sd->output_queue_tailp = &sd->output_queue;
+#ifdef CONFIG_RPS
+		sd->csd.func = rps_trigger_softirq;
+		sd->csd.info = sd;
+		sd->csd.flags = 0;
+		sd->cpu = i;
+#endif
 
-		queue->backlog.poll = process_backlog;
-		queue->backlog.weight = weight_p;
-		queue->backlog.gro_list = NULL;
-		queue->backlog.gro_count = 0;
+		sd->backlog.poll = process_backlog;
+		sd->backlog.weight = weight_p;
+		sd->backlog.gro_list = NULL;
+		sd->backlog.gro_count = 0;
 	}
 
 	dev_boot_phase = 0;
@@ -5622,47 +6333,9 @@ subsys_initcall(net_dev_init);
 
 static int __init initialize_hashrnd(void)
 {
-	get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
+	get_random_bytes(&hashrnd, sizeof(hashrnd));
 	return 0;
 }
 
 late_initcall_sync(initialize_hashrnd);
 
-EXPORT_SYMBOL(__dev_get_by_index);
-EXPORT_SYMBOL(__dev_get_by_name);
-EXPORT_SYMBOL(__dev_remove_pack);
-EXPORT_SYMBOL(dev_valid_name);
-EXPORT_SYMBOL(dev_add_pack);
-EXPORT_SYMBOL(dev_alloc_name);
-EXPORT_SYMBOL(dev_close);
-EXPORT_SYMBOL(dev_get_by_flags);
-EXPORT_SYMBOL(dev_get_by_index);
-EXPORT_SYMBOL(dev_get_by_name);
-EXPORT_SYMBOL(dev_open);
-EXPORT_SYMBOL(dev_queue_xmit);
-EXPORT_SYMBOL(dev_remove_pack);
-EXPORT_SYMBOL(dev_set_allmulti);
-EXPORT_SYMBOL(dev_set_promiscuity);
-EXPORT_SYMBOL(dev_change_flags);
-EXPORT_SYMBOL(dev_set_mtu);
-EXPORT_SYMBOL(dev_set_mac_address);
-EXPORT_SYMBOL(free_netdev);
-EXPORT_SYMBOL(netdev_boot_setup_check);
-EXPORT_SYMBOL(netdev_set_master);
-EXPORT_SYMBOL(netdev_state_change);
-EXPORT_SYMBOL(netif_receive_skb);
-EXPORT_SYMBOL(netif_rx);
-EXPORT_SYMBOL(register_gifconf);
-EXPORT_SYMBOL(register_netdevice);
-EXPORT_SYMBOL(register_netdevice_notifier);
-EXPORT_SYMBOL(skb_checksum_help);
-EXPORT_SYMBOL(synchronize_net);
-EXPORT_SYMBOL(unregister_netdevice);
-EXPORT_SYMBOL(unregister_netdevice_notifier);
-EXPORT_SYMBOL(net_enable_timestamp);
-EXPORT_SYMBOL(net_disable_timestamp);
-EXPORT_SYMBOL(dev_get_flags);
-
-EXPORT_SYMBOL(dev_load);
-
-EXPORT_PER_CPU_SYMBOL(softnet_data);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
new file mode 100644
index 00000000000..508f9c18992
--- /dev/null
+++ b/net/core/dev_addr_lists.c
@@ -0,0 +1,741 @@
+/*
+ * net/core/dev_addr_lists.c - Functions for handling net device lists
+ * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This file contains functions for working with unicast, multicast and device
+ * addresses lists.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+
+/*
+ * General list handling functions
+ */
+
+static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
+			    unsigned char *addr, int addr_len,
+			    unsigned char addr_type, bool global)
+{
+	struct netdev_hw_addr *ha;
+	int alloc_size;
+
+	if (addr_len > MAX_ADDR_LEN)
+		return -EINVAL;
+
+	list_for_each_entry(ha, &list->list, list) {
+		if (!memcmp(ha->addr, addr, addr_len) &&
+		    ha->type == addr_type) {
+			if (global) {
+				/* check if addr is already used as global */
+				if (ha->global_use)
+					return 0;
+				else
+					ha->global_use = true;
+			}
+			ha->refcount++;
+			return 0;
+		}
+	}
+
+
+	alloc_size = sizeof(*ha);
+	if (alloc_size < L1_CACHE_BYTES)
+		alloc_size = L1_CACHE_BYTES;
+	ha = kmalloc(alloc_size, GFP_ATOMIC);
+	if (!ha)
+		return -ENOMEM;
+	memcpy(ha->addr, addr, addr_len);
+	ha->type = addr_type;
+	ha->refcount = 1;
+	ha->global_use = global;
+	ha->synced = false;
+	list_add_tail_rcu(&ha->list, &list->list);
+	list->count++;
+	return 0;
+}
+
+static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
+}
+
+static void ha_rcu_free(struct rcu_head *head)
+{
+	struct netdev_hw_addr *ha;
+
+	ha = container_of(head, struct netdev_hw_addr, rcu_head);
+	kfree(ha);
+}
+
+static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
+			    unsigned char *addr, int addr_len,
+			    unsigned char addr_type, bool global)
+{
+	struct netdev_hw_addr *ha;
+
+	list_for_each_entry(ha, &list->list, list) {
+		if (!memcmp(ha->addr, addr, addr_len) &&
+		    (ha->type == addr_type || !addr_type)) {
+			if (global) {
+				if (!ha->global_use)
+					break;
+				else
+					ha->global_use = false;
+			}
+			if (--ha->refcount)
+				return 0;
+			list_del_rcu(&ha->list);
+			call_rcu(&ha->rcu_head, ha_rcu_free);
+			list->count--;
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	return __hw_addr_del_ex(list, addr, addr_len, addr_type, false);
+}
+
+int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
+			   struct netdev_hw_addr_list *from_list,
+			   int addr_len, unsigned char addr_type)
+{
+	int err;
+	struct netdev_hw_addr *ha, *ha2;
+	unsigned char type;
+
+	list_for_each_entry(ha, &from_list->list, list) {
+		type = addr_type ? addr_type : ha->type;
+		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
+		if (err)
+			goto unroll;
+	}
+	return 0;
+
+unroll:
+	list_for_each_entry(ha2, &from_list->list, list) {
+		if (ha2 == ha)
+			break;
+		type = addr_type ? addr_type : ha2->type;
+		__hw_addr_del(to_list, ha2->addr, addr_len, type);
+	}
+	return err;
+}
+EXPORT_SYMBOL(__hw_addr_add_multiple);
+
+void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
+			    struct netdev_hw_addr_list *from_list,
+			    int addr_len, unsigned char addr_type)
+{
+	struct netdev_hw_addr *ha;
+	unsigned char type;
+
+	list_for_each_entry(ha, &from_list->list, list) {
+		type = addr_type ? addr_type : ha->type;
+		__hw_addr_del(to_list, ha->addr, addr_len, type);
+	}
+}
+EXPORT_SYMBOL(__hw_addr_del_multiple);
+
+int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
+		   struct netdev_hw_addr_list *from_list,
+		   int addr_len)
+{
+	int err = 0;
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
+		if (!ha->synced) {
+			err = __hw_addr_add(to_list, ha->addr,
+					    addr_len, ha->type);
+			if (err)
+				break;
+			ha->synced = true;
+			ha->refcount++;
+		} else if (ha->refcount == 1) {
+			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
+			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
+		}
+	}
+	return err;
+}
+EXPORT_SYMBOL(__hw_addr_sync);
+
+void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
+		      struct netdev_hw_addr_list *from_list,
+		      int addr_len)
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
+		if (ha->synced) {
+			__hw_addr_del(to_list, ha->addr,
+				      addr_len, ha->type);
+			ha->synced = false;
+			__hw_addr_del(from_list, ha->addr,
+				      addr_len, ha->type);
+		}
+	}
+}
+EXPORT_SYMBOL(__hw_addr_unsync);
+
+void __hw_addr_flush(struct netdev_hw_addr_list *list)
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		list_del_rcu(&ha->list);
+		call_rcu(&ha->rcu_head, ha_rcu_free);
+	}
+	list->count = 0;
+}
+EXPORT_SYMBOL(__hw_addr_flush);
+
+void __hw_addr_init(struct netdev_hw_addr_list *list)
+{
+	INIT_LIST_HEAD(&list->list);
+	list->count = 0;
+}
+EXPORT_SYMBOL(__hw_addr_init);
+
+/*
+ * Device addresses handling functions
+ */
+
+/**
+ *	dev_addr_flush - Flush device address list
+ *	@dev: device
+ *
+ *	Flush device address list and reset ->dev_addr.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+void dev_addr_flush(struct net_device *dev)
+{
+	/* rtnl_mutex must be held here */
+
+	__hw_addr_flush(&dev->dev_addrs);
+	dev->dev_addr = NULL;
+}
+EXPORT_SYMBOL(dev_addr_flush);
+
+/**
+ *	dev_addr_init - Init device address list
+ *	@dev: device
+ *
+ *	Init device address list and create the first element,
+ *	used by ->dev_addr.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_init(struct net_device *dev)
+{
+	unsigned char addr[MAX_ADDR_LEN];
+	struct netdev_hw_addr *ha;
+	int err;
+
+	/* rtnl_mutex must be held here */
+
+	__hw_addr_init(&dev->dev_addrs);
+	memset(addr, 0, sizeof(addr));
+	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
+			    NETDEV_HW_ADDR_T_LAN);
+	if (!err) {
+		/*
+		 * Get the first (previously created) address from the list
+		 * and set dev_addr pointer to this location.
+		 */
+		ha = list_first_entry(&dev->dev_addrs.list,
+				      struct netdev_hw_addr, list);
+		dev->dev_addr = ha->addr;
+	}
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_init);
+
+/**
+ *	dev_addr_add - Add a device address
+ *	@dev: device
+ *	@addr: address to add
+ *	@addr_type: address type
+ *
+ *	Add a device address to the device or increase the reference count if
+ *	it already exists.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add(struct net_device *dev, unsigned char *addr,
+		 unsigned char addr_type)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_add);
+
+/**
+ *	dev_addr_del - Release a device address.
+ *	@dev: device
+ *	@addr: address to delete
+ *	@addr_type: address type
+ *
+ *	Release reference to a device address and remove it from the device
+ *	if the reference count drops to zero.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del(struct net_device *dev, unsigned char *addr,
+		 unsigned char addr_type)
+{
+	int err;
+	struct netdev_hw_addr *ha;
+
+	ASSERT_RTNL();
+
+	/*
+	 * dev->dev_addr points into the first list entry, so refuse to drop
+	 * the last reference to that entry; other addresses stay removable.
+	 */
+	ha = list_first_entry(&dev->dev_addrs.list,
+			      struct netdev_hw_addr, list);
+	if (ha->refcount == 1 && (ha->type == addr_type || !addr_type) &&
+	    !memcmp(ha->addr, addr, dev->addr_len))
+		return -ENOENT;
+
+	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_del);
+
+/**
+ *	dev_addr_add_multiple - Add device addresses from another device
+ *	@to_dev: device to which addresses will be added
+ *	@from_dev: device from which addresses will be added
+ *	@addr_type: address type - 0 means type will be used from from_dev
+ *
+ *	Add device addresses of the one device to another.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev,
+			  unsigned char addr_type)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	if (from_dev->addr_len != to_dev->addr_len)
+		return -EINVAL;
+	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
+				     to_dev->addr_len, addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_add_multiple);
+
+/**
+ *	dev_addr_del_multiple - Delete device addresses by another device
+ *	@to_dev: device where the addresses will be deleted
+ *	@from_dev: device whose address list selects the addresses to delete
+ *	@addr_type: address type - 0 means type will be used from from_dev
+ *
+ *	Deletes addresses in to device by the list of addresses in from device.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev,
+			  unsigned char addr_type)
+{
+	ASSERT_RTNL();
+
+	if (from_dev->addr_len != to_dev->addr_len)
+		return -EINVAL;
+	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
+			       to_dev->addr_len, addr_type);
+	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+	return 0;
+}
+EXPORT_SYMBOL(dev_addr_del_multiple);
+
+/*
+ * Unicast list handling functions
+ */
+
+/**
+ *	dev_uc_add - Add a secondary unicast address
+ *	@dev: device
+ *	@addr: address to add
+ *
+ *	Add a secondary unicast address to the device or increase
+ *	the reference count if it already exists.
+ */
+int dev_uc_add(struct net_device *dev, unsigned char *addr)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
+			    NETDEV_HW_ADDR_T_UNICAST);
+	if (!err)
+		__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_uc_add);
+
+/**
+ *	dev_uc_del - Release secondary unicast address.
+ *	@dev: device
+ *	@addr: address to delete
+ *
+ *	Release reference to a secondary unicast address and remove it
+ *	from the device if the reference count drops to zero.
+ */
+int dev_uc_del(struct net_device *dev, unsigned char *addr)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
+			    NETDEV_HW_ADDR_T_UNICAST);
+	if (!err)
+		__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_uc_del);
+
+/**
+ *	dev_uc_sync - Synchronize device's unicast list to another device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Add newly added addresses to the destination device and release
+ *	addresses that have no users left. The source device must be
+ *	locked by netif_addr_lock_bh.
+ *
+ *	This function is intended to be called from the dev->set_rx_mode
+ *	function of layered software devices.
+ */
+int dev_uc_sync(struct net_device *to, struct net_device *from)
+{
+	int err = 0;
+
+	if (to->addr_len != from->addr_len)
+		return -EINVAL;
+
+	netif_addr_lock_bh(to);
+	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
+	if (!err)
+		__dev_set_rx_mode(to);
+	netif_addr_unlock_bh(to);
+	return err;
+}
+EXPORT_SYMBOL(dev_uc_sync);
+
+/**
+ *	dev_uc_unsync - Remove synchronized addresses from the destination device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Remove all addresses that were added to the destination device by
+ *	dev_uc_sync(). This function is intended to be called from the
+ *	dev->stop function of layered software devices.
+ */
+void dev_uc_unsync(struct net_device *to, struct net_device *from)
+{
+	if (to->addr_len != from->addr_len)
+		return;
+
+	netif_addr_lock_bh(from);
+	netif_addr_lock(to);
+	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
+	__dev_set_rx_mode(to);
+	netif_addr_unlock(to);
+	netif_addr_unlock_bh(from);
+}
+EXPORT_SYMBOL(dev_uc_unsync);
+
+/**
+ *	dev_uc_flush - Flush unicast addresses
+ *	@dev: device
+ *
+ *	Flush unicast addresses.
+ */
+void dev_uc_flush(struct net_device *dev)
+{
+	netif_addr_lock_bh(dev);
+	__hw_addr_flush(&dev->uc);
+	netif_addr_unlock_bh(dev);
+}
+EXPORT_SYMBOL(dev_uc_flush);
+
+/**
+ *	dev_uc_init - Init unicast address list
+ *	@dev: device
+ *
+ *	Init unicast address list.
+ */
+void dev_uc_init(struct net_device *dev)
+{
+	__hw_addr_init(&dev->uc);
+}
+EXPORT_SYMBOL(dev_uc_init);
+
+/*
+ * Multicast list handling functions
+ */
+
+static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
+			bool global)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
+			       NETDEV_HW_ADDR_T_MULTICAST, global);
+	if (!err)
+		__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+/**
+ *	dev_mc_add - Add a multicast address
+ *	@dev: device
+ *	@addr: address to add
+ *
+ *	Add a multicast address to the device or increase
+ *	the reference count if it already exists.
+ */
+int dev_mc_add(struct net_device *dev, unsigned char *addr)
+{
+	return __dev_mc_add(dev, addr, false);
+}
+EXPORT_SYMBOL(dev_mc_add);
+
+/**
+ *	dev_mc_add_global - Add a global multicast address
+ *	@dev: device
+ *	@addr: address to add
+ *
+ *	Add a global multicast address to the device.
+ */
+int dev_mc_add_global(struct net_device *dev, unsigned char *addr)
+{
+	return __dev_mc_add(dev, addr, true);
+}
+EXPORT_SYMBOL(dev_mc_add_global);
+
+static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
+			bool global)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len,
+			       NETDEV_HW_ADDR_T_MULTICAST, global);
+	if (!err)
+		__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+
+/**
+ *	dev_mc_del - Delete a multicast address.
+ *	@dev: device
+ *	@addr: address to delete
+ *
+ *	Release reference to a multicast address and remove it
+ *	from the device if the reference count drops to zero.
+ */
+int dev_mc_del(struct net_device *dev, unsigned char *addr)
+{
+	return __dev_mc_del(dev, addr, false);
+}
+EXPORT_SYMBOL(dev_mc_del);
+
+/**
+ *	dev_mc_del_global - Delete a global multicast address.
+ *	@dev: device
+ *	@addr: address to delete
+ *
+ *	Release reference to a multicast address and remove it
+ *	from the device if the reference count drops to zero.
+ */
+int dev_mc_del_global(struct net_device *dev, unsigned char *addr)
+{
+	return __dev_mc_del(dev, addr, true);
+}
+EXPORT_SYMBOL(dev_mc_del_global);
+
+/**
+ *	dev_mc_sync - Synchronize device's multicast list to another device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Add newly added addresses to the destination device and release
+ *	addresses that have no users left. The source device must be
+ *	locked by netif_addr_lock_bh.
+ *
+ *	This function is intended to be called from the dev->set_multicast_list
+ *	or dev->set_rx_mode function of layered software devices.
+ */
+int dev_mc_sync(struct net_device *to, struct net_device *from)
+{
+	int err = 0;
+
+	if (to->addr_len != from->addr_len)
+		return -EINVAL;
+
+	netif_addr_lock_bh(to);
+	err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
+	if (!err)
+		__dev_set_rx_mode(to);
+	netif_addr_unlock_bh(to);
+	return err;
+}
+EXPORT_SYMBOL(dev_mc_sync);
+
+/**
+ *	dev_mc_unsync - Remove synchronized addresses from the destination device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Remove all addresses that were added to the destination device by
+ *	dev_mc_sync(). This function is intended to be called from the
+ *	dev->stop function of layered software devices.
+ */
+void dev_mc_unsync(struct net_device *to, struct net_device *from)
+{
+	if (to->addr_len != from->addr_len)
+		return;
+
+	netif_addr_lock_bh(from);
+	netif_addr_lock(to);
+	__hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
+	__dev_set_rx_mode(to);
+	netif_addr_unlock(to);
+	netif_addr_unlock_bh(from);
+}
+EXPORT_SYMBOL(dev_mc_unsync);
+
+/**
+ *	dev_mc_flush - Flush multicast addresses
+ *	@dev: device
+ *
+ *	Flush multicast addresses.
+ */
+void dev_mc_flush(struct net_device *dev)
+{
+	netif_addr_lock_bh(dev);
+	__hw_addr_flush(&dev->mc);
+	netif_addr_unlock_bh(dev);
+}
+EXPORT_SYMBOL(dev_mc_flush);
+
+/**
+ *	dev_mc_init - Init multicast address list
+ *	@dev: device
+ *
+ *	Init multicast address list.
+ */
+void dev_mc_init(struct net_device *dev)
+{
+	__hw_addr_init(&dev->mc);
+}
+EXPORT_SYMBOL(dev_mc_init);
+
+#ifdef CONFIG_PROC_FS
+#include <linux/seq_file.h>
+
+static int dev_mc_seq_show(struct seq_file *seq, void *v)
+{
+	struct netdev_hw_addr *ha;
+	struct net_device *dev = v;
+
+	if (v == SEQ_START_TOKEN)
+		return 0;
+
+	netif_addr_lock_bh(dev);
+	netdev_for_each_mc_addr(ha, dev) {
+		int i;
+
+		seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
+			   dev->name, ha->refcount, ha->global_use);
+
+		for (i = 0; i < dev->addr_len; i++)
+			seq_printf(seq, "%02x", ha->addr[i]);
+
+		seq_putc(seq, '\n');
+	}
+	netif_addr_unlock_bh(dev);
+	return 0;
+}
+
+static const struct seq_operations dev_mc_seq_ops = {
+	.start = dev_seq_start,
+	.next  = dev_seq_next,
+	.stop  = dev_seq_stop,
+	.show  = dev_mc_seq_show,
+};
+
+static int dev_mc_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &dev_mc_seq_ops,
+			    sizeof(struct seq_net_private));
+}
+
+static const struct file_operations dev_mc_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = dev_mc_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_net,
+};
+
+#endif
+
+static int __net_init dev_mc_net_init(struct net *net)
+{
+	if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
+		return -ENOMEM;
+	return 0;
+}
+
+static void __net_exit dev_mc_net_exit(struct net *net)
+{
+	proc_net_remove(net, "dev_mcast");
+}
+
+static struct pernet_operations __net_initdata dev_mc_net_ops = {
+	.init = dev_mc_net_init,
+	.exit = dev_mc_net_exit,
+};
+
+void __init dev_mcast_init(void)
+{
+	register_pernet_subsys(&dev_mc_net_ops);
+}
+
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
deleted file mode 100644
index 9e2fa39f22a..00000000000
--- a/net/core/dev_mcast.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- *	Linux NET3:	Multicast List maintenance.
- *
- *	Authors:
- *		Tim Kordas <tjk@nostromo.eeap.cwru.edu>
- *		Richard Underwood <richard@wuzz.demon.co.uk>
- *
- *	Stir fried together from the IP multicast and CAP patches above
- *		Alan Cox <alan@lxorguk.ukuu.org.uk>
- *
- *	Fixes:
- *		Alan Cox	:	Update the device on a real delete
- *					rather than any time but...
- *		Alan Cox	:	IFF_ALLMULTI support.
- *		Alan Cox	: 	New format set_multicast_list() calls.
- *		Gleb Natapov    :       Remove dev_mc_lock.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <net/net_namespace.h>
-#include <net/ip.h>
-#include <net/route.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/arp.h>
-
-
-/*
- *	Device multicast list maintenance.
- *
- *	This is used both by IP and by the user level maintenance functions.
- *	Unlike BSD we maintain a usage count on a given multicast address so
- *	that a casual user application can add/delete multicasts used by
- *	protocols without doing damage to the protocols when it deletes the
- *	entries. It also helps IP as it tracks overlapping maps.
- *
- *	Device mc lists are changed by bh at least if IPv6 is enabled,
- *	so that it must be bh protected.
- *
- *	We block accesses to device mc filters with netif_tx_lock.
- */
-
-/*
- *	Delete a device level multicast
- */
-
-int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
-{
-	int err;
-
-	netif_addr_lock_bh(dev);
-	err = __dev_addr_delete(&dev->mc_list, &dev->mc_count,
-				addr, alen, glbl);
-	if (!err) {
-		/*
-		 *	We have altered the list, so the card
-		 *	loaded filter is now wrong. Fix it
-		 */
-
-		__dev_set_rx_mode(dev);
-	}
-	netif_addr_unlock_bh(dev);
-	return err;
-}
-
-/*
- *	Add a device level multicast
- */
-
-int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
-{
-	int err;
-
-	netif_addr_lock_bh(dev);
-	err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
-	if (!err)
-		__dev_set_rx_mode(dev);
-	netif_addr_unlock_bh(dev);
-	return err;
-}
-
-/**
- *	dev_mc_sync	- Synchronize device's multicast list to another device
- *	@to: destination device
- *	@from: source device
- *
- * 	Add newly added addresses to the destination device and release
- * 	addresses that have no users left. The source device must be
- * 	locked by netif_tx_lock_bh.
- *
- *	This function is intended to be called from the dev->set_multicast_list
- *	or dev->set_rx_mode function of layered software devices.
- */
-int dev_mc_sync(struct net_device *to, struct net_device *from)
-{
-	int err = 0;
-
-	netif_addr_lock_bh(to);
-	err = __dev_addr_sync(&to->mc_list, &to->mc_count,
-			      &from->mc_list, &from->mc_count);
-	if (!err)
-		__dev_set_rx_mode(to);
-	netif_addr_unlock_bh(to);
-
-	return err;
-}
-EXPORT_SYMBOL(dev_mc_sync);
-
-
-/**
- * 	dev_mc_unsync	- Remove synchronized addresses from the destination
- * 			  device
- *	@to: destination device
- *	@from: source device
- *
- * 	Remove all addresses that were added to the destination device by
- * 	dev_mc_sync(). This function is intended to be called from the
- * 	dev->stop function of layered software devices.
- */
-void dev_mc_unsync(struct net_device *to, struct net_device *from)
-{
-	netif_addr_lock_bh(from);
-	netif_addr_lock(to);
-
-	__dev_addr_unsync(&to->mc_list, &to->mc_count,
-			  &from->mc_list, &from->mc_count);
-	__dev_set_rx_mode(to);
-
-	netif_addr_unlock(to);
-	netif_addr_unlock_bh(from);
-}
-EXPORT_SYMBOL(dev_mc_unsync);
-
-#ifdef CONFIG_PROC_FS
-static int dev_mc_seq_show(struct seq_file *seq, void *v)
-{
-	struct dev_addr_list *m;
-	struct net_device *dev = v;
-
-	if (v == SEQ_START_TOKEN)
-		return 0;
-
-	netif_addr_lock_bh(dev);
-	for (m = dev->mc_list; m; m = m->next) {
-		int i;
-
-		seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
-			   dev->name, m->dmi_users, m->dmi_gusers);
-
-		for (i = 0; i < m->dmi_addrlen; i++)
-			seq_printf(seq, "%02x", m->dmi_addr[i]);
-
-		seq_putc(seq, '\n');
-	}
-	netif_addr_unlock_bh(dev);
-	return 0;
-}
-
-static const struct seq_operations dev_mc_seq_ops = {
-	.start = dev_seq_start,
-	.next  = dev_seq_next,
-	.stop  = dev_seq_stop,
-	.show  = dev_mc_seq_show,
-};
-
-static int dev_mc_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &dev_mc_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations dev_mc_seq_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = dev_mc_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
-#endif
-
-static int __net_init dev_mc_net_init(struct net *net)
-{
-	if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
-		return -ENOMEM;
-	return 0;
-}
-
-static void __net_exit dev_mc_net_exit(struct net *net)
-{
-	proc_net_remove(net, "dev_mcast");
-}
-
-static struct pernet_operations __net_initdata dev_mc_net_ops = {
-	.init = dev_mc_net_init,
-	.exit = dev_mc_net_exit,
-};
-
-void __init dev_mcast_init(void)
-{
-	register_pernet_subsys(&dev_mc_net_ops);
-}
-
-EXPORT_SYMBOL(dev_mc_add);
-EXPORT_SYMBOL(dev_mc_delete);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 9d66fa953ab..36e603c78ce 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -21,6 +21,7 @@
 #include <linux/percpu.h>
 #include <linux/timer.h>
 #include <linux/bitops.h>
+#include <linux/slab.h>
 #include <net/genetlink.h>
 #include <net/netevent.h>
 
@@ -41,7 +42,7 @@ static void send_dm_alert(struct work_struct *unused);
  * netlink alerts
  */
 static int trace_state = TRACE_OFF;
-static spinlock_t trace_state_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(trace_state_lock);
 
 struct per_cpu_dm_data {
 	struct work_struct dm_alert_work;
@@ -52,6 +53,7 @@ struct per_cpu_dm_data {
 
 struct dm_hw_stat_delta {
 	struct net_device *dev;
+	unsigned long last_rx;
 	struct list_head list;
 	struct rcu_head rcu;
 	unsigned long last_drop_val;
@@ -170,27 +172,35 @@ out:
 	return;
 }
 
-static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
+static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
 {
 	trace_drop_common(skb, location);
 }
 
-static void trace_napi_poll_hit(struct napi_struct *napi)
+static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
 {
 	struct dm_hw_stat_delta *new_stat;
 
 	/*
-	 * Ratelimit our check time to dm_hw_check_delta jiffies
+	 * Don't check napi structures with no associated device
 	 */
-	if (!time_after(jiffies, napi->dev->last_rx + dm_hw_check_delta))
+	if (!napi->dev)
 		return;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
+		/*
+		 * only add a note to our monitor buffer if:
+		 * 1) this is the dev we received on
+		 * 2) its after the last_rx delta
+		 * 3) our rx_dropped count has gone up
+		 */
 		if ((new_stat->dev == napi->dev)  &&
+		    (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
 		    (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
 			trace_drop_common(NULL, NULL);
 			new_stat->last_drop_val = napi->dev->stats.rx_dropped;
+			new_stat->last_rx = jiffies;
 			break;
 		}
 	}
@@ -213,14 +223,19 @@ static int set_all_monitor_traces(int state)
 
 	spin_lock(&trace_state_lock);
 
+	if (state == trace_state) {
+		rc = -EAGAIN;
+		goto out_unlock;
+	}
+
 	switch (state) {
 	case TRACE_ON:
-		rc |= register_trace_kfree_skb(trace_kfree_skb_hit);
-		rc |= register_trace_napi_poll(trace_napi_poll_hit);
+		rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+		rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
 		break;
 	case TRACE_OFF:
-		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit);
-		rc |= unregister_trace_napi_poll(trace_napi_poll_hit);
+		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+		rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
 
 		tracepoint_synchronize_unregister();
 
@@ -241,11 +256,12 @@ static int set_all_monitor_traces(int state)
 
 	if (!rc)
 		trace_state = state;
+	else
+		rc = -EINPROGRESS;
 
+out_unlock:
 	spin_unlock(&trace_state_lock);
 
-	if (rc)
-		return -EINPROGRESS;
 	return rc;
 }
 
@@ -286,7 +302,7 @@ static int dropmon_net_event(struct notifier_block *ev_block,
 			goto out;
 
 		new_stat->dev = dev;
-		INIT_RCU_HEAD(&new_stat->rcu);
+		new_stat->last_rx = jiffies;
 		spin_lock(&trace_state_lock);
 		list_add_rcu(&new_stat->list, &hw_stats_list);
 		spin_unlock(&trace_state_lock);
@@ -331,9 +347,9 @@ static struct notifier_block dropmon_net_notifier = {
 
 static int __init init_net_drop_monitor(void)
 {
-	int cpu;
-	int rc, i, ret;
 	struct per_cpu_dm_data *data;
+	int cpu, rc;
+
 	printk(KERN_INFO "Initalizing network drop monitor service\n");
 
 	if (sizeof(void *) > 8) {
@@ -341,21 +357,12 @@ static int __init init_net_drop_monitor(void)
 		return -ENOSPC;
 	}
 
-	if (genl_register_family(&net_drop_monitor_family) < 0) {
+	rc = genl_register_family_with_ops(&net_drop_monitor_family,
+					   dropmon_ops,
+					   ARRAY_SIZE(dropmon_ops));
+	if (rc) {
 		printk(KERN_ERR "Could not create drop monitor netlink family\n");
-		return -EFAULT;
-	}
-
-	rc = -EFAULT;
-
-	for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) {
-		ret = genl_register_ops(&net_drop_monitor_family,
-					&dropmon_ops[i]);
-		if (ret) {
-			printk(KERN_CRIT "Failed to register operation %d\n",
-				dropmon_ops[i].cmd);
-			goto out_unreg;
-		}
+		return rc;
 	}
 
 	rc = register_netdevice_notifier(&dropmon_net_notifier);
diff --git a/net/core/dst.c b/net/core/dst.c
index 57bc4d5b8d0..b99c7c7ffce 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -12,11 +12,13 @@
 #include <linux/workqueue.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/string.h>
 #include <linux/types.h>
 #include <net/net_namespace.h>
+#include <linux/sched.h>
 
 #include <net/dst.h>
 
@@ -42,7 +44,7 @@ static atomic_t			 dst_total = ATOMIC_INIT(0);
  */
 static struct {
 	spinlock_t		lock;
-	struct dst_entry 	*list;
+	struct dst_entry	*list;
 	unsigned long		timer_inc;
 	unsigned long		timer_expires;
 } dst_garbage = {
@@ -50,7 +52,7 @@ static struct {
 	.timer_inc = DST_GC_MAX,
 };
 static void dst_gc_task(struct work_struct *work);
-static void ___dst_free(struct dst_entry * dst);
+static void ___dst_free(struct dst_entry *dst);
 
 static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task);
 
@@ -79,6 +81,7 @@ loop:
 	while ((dst = next) != NULL) {
 		next = dst->next;
 		prefetch(&next->next);
+		cond_resched();
 		if (likely(atomic_read(&dst->__refcnt))) {
 			last->next = dst;
 			last = dst;
@@ -133,8 +136,8 @@ loop:
 		}
 		expires = dst_garbage.timer_expires;
 		/*
-		 * if the next desired timer is more than 4 seconds in the future
-		 * then round the timer to whole seconds
+		 * if the next desired timer is more than 4 seconds in the
+		 * future then round the timer to whole seconds
 		 */
 		if (expires > 4*HZ)
 			expires = round_jiffies_relative(expires);
@@ -149,7 +152,8 @@ loop:
 		" expires: %lu elapsed: %lu us\n",
 		atomic_read(&dst_total), delayed, work_performed,
 		expires,
-		elapsed.tv_sec * USEC_PER_SEC + elapsed.tv_nsec / NSEC_PER_USEC);
+		elapsed.tv_sec * USEC_PER_SEC +
+		  elapsed.tv_nsec / NSEC_PER_USEC);
 #endif
 }
 
@@ -160,11 +164,11 @@ int dst_discard(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dst_discard);
 
-void * dst_alloc(struct dst_ops * ops)
+void *dst_alloc(struct dst_ops *ops)
 {
-	struct dst_entry * dst;
+	struct dst_entry *dst;
 
-	if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
+	if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
 		if (ops->gc(ops))
 			return NULL;
 	}
@@ -179,22 +183,22 @@ void * dst_alloc(struct dst_ops * ops)
 #if RT_CACHE_DEBUG >= 2
 	atomic_inc(&dst_total);
 #endif
-	atomic_inc(&ops->entries);
+	dst_entries_add(ops, 1);
 	return dst;
 }
+EXPORT_SYMBOL(dst_alloc);
 
-static void ___dst_free(struct dst_entry * dst)
+static void ___dst_free(struct dst_entry *dst)
 {
 	/* The first case (dev==NULL) is required, when
 	   protocol module is unloaded.
 	 */
-	if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
+	if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
 		dst->input = dst->output = dst_discard;
-	}
 	dst->obsolete = 2;
 }
 
-void __dst_free(struct dst_entry * dst)
+void __dst_free(struct dst_entry *dst)
 {
 	spin_lock_bh(&dst_garbage.lock);
 	___dst_free(dst);
@@ -208,6 +212,7 @@ void __dst_free(struct dst_entry * dst)
 	}
 	spin_unlock_bh(&dst_garbage.lock);
 }
+EXPORT_SYMBOL(__dst_free);
 
 struct dst_entry *dst_destroy(struct dst_entry * dst)
 {
@@ -223,15 +228,15 @@ again:
 	child = dst->child;
 
 	dst->hh = NULL;
-	if (hh && atomic_dec_and_test(&hh->hh_refcnt))
-		kfree(hh);
+	if (hh)
+		hh_cache_put(hh);
 
 	if (neigh) {
 		dst->neighbour = NULL;
 		neigh_release(neigh);
 	}
 
-	atomic_dec(&dst->ops->entries);
+	dst_entries_add(dst->ops, -1);
 
 	if (dst->ops->destroy)
 		dst->ops->destroy(dst);
@@ -259,19 +264,47 @@ again:
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(dst_destroy);
 
 void dst_release(struct dst_entry *dst)
 {
 	if (dst) {
-               int newrefcnt;
-
-		smp_mb__before_atomic_dec();
-               newrefcnt = atomic_dec_return(&dst->__refcnt);
-               WARN_ON(newrefcnt < 0);
+		int nocache = dst->flags & DST_NOCACHE; /* read before unref */
+		int newrefcnt = atomic_dec_return(&dst->__refcnt);
+
+		WARN_ON(newrefcnt < 0);
+		if (!newrefcnt && unlikely(nocache)) {
+			dst = dst_destroy(dst);
+			if (dst)
+				__dst_free(dst);
+		}
 	}
 }
 EXPORT_SYMBOL(dst_release);
 
+/**
+ * skb_dst_set_noref - sets skb dst, without taking a reference
+ * @skb: buffer the dst is attached to
+ * @dst: dst entry, no reference taken on it for @skb
+ *
+ * Stores @dst tagged with SKB_DST_NOREF, so skb_dst_drop() should not
+ * dst_release() it.  DST_NOCACHE entries are held and set normally.
+ */
+void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
+{
+	WARN_ON(!(rcu_read_lock_held() || rcu_read_lock_bh_held()));
+	if (likely(!(dst->flags & DST_NOCACHE))) {
+		skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
+	} else {
+		/* Uncached dst: dst_release() destroys it as soon as its
+		 * refcount drops to zero, so a real reference is required
+		 */
+		dst_hold(dst);
+		skb_dst_set(skb, dst);
+	}
+}
+EXPORT_SYMBOL(skb_dst_set_noref);
+
 /* Dirty hack. We did it in 2.2 (in __dst_free),
  * we have _very_ good reasons not to repeat
  * this mistake in 2.3, but we have no choice
@@ -280,8 +313,8 @@ EXPORT_SYMBOL(dst_release);
  *
  * Commented and originally written by Alexey.
  */
-static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
-			      int unregister)
+static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+		       int unregister)
 {
 	if (dst->ops->ifdown)
 		dst->ops->ifdown(dst, dev, unregister);
@@ -303,7 +336,8 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 	}
 }
 
-static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+static int dst_dev_event(struct notifier_block *this, unsigned long event,
+			 void *ptr)
 {
 	struct net_device *dev = ptr;
 	struct dst_entry *dst, *last = NULL;
@@ -326,9 +360,8 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void
 			last->next = dst;
 		else
 			dst_busy_list = dst;
-		for (; dst; dst = dst->next) {
+		for (; dst; dst = dst->next)
 			dst_ifdown(dst, dev, event != NETDEV_DOWN);
-		}
 		mutex_unlock(&dst_gc_mutex);
 		break;
 	}
@@ -337,13 +370,10 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void
 
 static struct notifier_block dst_dev_notifier = {
 	.notifier_call	= dst_dev_event,
+	.priority = -10, /* must be called after other network notifiers */
 };
 
 void __init dst_init(void)
 {
 	register_netdevice_notifier(&dst_dev_notifier);
 }
-
-EXPORT_SYMBOL(__dst_free);
-EXPORT_SYMBOL(dst_alloc);
-EXPORT_SYMBOL(dst_destroy);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d9d5160610d..17741782a34 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -17,7 +17,10 @@
 #include <linux/errno.h>
 #include <linux/ethtool.h>
 #include <linux/netdevice.h>
-#include <asm/uaccess.h>
+#include <linux/bitops.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
 
 /*
  * Some useful ethtool_ops methods that're device independent.
@@ -29,11 +32,19 @@ u32 ethtool_op_get_link(struct net_device *dev)
 {
 	return netif_carrier_ok(dev) ? 1 : 0;
 }
+EXPORT_SYMBOL(ethtool_op_get_link);
+
+u32 ethtool_op_get_rx_csum(struct net_device *dev)
+{
+	return (dev->features & NETIF_F_ALL_CSUM) ? 1 : 0;
+}
+EXPORT_SYMBOL(ethtool_op_get_rx_csum);
 
 u32 ethtool_op_get_tx_csum(struct net_device *dev)
 {
 	return (dev->features & NETIF_F_ALL_CSUM) != 0;
 }
+EXPORT_SYMBOL(ethtool_op_get_tx_csum);
 
 int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
 {
@@ -54,6 +65,7 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
 
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
 
 int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
 {
@@ -64,11 +76,13 @@ int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
 
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
 
 u32 ethtool_op_get_sg(struct net_device *dev)
 {
 	return (dev->features & NETIF_F_SG) != 0;
 }
+EXPORT_SYMBOL(ethtool_op_get_sg);
 
 int ethtool_op_set_sg(struct net_device *dev, u32 data)
 {
@@ -79,11 +93,13 @@ int ethtool_op_set_sg(struct net_device *dev, u32 data)
 
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_sg);
 
 u32 ethtool_op_get_tso(struct net_device *dev)
 {
 	return (dev->features & NETIF_F_TSO) != 0;
 }
+EXPORT_SYMBOL(ethtool_op_get_tso);
 
 int ethtool_op_set_tso(struct net_device *dev, u32 data)
 {
@@ -94,11 +110,13 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data)
 
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_tso);
 
 u32 ethtool_op_get_ufo(struct net_device *dev)
 {
 	return (dev->features & NETIF_F_UFO) != 0;
 }
+EXPORT_SYMBOL(ethtool_op_get_ufo);
 
 int ethtool_op_set_ufo(struct net_device *dev, u32 data)
 {
@@ -108,12 +126,14 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data)
 		dev->features &= ~NETIF_F_UFO;
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_ufo);
 
 /* the following list of flags are the same as their associated
  * NETIF_F_xxx values in include/linux/netdevice.h
  */
 static const u32 flags_dup_features =
-	ETH_FLAG_LRO;
+	(ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | ETH_FLAG_NTUPLE |
+	 ETH_FLAG_RXHASH);
 
 u32 ethtool_op_get_flags(struct net_device *dev)
 {
@@ -124,22 +144,36 @@ u32 ethtool_op_get_flags(struct net_device *dev)
 
 	return dev->features & flags_dup_features;
 }
+EXPORT_SYMBOL(ethtool_op_get_flags);
 
-int ethtool_op_set_flags(struct net_device *dev, u32 data)
+int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
 {
-	if (data & ETH_FLAG_LRO)
-		dev->features |= NETIF_F_LRO;
-	else
-		dev->features &= ~NETIF_F_LRO;
+	if (data & ~supported)
+		return -EINVAL;
 
+	dev->features = ((dev->features & ~flags_dup_features) |
+			 (data & flags_dup_features));
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_flags);
+
+void ethtool_ntuple_flush(struct net_device *dev)
+{
+	struct ethtool_rx_ntuple_flow_spec_container *cur, *n;
+
+	list_for_each_entry_safe(cur, n, &dev->ethtool_ntuple_list.list, list) {
+		list_del(&cur->list);
+		kfree(cur);
+	}
+	dev->ethtool_ntuple_list.count = 0;
+}
+EXPORT_SYMBOL(ethtool_ntuple_flush);
 
 /* Handlers for each ethtool command */
 
 static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
 {
-	struct ethtool_cmd cmd = { ETHTOOL_GSET };
+	struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
 	int err;
 
 	if (!dev->ethtool_ops->get_settings)
@@ -167,19 +201,30 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
 	return dev->ethtool_ops->set_settings(dev, &cmd);
 }
 
-static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
+						  void __user *useraddr)
 {
 	struct ethtool_drvinfo info;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 
-	if (!ops->get_drvinfo)
-		return -EOPNOTSUPP;
-
 	memset(&info, 0, sizeof(info));
 	info.cmd = ETHTOOL_GDRVINFO;
-	ops->get_drvinfo(dev, &info);
+	if (ops && ops->get_drvinfo) {
+		ops->get_drvinfo(dev, &info);
+	} else if (dev->dev.parent && dev->dev.parent->driver) {
+		strlcpy(info.bus_info, dev_name(dev->dev.parent),
+			sizeof(info.bus_info));
+		strlcpy(info.driver, dev->dev.parent->driver->name,
+			sizeof(info.driver));
+	} else {
+		return -EOPNOTSUPP;
+	}
 
-	if (ops->get_sset_count) {
+	/*
+	 * this method of obtaining string set info is deprecated;
+	 * Use ETHTOOL_GSSET_INFO instead.
+	 */
+	if (ops && ops->get_sset_count) {
 		int rc;
 
 		rc = ops->get_sset_count(dev, ETH_SS_TEST);
@@ -191,17 +236,10 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
 		rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS);
 		if (rc >= 0)
 			info.n_priv_flags = rc;
-	} else {
-		/* code path for obsolete hooks */
-
-		if (ops->self_test_count)
-			info.testinfo_len = ops->self_test_count(dev);
-		if (ops->get_stats_count)
-			info.n_stats = ops->get_stats_count(dev);
 	}
-	if (ops->get_regs_len)
+	if (ops && ops->get_regs_len)
 		info.regdump_len = ops->get_regs_len(dev);
-	if (ops->get_eeprom_len)
+	if (ops && ops->get_eeprom_len)
 		info.eedump_len = ops->get_eeprom_len(dev);
 
 	if (copy_to_user(useraddr, &info, sizeof(info)))
@@ -209,22 +247,94 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
 	return 0;
 }
 
-static int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
+/*
+ * ETHTOOL_GSSET_INFO: for every string set requested in sset_mask, ask
+ * the driver for its size via get_sset_count().  Sets the driver rejects
+ * are cleared from the returned mask; the counts of the remaining sets
+ * are written, in ascending set order, at the data member of the
+ * user's struct ethtool_sset_info.
+ */
+static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
+						    void __user *useraddr)
 {
-	struct ethtool_rxnfc cmd;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_sset_info info;
+	u32 *counts;
+	u64 wanted;
+	int set, n_sets, filled = 0, ret, rc;
+
+	if (!ops->get_sset_count)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&info, useraddr, sizeof(info)))
+		return -EFAULT;
+
+	/* remember the request: info is wiped and rebuilt as the reply */
+	wanted = info.sset_mask;
+	if (!wanted)
+		return 0;
+
+	memset(&info, 0, sizeof(info));
+	info.cmd = ETHTOOL_GSSET_INFO;
+
+	/* one u32 slot per requested set is the most we can return */
+	n_sets = hweight64(wanted);
+	counts = kzalloc(n_sets * sizeof(*counts), GFP_USER);
+	if (!counts)
+		return -ENOMEM;
+
+	/* keep only the sets the driver reports a size for */
+	for (set = 0; set < 64; set++) {
+		if (!(wanted & (1ULL << set)))
+			continue;
+
+		rc = ops->get_sset_count(dev, set);
+		if (rc < 0)
+			continue;
+		info.sset_mask |= (1ULL << set);
+		counts[filled++] = rc;
+	}
+
+	/* reply header first, then the packed counts */
+	if (copy_to_user(useraddr, &info, sizeof(info)) ||
+	    copy_to_user(useraddr + offsetof(struct ethtool_sset_info, data),
+			 counts, filled * sizeof(*counts)))
+		ret = -EFAULT;
+	else
+		ret = 0;
+
+	kfree(counts);
+	return ret;
+}
+
+static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
+						u32 cmd, void __user *useraddr)
+{
+	struct ethtool_rxnfc info;
+	size_t info_size = sizeof(info);
 
 	if (!dev->ethtool_ops->set_rxnfc)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+	/* struct ethtool_rxnfc was originally defined for
+	 * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
+	 * members.  User-space might still be using that
+	 * definition. */
+	if (cmd == ETHTOOL_SRXFH)
+		info_size = (offsetof(struct ethtool_rxnfc, data) +
+			     sizeof(info.data));
+
+	if (copy_from_user(&info, useraddr, info_size))
 		return -EFAULT;
 
-	return dev->ethtool_ops->set_rxnfc(dev, &cmd);
+	return dev->ethtool_ops->set_rxnfc(dev, &info);
 }
 
-static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
+						u32 cmd, void __user *useraddr)
 {
 	struct ethtool_rxnfc info;
+	size_t info_size = sizeof(info);
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 	int ret;
 	void *rule_buf = NULL;
@@ -232,13 +342,22 @@ static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
 	if (!ops->get_rxnfc)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&info, useraddr, sizeof(info)))
+	/* struct ethtool_rxnfc was originally defined for
+	 * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
+	 * members.  User-space might still be using that
+	 * definition. */
+	if (cmd == ETHTOOL_GRXFH)
+		info_size = (offsetof(struct ethtool_rxnfc, data) +
+			     sizeof(info.data));
+
+	if (copy_from_user(&info, useraddr, info_size))
 		return -EFAULT;
 
 	if (info.cmd == ETHTOOL_GRXCLSRLALL) {
 		if (info.rule_cnt > 0) {
-			rule_buf = kmalloc(info.rule_cnt * sizeof(u32),
-					   GFP_USER);
+			if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
+				rule_buf = kzalloc(info.rule_cnt * sizeof(u32),
+						   GFP_USER);
 			if (!rule_buf)
 				return -ENOMEM;
 		}
@@ -249,7 +368,7 @@ static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
 		goto err_out;
 
 	ret = -EFAULT;
-	if (copy_to_user(useraddr, &info, sizeof(info)))
+	if (copy_to_user(useraddr, &info, info_size))
 		goto err_out;
 
 	if (rule_buf) {
@@ -266,6 +385,421 @@ err_out:
 	return ret;
 }
 
+/* ETHTOOL_GRXFHINDIR: the driver fills a zeroed buffer sized from the
+ * user's 'size' field; the whole buffer is then copied back. */
+static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
+						     void __user *useraddr)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_rxfh_indir *indir;
+	size_t full_size;
+	u32 user_size;
+	int ret;
+
+	if (!ops->get_rxfh_indir)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&user_size,
+			   useraddr + offsetof(struct ethtool_rxfh_indir, size),
+			   sizeof(user_size)))
+		return -EFAULT;
+
+	/* reject sizes whose allocation would exceed KMALLOC_MAX_SIZE */
+	if (user_size >
+	    (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
+		return -ENOMEM;
+	full_size = sizeof(*indir) + sizeof(*indir->ring_index) * user_size;
+	indir = kzalloc(full_size, GFP_USER);
+	if (!indir)
+		return -ENOMEM;
+
+	indir->cmd = ETHTOOL_GRXFHINDIR;
+	indir->size = user_size;
+	ret = ops->get_rxfh_indir(dev, indir);
+	if (!ret && copy_to_user(useraddr, indir, full_size))
+		ret = -EFAULT;
+
+	kfree(indir);
+	return ret;
+}
+
+/* Copy a user-sized struct ethtool_rxfh_indir in and pass it to the driver */
+static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
+						     void __user *useraddr)
+{
+	struct ethtool_rxfh_indir *indir;
+	u32 table_size;
+	size_t full_size;
+	int ret = -EFAULT;
+
+	if (!dev->ethtool_ops->set_rxfh_indir)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&table_size,
+			   useraddr + offsetof(struct ethtool_rxfh_indir, size),
+			   sizeof(table_size)))
+		return -EFAULT;
+
+	if (table_size >
+	    (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
+		return -ENOMEM;
+	full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
+	indir = kmalloc(full_size, GFP_USER);
+	if (!indir)
+		return -ENOMEM;
+
+	if (copy_from_user(indir, useraddr, full_size))
+		goto out;
+
+	/* size was fetched twice: pin it to the value the buffer is sized for */
+	indir->size = table_size;
+	ret = dev->ethtool_ops->set_rxfh_indir(dev, indir);
+out:
+	kfree(indir);
+	return ret;
+}
+
+/*
+ * Cache a copy of @spec in @list, using the preallocated container @fsc.
+ * @fsc is consumed either way: linked into the list, or freed when the
+ * list already holds ETHTOOL_MAX_NTUPLE_LIST_ENTRY filters.
+ */
+static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
+			struct ethtool_rx_ntuple_flow_spec *spec,
+			struct ethtool_rx_ntuple_flow_spec_container *fsc)
+{
+	/* list is full: drop the container instead of growing forever */
+	if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY) {
+		kfree(fsc);
+		return;
+	}
+
+	fsc->fs.flow_type = spec->flow_type;
+	memcpy(&fsc->fs.h_u, &spec->h_u, sizeof(spec->h_u));
+	memcpy(&fsc->fs.m_u, &spec->m_u, sizeof(spec->m_u));
+	fsc->fs.vlan_tag = spec->vlan_tag;
+	fsc->fs.vlan_tag_mask = spec->vlan_tag_mask;
+	fsc->fs.data = spec->data;
+	fsc->fs.data_mask = spec->data_mask;
+	fsc->fs.action = spec->action;
+
+	list_add_tail_rcu(&fsc->list, &list->list);
+	list->count++;
+}
+
+/*
+ * ethtool does not (or did not) fill in masks for flow parameters that
+ * were left unspecified.  A field whose value and mask are both zero is
+ * therefore given an all-ones mask here, so that drivers need not
+ * special-case it.  Only TCP/UDP/SCTP over IPv4 specs are touched.
+ */
+static void rx_ntuple_fix_masks(struct ethtool_rx_ntuple_flow_spec *fs)
+{
+	struct ethtool_tcpip4_spec *val = &fs->h_u.tcp_ip4_spec;
+	struct ethtool_tcpip4_spec *msk = &fs->m_u.tcp_ip4_spec;
+
+	if (!(fs->flow_type == TCP_V4_FLOW ||
+	      fs->flow_type == UDP_V4_FLOW ||
+	      fs->flow_type == SCTP_V4_FLOW))
+		return;
+
+	if (!val->ip4src && !msk->ip4src)
+		msk->ip4src = htonl(0xffffffff);
+	if (!val->ip4dst && !msk->ip4dst)
+		msk->ip4dst = htonl(0xffffffff);
+	if (!val->psrc && !msk->psrc)
+		msk->psrc = htons(0xffff);
+	if (!val->pdst && !msk->pdst)
+		msk->pdst = htons(0xffff);
+	if (!val->tos && !msk->tos)
+		msk->tos = 0xff;
+	if (!fs->vlan_tag && !fs->vlan_tag_mask)
+		fs->vlan_tag_mask = 0xffff;
+	if (!fs->data && !fs->data_mask)
+		fs->data_mask = ~0ULL;
+}
+
+/* Install an RX n-tuple filter, caching it if the driver has no GET op */
+static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
+						    void __user *useraddr)
+{
+	struct ethtool_rx_ntuple cmd;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL;
+	int ret;
+
+	if (!(dev->features & NETIF_F_NTUPLE))
+		return -EINVAL;
+
+	if (!ops->set_rx_ntuple)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+		return -EFAULT;
+
+	rx_ntuple_fix_masks(&cmd.fs);
+
+	/* Allocate the cache entry before calling the driver, so -ENOMEM can
+	 * never follow an accepted filter.  We may sleep: use GFP_KERNEL. */
+	if (!ops->get_rx_ntuple) {
+		fsc = kmalloc(sizeof(*fsc), GFP_KERNEL);
+		if (!fsc)
+			return -ENOMEM;
+	}
+
+	ret = ops->set_rx_ntuple(dev, &cmd);
+	if (ret) {
+		kfree(fsc);
+		return ret;
+	}
+
+	if (!ops->get_rx_ntuple)
+		__rx_ntuple_filter_add(&dev->ethtool_ntuple_list, &cmd.fs, fsc);
+
+	return ret;
+}
+
+static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_gstrings gstrings;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_rx_ntuple_flow_spec_container *fsc;
+	u8 *data;
+	char *p;
+	int ret, i = 0, num_strings = 0;
+
+	if (!ops->get_sset_count)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
+		return -EFAULT;
+
+	ret = ops->get_sset_count(dev, gstrings.string_set);
+	if (ret < 0)
+		return ret;
+	gstrings.len = ret;
+
+	data = kzalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	if (ops->get_rx_ntuple) {
+		/* driver-specific filter grab */
+		ret = ops->get_rx_ntuple(dev, gstrings.string_set, data);
+		goto copy;
+	}
+
+	/* default ethtool filter grab; stop before overrunning the buffer */
+	p = (char *)data;
+	list_for_each_entry(fsc, &dev->ethtool_ntuple_list.list, list) {
+		if (num_strings + ETHTOOL_MAX_NTUPLE_STRING_PER_ENTRY >
+		    gstrings.len)
+			break;
+		sprintf(p, "Filter %d:\n", i);
+		p += ETH_GSTRING_LEN;
+		num_strings++;
+
+		switch (fsc->fs.flow_type) {
+		case TCP_V4_FLOW:
+			sprintf(p, "\tFlow Type: TCP\n");
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case UDP_V4_FLOW:
+			sprintf(p, "\tFlow Type: UDP\n");
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case SCTP_V4_FLOW:
+			sprintf(p, "\tFlow Type: SCTP\n");
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case AH_ESP_V4_FLOW:
+			sprintf(p, "\tFlow Type: AH ESP\n");
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case ESP_V4_FLOW:
+			sprintf(p, "\tFlow Type: ESP\n");
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case IP_USER_FLOW:
+			sprintf(p, "\tFlow Type: Raw IP\n");
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case IPV4_FLOW:
+			sprintf(p, "\tFlow Type: IPv4\n");
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		default:
+			sprintf(p, "\tFlow Type: Unknown\n");
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			goto unknown_filter;
+		}
+
+		/* now the rest of the filters */
+		switch (fsc->fs.flow_type) {
+		case TCP_V4_FLOW:
+		case UDP_V4_FLOW:
+		case SCTP_V4_FLOW:
+			sprintf(p, "\tSrc IP addr: 0x%x\n",
+				fsc->fs.h_u.tcp_ip4_spec.ip4src);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tSrc IP mask: 0x%x\n",
+				fsc->fs.m_u.tcp_ip4_spec.ip4src);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest IP addr: 0x%x\n",
+				fsc->fs.h_u.tcp_ip4_spec.ip4dst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest IP mask: 0x%x\n",
+				fsc->fs.m_u.tcp_ip4_spec.ip4dst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tSrc Port: %d, mask: 0x%x\n",
+				fsc->fs.h_u.tcp_ip4_spec.psrc,
+				fsc->fs.m_u.tcp_ip4_spec.psrc);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest Port: %d, mask: 0x%x\n",
+				fsc->fs.h_u.tcp_ip4_spec.pdst,
+				fsc->fs.m_u.tcp_ip4_spec.pdst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
+				fsc->fs.h_u.tcp_ip4_spec.tos,
+				fsc->fs.m_u.tcp_ip4_spec.tos);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case AH_ESP_V4_FLOW:
+		case ESP_V4_FLOW:
+			sprintf(p, "\tSrc IP addr: 0x%x\n",
+				fsc->fs.h_u.ah_ip4_spec.ip4src);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tSrc IP mask: 0x%x\n",
+				fsc->fs.m_u.ah_ip4_spec.ip4src);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest IP addr: 0x%x\n",
+				fsc->fs.h_u.ah_ip4_spec.ip4dst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest IP mask: 0x%x\n",
+				fsc->fs.m_u.ah_ip4_spec.ip4dst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tSPI: %d, mask: 0x%x\n",
+				fsc->fs.h_u.ah_ip4_spec.spi,
+				fsc->fs.m_u.ah_ip4_spec.spi);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
+				fsc->fs.h_u.ah_ip4_spec.tos,
+				fsc->fs.m_u.ah_ip4_spec.tos);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case IP_USER_FLOW:
+			sprintf(p, "\tSrc IP addr: 0x%x\n",
+				fsc->fs.h_u.usr_ip4_spec.ip4src);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tSrc IP mask: 0x%x\n",
+				fsc->fs.m_u.usr_ip4_spec.ip4src);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest IP addr: 0x%x\n",
+				fsc->fs.h_u.usr_ip4_spec.ip4dst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest IP mask: 0x%x\n",
+				fsc->fs.m_u.usr_ip4_spec.ip4dst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case IPV4_FLOW:
+			sprintf(p, "\tSrc IP addr: 0x%x\n",
+				fsc->fs.h_u.usr_ip4_spec.ip4src);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tSrc IP mask: 0x%x\n",
+				fsc->fs.m_u.usr_ip4_spec.ip4src);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest IP addr: 0x%x\n",
+				fsc->fs.h_u.usr_ip4_spec.ip4dst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest IP mask: 0x%x\n",
+				fsc->fs.m_u.usr_ip4_spec.ip4dst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n",
+				fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
+				fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
+				fsc->fs.h_u.usr_ip4_spec.tos,
+				fsc->fs.m_u.usr_ip4_spec.tos);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tIP Version: %d, mask: 0x%x\n",
+				fsc->fs.h_u.usr_ip4_spec.ip_ver,
+				fsc->fs.m_u.usr_ip4_spec.ip_ver);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tProtocol: %d, mask: 0x%x\n",
+				fsc->fs.h_u.usr_ip4_spec.proto,
+				fsc->fs.m_u.usr_ip4_spec.proto);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		}
+		sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
+			fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
+		p += ETH_GSTRING_LEN;
+		num_strings++;
+		sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data);
+		p += ETH_GSTRING_LEN;
+		num_strings++;
+		sprintf(p, "\tUser-defined mask: 0x%Lx\n", fsc->fs.data_mask);
+		p += ETH_GSTRING_LEN;
+		num_strings++;
+		if (fsc->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP)
+			sprintf(p, "\tAction: Drop\n");
+		else
+			sprintf(p, "\tAction: Direct to queue %d\n",
+				fsc->fs.action);
+		p += ETH_GSTRING_LEN;
+		num_strings++;
+unknown_filter:
+		i++;
+	}
+copy:
+	/* indicate to userspace how many strings we actually have */
+	gstrings.len = num_strings;
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
+		goto out;
+	useraddr += sizeof(gstrings);
+	if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
+		goto out;
+	ret = 0;
+out:
+	kfree(data);
+	return ret;
+}
+
 static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_regs regs;
@@ -283,7 +817,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 	if (regs.len > reglen)
 		regs.len = reglen;
 
-	regbuf = kmalloc(reglen, GFP_USER);
+	regbuf = vmalloc(reglen);
 	if (!regbuf)
 		return -ENOMEM;
 
@@ -298,13 +832,33 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 	ret = 0;
 
  out:
-	kfree(regbuf);
+	vfree(regbuf);
 	return ret;
 }
 
+/* Pass the user's ethtool_value to the driver's reset hook and, on
+ * success, copy the value (which the driver may modify) back out. */
+static int ethtool_reset(struct net_device *dev, char __user *useraddr)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_value reset;
+	int ret;
+
+	if (!ops->reset)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&reset, useraddr, sizeof(reset)))
+		return -EFAULT;
+
+	ret = ops->reset(dev, &reset.data);
+	if (!ret && copy_to_user(useraddr, &reset, sizeof(reset)))
+		ret = -EFAULT;
+	return ret;
+}
+
 static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
 {
-	struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
+	struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
 
 	if (!dev->ethtool_ops->get_wol)
 		return -EOPNOTSUPP;
@@ -337,6 +891,20 @@ static int ethtool_nway_reset(struct net_device *dev)
 	return dev->ethtool_ops->nway_reset(dev);
 }
 
+/* Report link state: always 0 while the interface is not running */
+static int ethtool_get_link(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { .cmd = ETHTOOL_GLINK };
+
+	if (!dev->ethtool_ops->get_link)
+		return -EOPNOTSUPP;
+
+	if (netif_running(dev) && dev->ethtool_ops->get_link(dev))
+		edata.data = 1;
+
+	return copy_to_user(useraddr, &edata, sizeof(edata)) ? -EFAULT : 0;
+}
+
 static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_eeprom eeprom;
@@ -436,9 +1004,10 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
 	return ret;
 }
 
-static int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev,
+						   void __user *useraddr)
 {
-	struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE };
+	struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
 
 	if (!dev->ethtool_ops->get_coalesce)
 		return -EOPNOTSUPP;
@@ -450,7 +1019,8 @@ static int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
 	return 0;
 }
 
-static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
+						   void __user *useraddr)
 {
 	struct ethtool_coalesce coalesce;
 
@@ -465,7 +1035,7 @@ static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
 
 static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr)
 {
-	struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM };
+	struct ethtool_ringparam ringparam = { .cmd = ETHTOOL_GRINGPARAM };
 
 	if (!dev->ethtool_ops->get_ringparam)
 		return -EOPNOTSUPP;
@@ -554,6 +1124,7 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
 
 	return dev->ethtool_ops->set_tx_csum(dev, edata.data);
 }
+EXPORT_SYMBOL(ethtool_op_set_tx_csum);
 
 static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
 {
@@ -614,7 +1185,9 @@ static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
 		return -EFAULT;
 	if (edata.data && !(dev->features & NETIF_F_SG))
 		return -EINVAL;
-	if (edata.data && !(dev->features & NETIF_F_HW_CSUM))
+	if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) ||
+		(dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
+			== (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)))
 		return -EINVAL;
 	return dev->ethtool_ops->set_ufo(dev, edata.data);
 }
@@ -625,7 +1198,7 @@ static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
 
 	edata.data = dev->features & NETIF_F_GSO;
 	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		 return -EFAULT;
+		return -EFAULT;
 	return 0;
 }
 
@@ -648,7 +1221,7 @@ static int ethtool_get_gro(struct net_device *dev, char __user *useraddr)
 
 	edata.data = dev->features & NETIF_F_GRO;
 	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		 return -EFAULT;
+		return -EFAULT;
 	return 0;
 }
 
@@ -660,8 +1233,11 @@ static int ethtool_set_gro(struct net_device *dev, char __user *useraddr)
 		return -EFAULT;
 
 	if (edata.data) {
-		if (!dev->ethtool_ops->get_rx_csum ||
-		    !dev->ethtool_ops->get_rx_csum(dev))
+		u32 rxcsum = dev->ethtool_ops->get_rx_csum ?
+				dev->ethtool_ops->get_rx_csum(dev) :
+				ethtool_op_get_rx_csum(dev);
+
+		if (!rxcsum)
 			return -EINVAL;
 		dev->features |= NETIF_F_GRO;
 	} else
@@ -677,16 +1253,10 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
 	u64 *data;
 	int ret, test_len;
 
-	if (!ops->self_test)
-		return -EOPNOTSUPP;
-	if (!ops->get_sset_count && !ops->self_test_count)
+	if (!ops->self_test || !ops->get_sset_count)
 		return -EOPNOTSUPP;
 
-	if (ops->get_sset_count)
-		test_len = ops->get_sset_count(dev, ETH_SS_TEST);
-	else
-		/* code path for obsolete hook */
-		test_len = ops->self_test_count(dev);
+	test_len = ops->get_sset_count(dev, ETH_SS_TEST);
 	if (test_len < 0)
 		return test_len;
 	WARN_ON(test_len == 0);
@@ -721,36 +1291,17 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
 	u8 *data;
 	int ret;
 
-	if (!ops->get_strings)
+	if (!ops->get_strings || !ops->get_sset_count)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
 		return -EFAULT;
 
-	if (ops->get_sset_count) {
-		ret = ops->get_sset_count(dev, gstrings.string_set);
-		if (ret < 0)
-			return ret;
-
-		gstrings.len = ret;
-	} else {
-		/* code path for obsolete hooks */
+	ret = ops->get_sset_count(dev, gstrings.string_set);
+	if (ret < 0)
+		return ret;
 
-		switch (gstrings.string_set) {
-		case ETH_SS_TEST:
-			if (!ops->self_test_count)
-				return -EOPNOTSUPP;
-			gstrings.len = ops->self_test_count(dev);
-			break;
-		case ETH_SS_STATS:
-			if (!ops->get_stats_count)
-				return -EOPNOTSUPP;
-			gstrings.len = ops->get_stats_count(dev);
-			break;
-		default:
-			return -EINVAL;
-		}
-	}
+	gstrings.len = ret;
 
 	data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
 	if (!data)
@@ -791,16 +1342,10 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
 	u64 *data;
 	int ret, n_stats;
 
-	if (!ops->get_ethtool_stats)
-		return -EOPNOTSUPP;
-	if (!ops->get_sset_count && !ops->get_stats_count)
+	if (!ops->get_ethtool_stats || !ops->get_sset_count)
 		return -EOPNOTSUPP;
 
-	if (ops->get_sset_count)
-		n_stats = ops->get_sset_count(dev, ETH_SS_STATS);
-	else
-		/* code path for obsolete hook */
-		n_stats = ops->get_stats_count(dev);
+	n_stats = ops->get_sset_count(dev, ETH_SS_STATS);
 	if (n_stats < 0)
 		return n_stats;
 	WARN_ON(n_stats == 0);
@@ -850,7 +1395,7 @@ static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr)
 static int ethtool_get_value(struct net_device *dev, char __user *useraddr,
 			     u32 cmd, u32 (*actor)(struct net_device *))
 {
-	struct ethtool_value edata = { cmd };
+	struct ethtool_value edata = { .cmd = cmd };
 
 	if (!actor)
 		return -EOPNOTSUPP;
@@ -891,6 +1436,20 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr,
 	return actor(dev, edata.data);
 }
 
+static noinline_for_stack int ethtool_flash_device(struct net_device *dev,
+						   char __user *useraddr)
+{
+	struct ethtool_flash efl;	/* kernel copy of the user request */
+
+	if (copy_from_user(&efl, useraddr, sizeof(efl)))
+		return -EFAULT;
+
+	if (!dev->ethtool_ops->flash_device)	/* no driver flash hook */
+		return -EOPNOTSUPP;
+
+	return dev->ethtool_ops->flash_device(dev, &efl);
+}
+
 /* The main entry point in this file.  Called from net/core/dev.c */
 
 int dev_ethtool(struct net *net, struct ifreq *ifr)
@@ -904,14 +1463,22 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	if (!dev || !netif_device_present(dev))
 		return -ENODEV;
 
-	if (!dev->ethtool_ops)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
+	if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
 		return -EFAULT;
 
+	if (!dev->ethtool_ops) {
+		/* ETHTOOL_GDRVINFO does not require any driver support.
+		 * It is also unprivileged and does not change anything,
+		 * so we can take a shortcut to it. */
+		if (ethcmd == ETHTOOL_GDRVINFO)
+			return ethtool_get_drvinfo(dev, useraddr);
+		else
+			return -EOPNOTSUPP;
+	}
+
 	/* Allow some commands to be done by anyone */
-	switch(ethcmd) {
+	switch (ethcmd) {
+	case ETHTOOL_GSET:
 	case ETHTOOL_GDRVINFO:
 	case ETHTOOL_GMSGLVL:
 	case ETHTOOL_GCOALESCE:
@@ -925,6 +1492,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GPERMADDR:
 	case ETHTOOL_GUFO:
 	case ETHTOOL_GGSO:
+	case ETHTOOL_GGRO:
 	case ETHTOOL_GFLAGS:
 	case ETHTOOL_GPFLAGS:
 	case ETHTOOL_GRXFH:
@@ -938,10 +1506,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 			return -EPERM;
 	}
 
-	if (dev->ethtool_ops->begin)
-		if ((rc = dev->ethtool_ops->begin(dev)) < 0)
+	if (dev->ethtool_ops->begin) {
+		rc = dev->ethtool_ops->begin(dev);
+		if (rc  < 0)
 			return rc;
-
+	}
 	old_features = dev->features;
 
 	switch (ethcmd) {
@@ -975,8 +1544,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 		rc = ethtool_nway_reset(dev);
 		break;
 	case ETHTOOL_GLINK:
-		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       dev->ethtool_ops->get_link);
+		rc = ethtool_get_link(dev, useraddr);
 		break;
 	case ETHTOOL_GEEPROM:
 		rc = ethtool_get_eeprom(dev, useraddr);
@@ -1004,7 +1572,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 		break;
 	case ETHTOOL_GRXCSUM:
 		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       dev->ethtool_ops->get_rx_csum);
+				       (dev->ethtool_ops->get_rx_csum ?
+					dev->ethtool_ops->get_rx_csum :
+					ethtool_op_get_rx_csum));
 		break;
 	case ETHTOOL_SRXCSUM:
 		rc = ethtool_set_rx_csum(dev, useraddr);
@@ -1068,7 +1638,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 		break;
 	case ETHTOOL_GFLAGS:
 		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       dev->ethtool_ops->get_flags);
+				       (dev->ethtool_ops->get_flags ?
+					dev->ethtool_ops->get_flags :
+					ethtool_op_get_flags));
 		break;
 	case ETHTOOL_SFLAGS:
 		rc = ethtool_set_value(dev, useraddr,
@@ -1087,12 +1659,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GRXCLSRLCNT:
 	case ETHTOOL_GRXCLSRULE:
 	case ETHTOOL_GRXCLSRLALL:
-		rc = ethtool_get_rxnfc(dev, useraddr);
+		rc = ethtool_get_rxnfc(dev, ethcmd, useraddr);
 		break;
 	case ETHTOOL_SRXFH:
 	case ETHTOOL_SRXCLSRLDEL:
 	case ETHTOOL_SRXCLSRLINS:
-		rc = ethtool_set_rxnfc(dev, useraddr);
+		rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
 		break;
 	case ETHTOOL_GGRO:
 		rc = ethtool_get_gro(dev, useraddr);
@@ -1100,6 +1672,27 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_SGRO:
 		rc = ethtool_set_gro(dev, useraddr);
 		break;
+	case ETHTOOL_FLASHDEV:
+		rc = ethtool_flash_device(dev, useraddr);
+		break;
+	case ETHTOOL_RESET:
+		rc = ethtool_reset(dev, useraddr);
+		break;
+	case ETHTOOL_SRXNTUPLE:
+		rc = ethtool_set_rx_ntuple(dev, useraddr);
+		break;
+	case ETHTOOL_GRXNTUPLE:
+		rc = ethtool_get_rx_ntuple(dev, useraddr);
+		break;
+	case ETHTOOL_GSSET_INFO:
+		rc = ethtool_get_sset_info(dev, useraddr);
+		break;
+	case ETHTOOL_GRXFHINDIR:
+		rc = ethtool_get_rxfh_indir(dev, useraddr);
+		break;
+	case ETHTOOL_SRXFHINDIR:
+		rc = ethtool_set_rxfh_indir(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
@@ -1112,17 +1705,3 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 
 	return rc;
 }
-
-EXPORT_SYMBOL(ethtool_op_get_link);
-EXPORT_SYMBOL(ethtool_op_get_sg);
-EXPORT_SYMBOL(ethtool_op_get_tso);
-EXPORT_SYMBOL(ethtool_op_get_tx_csum);
-EXPORT_SYMBOL(ethtool_op_set_sg);
-EXPORT_SYMBOL(ethtool_op_set_tso);
-EXPORT_SYMBOL(ethtool_op_set_tx_csum);
-EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
-EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
-EXPORT_SYMBOL(ethtool_op_set_ufo);
-EXPORT_SYMBOL(ethtool_op_get_ufo);
-EXPORT_SYMBOL(ethtool_op_set_flags);
-EXPORT_SYMBOL(ethtool_op_get_flags);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index bd309384f8b..a20e5d3bbfa 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -10,6 +10,7 @@
 
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <linux/list.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -38,6 +39,24 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 }
 EXPORT_SYMBOL(fib_default_rule_add);
 
+u32 fib_default_rule_pref(struct fib_rules_ops *ops)
+{
+	struct list_head *pos;
+	struct fib_rule *rule;
+
+	if (!list_empty(&ops->rules_list)) {
+		pos = ops->rules_list.next;	/* first rule in the list */
+		if (pos->next != &ops->rules_list) {	/* has a 2nd rule */
+			rule = list_entry(pos->next, struct fib_rule, list);
+			if (rule->pref)
+				return rule->pref - 1;	/* 2nd pref - 1 */
+		}
+	}
+
+	return 0;	/* fewer than two rules, or 2nd rule has pref 0 */
+}
+EXPORT_SYMBOL(fib_default_rule_pref);
+
 static void notify_rule_change(int event, struct fib_rule *rule,
 			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
 			       u32 pid);
@@ -72,7 +91,7 @@ static void flush_route_cache(struct fib_rules_ops *ops)
 		ops->flush_cache(ops);
 }
 
-int fib_rules_register(struct fib_rules_ops *ops)
+static int __fib_rules_register(struct fib_rules_ops *ops)
 {
 	int err = -EEXIST;
 	struct fib_rules_ops *o;
@@ -102,9 +121,30 @@ errout:
 	return err;
 }
 
+struct fib_rules_ops *
+fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
+{
+	struct fib_rules_ops *ops;
+	int err;
+
+	ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);	/* copy of tmpl */
+	if (ops == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&ops->rules_list);	/* copy starts with no rules */
+	ops->fro_net = net;
+
+	err = __fib_rules_register(ops);
+	if (err) {
+		kfree(ops);		/* registration failed: drop the copy */
+		ops = ERR_PTR(err);
+	}
+
+	return ops;	/* the registered copy, or an ERR_PTR() on failure */
+}
 EXPORT_SYMBOL_GPL(fib_rules_register);
 
-void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
+static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
 {
 	struct fib_rule *rule, *tmp;
 
@@ -113,7 +153,15 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
 		fib_rule_put(rule);
 	}
 }
-EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);
+
+static void fib_rules_put_rcu(struct rcu_head *head)
+{
+	struct fib_rules_ops *ops = container_of(head, struct fib_rules_ops, rcu);
+	struct net *net = ops->fro_net;	/* read before ops is freed below */
+
+	release_net(net);
+	kfree(ops);	/* the ops whose embedded rcu head is @head */
+}
 
 void fib_rules_unregister(struct fib_rules_ops *ops)
 {
@@ -124,10 +172,8 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
 	fib_rules_cleanup_ops(ops);
 	spin_unlock(&net->rules_mod_lock);
 
-	synchronize_rcu();
-	release_net(net);
+	call_rcu(&ops->rcu, fib_rules_put_rcu);
 }
-
 EXPORT_SYMBOL_GPL(fib_rules_unregister);
 
 static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
@@ -135,7 +181,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
 {
 	int ret = 0;
 
-	if (rule->ifindex && (rule->ifindex != fl->iif))
+	if (rule->iifindex && (rule->iifindex != fl->iif))
+		goto out;
+
+	if (rule->oifindex && (rule->oifindex != fl->oif))
 		goto out;
 
 	if ((rule->mark ^ fl->mark) & rule->mark_mask)
@@ -175,9 +224,12 @@ jumped:
 			err = ops->action(rule, fl, flags, arg);
 
 		if (err != -EAGAIN) {
-			fib_rule_get(rule);
-			arg->rule = rule;
-			goto out;
+			if ((arg->flags & FIB_LOOKUP_NOREF) ||
+			    likely(atomic_inc_not_zero(&rule->refcnt))) {
+				arg->rule = rule;
+				goto out;
+			}
+			break;
 		}
 	}
 
@@ -187,7 +239,6 @@ out:
 
 	return err;
 }
-
 EXPORT_SYMBOL_GPL(fib_rules_lookup);
 
 static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
@@ -248,14 +299,24 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (tb[FRA_PRIORITY])
 		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
 
-	if (tb[FRA_IFNAME]) {
+	if (tb[FRA_IIFNAME]) {
+		struct net_device *dev;
+
+		rule->iifindex = -1;
+		nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
+		dev = __dev_get_by_name(net, rule->iifname);
+		if (dev)
+			rule->iifindex = dev->ifindex;
+	}
+
+	if (tb[FRA_OIFNAME]) {
 		struct net_device *dev;
 
-		rule->ifindex = -1;
-		nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ);
-		dev = __dev_get_by_name(net, rule->ifname);
+		rule->oifindex = -1;
+		nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
+		dev = __dev_get_by_name(net, rule->oifname);
 		if (dev)
-			rule->ifindex = dev->ifindex;
+			rule->oifindex = dev->ifindex;
 	}
 
 	if (tb[FRA_FWMARK]) {
@@ -274,7 +335,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	rule->flags = frh->flags;
 	rule->table = frh_get_table(frh, tb);
 
-	if (!rule->pref && ops->default_pref)
+	if (!tb[FRA_PRIORITY] && ops->default_pref)
 		rule->pref = ops->default_pref(ops);
 
 	err = -EINVAL;
@@ -289,12 +350,12 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 		list_for_each_entry(r, &ops->rules_list, list) {
 			if (r->pref == rule->target) {
-				rule->ctarget = r;
+				RCU_INIT_POINTER(rule->ctarget, r);
 				break;
 			}
 		}
 
-		if (rule->ctarget == NULL)
+		if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
 			unresolved = 1;
 	} else if (rule->action == FR_ACT_GOTO)
 		goto errout_free;
@@ -311,6 +372,11 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 	fib_rule_get(rule);
 
+	if (last)
+		list_add_rcu(&rule->list, &last->list);
+	else
+		list_add_rcu(&rule->list, &ops->rules_list);
+
 	if (ops->unresolved_rules) {
 		/*
 		 * There are unresolved goto rules in the list, check if
@@ -319,7 +385,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		list_for_each_entry(r, &ops->rules_list, list) {
 			if (r->action == FR_ACT_GOTO &&
 			    r->target == rule->pref) {
-				BUG_ON(r->ctarget != NULL);
+				BUG_ON(rtnl_dereference(r->ctarget) != NULL);
 				rcu_assign_pointer(r->ctarget, rule);
 				if (--ops->unresolved_rules == 0)
 					break;
@@ -333,11 +399,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (unresolved)
 		ops->unresolved_rules++;
 
-	if (last)
-		list_add_rcu(&rule->list, &last->list);
-	else
-		list_add_rcu(&rule->list, &ops->rules_list);
-
 	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
 	flush_route_cache(ops);
 	rules_ops_put(ops);
@@ -388,8 +449,12 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		    (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
 			continue;
 
-		if (tb[FRA_IFNAME] &&
-		    nla_strcmp(tb[FRA_IFNAME], rule->ifname))
+		if (tb[FRA_IIFNAME] &&
+		    nla_strcmp(tb[FRA_IIFNAME], rule->iifname))
+			continue;
+
+		if (tb[FRA_OIFNAME] &&
+		    nla_strcmp(tb[FRA_OIFNAME], rule->oifname))
 			continue;
 
 		if (tb[FRA_FWMARK] &&
@@ -421,14 +486,13 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		 */
 		if (ops->nr_goto_rules > 0) {
 			list_for_each_entry(tmp, &ops->rules_list, list) {
-				if (tmp->ctarget == rule) {
+				if (rtnl_dereference(tmp->ctarget) == rule) {
 					rcu_assign_pointer(tmp->ctarget, NULL);
 					ops->unresolved_rules++;
 				}
 			}
 		}
 
-		synchronize_rcu();
 		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
 				   NETLINK_CB(skb).pid);
 		fib_rule_put(rule);
@@ -447,7 +511,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
 					 struct fib_rule *rule)
 {
 	size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
-			 + nla_total_size(IFNAMSIZ) /* FRA_IFNAME */
+			 + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */
+			 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
 			 + nla_total_size(4) /* FRA_PRIORITY */
 			 + nla_total_size(4) /* FRA_TABLE */
 			 + nla_total_size(4) /* FRA_FWMARK */
@@ -471,6 +536,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 		return -EMSGSIZE;
 
 	frh = nlmsg_data(nlh);
+	frh->family = ops->family;
 	frh->table = rule->table;
 	NLA_PUT_U32(skb, FRA_TABLE, rule->table);
 	frh->res1 = 0;
@@ -478,14 +544,22 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	frh->action = rule->action;
 	frh->flags = rule->flags;
 
-	if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
+	if (rule->action == FR_ACT_GOTO &&
+	    rcu_dereference_raw(rule->ctarget) == NULL)
 		frh->flags |= FIB_RULE_UNRESOLVED;
 
-	if (rule->ifname[0]) {
-		NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
+	if (rule->iifname[0]) {
+		NLA_PUT_STRING(skb, FRA_IIFNAME, rule->iifname);
+
+		if (rule->iifindex == -1)
+			frh->flags |= FIB_RULE_IIF_DETACHED;
+	}
+
+	if (rule->oifname[0]) {
+		NLA_PUT_STRING(skb, FRA_OIFNAME, rule->oifname);
 
-		if (rule->ifindex == -1)
-			frh->flags |= FIB_RULE_DEV_DETACHED;
+		if (rule->oifindex == -1)
+			frh->flags |= FIB_RULE_OIF_DETACHED;
 	}
 
 	if (rule->pref)
@@ -558,7 +632,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
 			break;
 
 		cb->args[1] = 0;
-	skip:
+skip:
 		idx++;
 	}
 	rcu_read_unlock();
@@ -600,9 +674,12 @@ static void attach_rules(struct list_head *rules, struct net_device *dev)
 	struct fib_rule *rule;
 
 	list_for_each_entry(rule, rules, list) {
-		if (rule->ifindex == -1 &&
-		    strcmp(dev->name, rule->ifname) == 0)
-			rule->ifindex = dev->ifindex;
+		if (rule->iifindex == -1 &&
+		    strcmp(dev->name, rule->iifname) == 0)
+			rule->iifindex = dev->ifindex;
+		if (rule->oifindex == -1 &&
+		    strcmp(dev->name, rule->oifname) == 0)
+			rule->oifindex = dev->ifindex;
 	}
 }
 
@@ -610,9 +687,12 @@ static void detach_rules(struct list_head *rules, struct net_device *dev)
 {
 	struct fib_rule *rule;
 
-	list_for_each_entry(rule, rules, list)
-		if (rule->ifindex == dev->ifindex)
-			rule->ifindex = -1;
+	list_for_each_entry(rule, rules, list) {
+		if (rule->iifindex == dev->ifindex)
+			rule->iifindex = -1;
+		if (rule->oifindex == dev->ifindex)
+			rule->oifindex = -1;
+	}
 }
 
 
@@ -624,7 +704,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
 	struct fib_rules_ops *ops;
 
 	ASSERT_RTNL();
-	rcu_read_lock();
 
 	switch (event) {
 	case NETDEV_REGISTER:
@@ -638,8 +717,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
 		break;
 	}
 
-	rcu_read_unlock();
-
 	return NOTIFY_DONE;
 }
 
@@ -647,7 +724,7 @@ static struct notifier_block fib_rules_notifier = {
 	.notifier_call = fib_rules_event,
 };
 
-static int fib_rules_net_init(struct net *net)
+static int __net_init fib_rules_net_init(struct net *net)
 {
 	INIT_LIST_HEAD(&net->rules_ops);
 	spin_lock_init(&net->rules_mod_lock);
diff --git a/net/core/filter.c b/net/core/filter.c
index d1d779ca096..afc58374ca9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -25,6 +25,7 @@
 #include <linux/inet.h>
 #include <linux/netdevice.h>
 #include <linux/if_packet.h>
+#include <linux/gfp.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/netlink.h>
@@ -36,9 +37,69 @@
 #include <asm/uaccess.h>
 #include <asm/unaligned.h>
 #include <linux/filter.h>
+#include <linux/reciprocal_div.h>
+
+enum {
+	BPF_S_RET_K = 1,	/* 0 means "invalid" in sk_chk_filter() */
+	BPF_S_RET_A,
+	BPF_S_ALU_ADD_K,
+	BPF_S_ALU_ADD_X,
+	BPF_S_ALU_SUB_K,
+	BPF_S_ALU_SUB_X,
+	BPF_S_ALU_MUL_K,
+	BPF_S_ALU_MUL_X,
+	BPF_S_ALU_DIV_X,
+	BPF_S_ALU_AND_K,
+	BPF_S_ALU_AND_X,
+	BPF_S_ALU_OR_K,
+	BPF_S_ALU_OR_X,
+	BPF_S_ALU_LSH_K,
+	BPF_S_ALU_LSH_X,
+	BPF_S_ALU_RSH_K,
+	BPF_S_ALU_RSH_X,
+	BPF_S_ALU_NEG,
+	BPF_S_LD_W_ABS,
+	BPF_S_LD_H_ABS,
+	BPF_S_LD_B_ABS,
+	BPF_S_LD_W_LEN,
+	BPF_S_LD_W_IND,
+	BPF_S_LD_H_IND,
+	BPF_S_LD_B_IND,
+	BPF_S_LD_IMM,
+	BPF_S_LDX_W_LEN,
+	BPF_S_LDX_B_MSH,
+	BPF_S_LDX_IMM,
+	BPF_S_MISC_TAX,
+	BPF_S_MISC_TXA,
+	BPF_S_ALU_DIV_K,
+	BPF_S_LD_MEM,
+	BPF_S_LDX_MEM,
+	BPF_S_ST,
+	BPF_S_STX,
+	BPF_S_JMP_JA,
+	BPF_S_JMP_JEQ_K,
+	BPF_S_JMP_JEQ_X,
+	BPF_S_JMP_JGE_K,
+	BPF_S_JMP_JGE_X,
+	BPF_S_JMP_JGT_K,
+	BPF_S_JMP_JGT_X,
+	BPF_S_JMP_JSET_K,
+	BPF_S_JMP_JSET_X,
+	/* Ancillary data */
+	BPF_S_ANC_PROTOCOL,
+	BPF_S_ANC_PKTTYPE,
+	BPF_S_ANC_IFINDEX,
+	BPF_S_ANC_NLATTR,
+	BPF_S_ANC_NLATTR_NEST,
+	BPF_S_ANC_MARK,
+	BPF_S_ANC_QUEUE,
+	BPF_S_ANC_HATYPE,
+	BPF_S_ANC_RXHASH,
+	BPF_S_ANC_CPU,
+};
 
 /* No hurry in this branch */
-static void *__load_pointer(struct sk_buff *skb, int k)
+static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
 {
 	u8 *ptr = NULL;
 
@@ -47,21 +108,17 @@ static void *__load_pointer(struct sk_buff *skb, int k)
 	else if (k >= SKF_LL_OFF)
 		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
 
-	if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
+	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
 		return ptr;
 	return NULL;
 }
 
-static inline void *load_pointer(struct sk_buff *skb, int k,
+static inline void *load_pointer(const struct sk_buff *skb, int k,
 				 unsigned int size, void *buffer)
 {
 	if (k >= 0)
 		return skb_header_pointer(skb, k, size, buffer);
-	else {
-		if (k >= SKF_AD_OFF)
-			return NULL;
-		return __load_pointer(skb, k);
-	}
+	return __load_pointer(skb, k, size);
 }
 
 /**
@@ -86,10 +143,10 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 		return err;
 
 	rcu_read_lock_bh();
-	filter = rcu_dereference(sk->sk_filter);
+	filter = rcu_dereference_bh(sk->sk_filter);
 	if (filter) {
-		unsigned int pkt_len = sk_run_filter(skb, filter->insns,
-				filter->len);
+		unsigned int pkt_len = sk_run_filter(skb, filter->insns);
+
 		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
 	}
 	rcu_read_unlock_bh();
@@ -101,209 +158,222 @@ EXPORT_SYMBOL(sk_filter);
 /**
  *	sk_run_filter - run a filter on a socket
  *	@skb: buffer to run the filter on
- *	@filter: filter to apply
- *	@flen: length of filter
+ *	@fentry: filter to apply
  *
  * Decode and apply filter instructions to the skb->data.
- * Return length to keep, 0 for none. skb is the data we are
- * filtering, filter is the array of filter instructions, and
- * len is the number of filter blocks in the array.
+ * Return length to keep, 0 for none. @skb is the data we are
+ * filtering, @fentry is the array of filter instructions.
+ * Because all jumps are guaranteed to be before the last instruction,
+ * and the last instruction is guaranteed to be a RET, we don't need to
+ * check flen. (The filter length used to be passed to this function.)
  */
-unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+unsigned int sk_run_filter(const struct sk_buff *skb,
+			   const struct sock_filter *fentry)
 {
-	struct sock_filter *fentry;	/* We walk down these */
 	void *ptr;
 	u32 A = 0;			/* Accumulator */
 	u32 X = 0;			/* Index Register */
 	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
 	u32 tmp;
 	int k;
-	int pc;
 
 	/*
 	 * Process array of filter instructions.
 	 */
-	for (pc = 0; pc < flen; pc++) {
-		fentry = &filter[pc];
+	for (;; fentry++) {
+#if defined(CONFIG_X86_32)
+#define	K (fentry->k)
+#else
+		const u32 K = fentry->k;
+#endif
 
 		switch (fentry->code) {
-		case BPF_ALU|BPF_ADD|BPF_X:
+		case BPF_S_ALU_ADD_X:
 			A += X;
 			continue;
-		case BPF_ALU|BPF_ADD|BPF_K:
-			A += fentry->k;
+		case BPF_S_ALU_ADD_K:
+			A += K;
 			continue;
-		case BPF_ALU|BPF_SUB|BPF_X:
+		case BPF_S_ALU_SUB_X:
 			A -= X;
 			continue;
-		case BPF_ALU|BPF_SUB|BPF_K:
-			A -= fentry->k;
+		case BPF_S_ALU_SUB_K:
+			A -= K;
 			continue;
-		case BPF_ALU|BPF_MUL|BPF_X:
+		case BPF_S_ALU_MUL_X:
 			A *= X;
 			continue;
-		case BPF_ALU|BPF_MUL|BPF_K:
-			A *= fentry->k;
+		case BPF_S_ALU_MUL_K:
+			A *= K;
 			continue;
-		case BPF_ALU|BPF_DIV|BPF_X:
+		case BPF_S_ALU_DIV_X:
 			if (X == 0)
 				return 0;
 			A /= X;
 			continue;
-		case BPF_ALU|BPF_DIV|BPF_K:
-			A /= fentry->k;
+		case BPF_S_ALU_DIV_K:
+			A = reciprocal_divide(A, K);
 			continue;
-		case BPF_ALU|BPF_AND|BPF_X:
+		case BPF_S_ALU_AND_X:
 			A &= X;
 			continue;
-		case BPF_ALU|BPF_AND|BPF_K:
-			A &= fentry->k;
+		case BPF_S_ALU_AND_K:
+			A &= K;
 			continue;
-		case BPF_ALU|BPF_OR|BPF_X:
+		case BPF_S_ALU_OR_X:
 			A |= X;
 			continue;
-		case BPF_ALU|BPF_OR|BPF_K:
-			A |= fentry->k;
+		case BPF_S_ALU_OR_K:
+			A |= K;
 			continue;
-		case BPF_ALU|BPF_LSH|BPF_X:
+		case BPF_S_ALU_LSH_X:
 			A <<= X;
 			continue;
-		case BPF_ALU|BPF_LSH|BPF_K:
-			A <<= fentry->k;
+		case BPF_S_ALU_LSH_K:
+			A <<= K;
 			continue;
-		case BPF_ALU|BPF_RSH|BPF_X:
+		case BPF_S_ALU_RSH_X:
 			A >>= X;
 			continue;
-		case BPF_ALU|BPF_RSH|BPF_K:
-			A >>= fentry->k;
+		case BPF_S_ALU_RSH_K:
+			A >>= K;
 			continue;
-		case BPF_ALU|BPF_NEG:
+		case BPF_S_ALU_NEG:
 			A = -A;
 			continue;
-		case BPF_JMP|BPF_JA:
-			pc += fentry->k;
+		case BPF_S_JMP_JA:
+			fentry += K;
 			continue;
-		case BPF_JMP|BPF_JGT|BPF_K:
-			pc += (A > fentry->k) ? fentry->jt : fentry->jf;
+		case BPF_S_JMP_JGT_K:
+			fentry += (A > K) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JGE|BPF_K:
-			pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
+		case BPF_S_JMP_JGE_K:
+			fentry += (A >= K) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JEQ|BPF_K:
-			pc += (A == fentry->k) ? fentry->jt : fentry->jf;
+		case BPF_S_JMP_JEQ_K:
+			fentry += (A == K) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JSET|BPF_K:
-			pc += (A & fentry->k) ? fentry->jt : fentry->jf;
+		case BPF_S_JMP_JSET_K:
+			fentry += (A & K) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JGT|BPF_X:
-			pc += (A > X) ? fentry->jt : fentry->jf;
+		case BPF_S_JMP_JGT_X:
+			fentry += (A > X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JGE|BPF_X:
-			pc += (A >= X) ? fentry->jt : fentry->jf;
+		case BPF_S_JMP_JGE_X:
+			fentry += (A >= X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JEQ|BPF_X:
-			pc += (A == X) ? fentry->jt : fentry->jf;
+		case BPF_S_JMP_JEQ_X:
+			fentry += (A == X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JSET|BPF_X:
-			pc += (A & X) ? fentry->jt : fentry->jf;
+		case BPF_S_JMP_JSET_X:
+			fentry += (A & X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_LD|BPF_W|BPF_ABS:
-			k = fentry->k;
+		case BPF_S_LD_W_ABS:
+			k = K;
 load_w:
 			ptr = load_pointer(skb, k, 4, &tmp);
 			if (ptr != NULL) {
 				A = get_unaligned_be32(ptr);
 				continue;
 			}
-			break;
-		case BPF_LD|BPF_H|BPF_ABS:
-			k = fentry->k;
+			return 0;
+		case BPF_S_LD_H_ABS:
+			k = K;
 load_h:
 			ptr = load_pointer(skb, k, 2, &tmp);
 			if (ptr != NULL) {
 				A = get_unaligned_be16(ptr);
 				continue;
 			}
-			break;
-		case BPF_LD|BPF_B|BPF_ABS:
-			k = fentry->k;
+			return 0;
+		case BPF_S_LD_B_ABS:
+			k = K;
 load_b:
 			ptr = load_pointer(skb, k, 1, &tmp);
 			if (ptr != NULL) {
 				A = *(u8 *)ptr;
 				continue;
 			}
-			break;
-		case BPF_LD|BPF_W|BPF_LEN:
+			return 0;
+		case BPF_S_LD_W_LEN:
 			A = skb->len;
 			continue;
-		case BPF_LDX|BPF_W|BPF_LEN:
+		case BPF_S_LDX_W_LEN:
 			X = skb->len;
 			continue;
-		case BPF_LD|BPF_W|BPF_IND:
-			k = X + fentry->k;
+		case BPF_S_LD_W_IND:
+			k = X + K;
 			goto load_w;
-		case BPF_LD|BPF_H|BPF_IND:
-			k = X + fentry->k;
+		case BPF_S_LD_H_IND:
+			k = X + K;
 			goto load_h;
-		case BPF_LD|BPF_B|BPF_IND:
-			k = X + fentry->k;
+		case BPF_S_LD_B_IND:
+			k = X + K;
 			goto load_b;
-		case BPF_LDX|BPF_B|BPF_MSH:
-			ptr = load_pointer(skb, fentry->k, 1, &tmp);
+		case BPF_S_LDX_B_MSH:
+			ptr = load_pointer(skb, K, 1, &tmp);
 			if (ptr != NULL) {
 				X = (*(u8 *)ptr & 0xf) << 2;
 				continue;
 			}
 			return 0;
-		case BPF_LD|BPF_IMM:
-			A = fentry->k;
+		case BPF_S_LD_IMM:
+			A = K;
 			continue;
-		case BPF_LDX|BPF_IMM:
-			X = fentry->k;
+		case BPF_S_LDX_IMM:
+			X = K;
 			continue;
-		case BPF_LD|BPF_MEM:
-			A = mem[fentry->k];
+		case BPF_S_LD_MEM:
+			A = mem[K];
 			continue;
-		case BPF_LDX|BPF_MEM:
-			X = mem[fentry->k];
+		case BPF_S_LDX_MEM:
+			X = mem[K];
 			continue;
-		case BPF_MISC|BPF_TAX:
+		case BPF_S_MISC_TAX:
 			X = A;
 			continue;
-		case BPF_MISC|BPF_TXA:
+		case BPF_S_MISC_TXA:
 			A = X;
 			continue;
-		case BPF_RET|BPF_K:
-			return fentry->k;
-		case BPF_RET|BPF_A:
+		case BPF_S_RET_K:
+			return K;
+		case BPF_S_RET_A:
 			return A;
-		case BPF_ST:
-			mem[fentry->k] = A;
+		case BPF_S_ST:
+			mem[K] = A;
 			continue;
-		case BPF_STX:
-			mem[fentry->k] = X;
+		case BPF_S_STX:
+			mem[K] = X;
 			continue;
-		default:
-			WARN_ON(1);
-			return 0;
-		}
-
-		/*
-		 * Handle ancillary data, which are impossible
-		 * (or very difficult) to get parsing packet contents.
-		 */
-		switch (k-SKF_AD_OFF) {
-		case SKF_AD_PROTOCOL:
+		case BPF_S_ANC_PROTOCOL:
 			A = ntohs(skb->protocol);
 			continue;
-		case SKF_AD_PKTTYPE:
+		case BPF_S_ANC_PKTTYPE:
 			A = skb->pkt_type;
 			continue;
-		case SKF_AD_IFINDEX:
+		case BPF_S_ANC_IFINDEX:
+			if (!skb->dev)
+				return 0;
 			A = skb->dev->ifindex;
 			continue;
-		case SKF_AD_NLATTR: {
+		case BPF_S_ANC_MARK:
+			A = skb->mark;
+			continue;
+		case BPF_S_ANC_QUEUE:
+			A = skb->queue_mapping;
+			continue;
+		case BPF_S_ANC_HATYPE:
+			if (!skb->dev)
+				return 0;
+			A = skb->dev->type;
+			continue;
+		case BPF_S_ANC_RXHASH:
+			A = skb->rxhash;
+			continue;
+		case BPF_S_ANC_CPU:
+			A = raw_smp_processor_id();
+			continue;
+		case BPF_S_ANC_NLATTR: {
 			struct nlattr *nla;
 
 			if (skb_is_nonlinear(skb))
@@ -319,7 +389,7 @@ load_b:
 				A = 0;
 			continue;
 		}
-		case SKF_AD_NLATTR_NEST: {
+		case BPF_S_ANC_NLATTR_NEST: {
 			struct nlattr *nla;
 
 			if (skb_is_nonlinear(skb))
@@ -339,6 +409,7 @@ load_b:
 			continue;
 		}
 		default:
+			WARN_ON(1);
 			return 0;
 		}
 	}
@@ -347,6 +418,66 @@ load_b:
 }
 EXPORT_SYMBOL(sk_run_filter);
 
+/*
+ * Security: a BPF program has 16 scratch memory cells for intermediate
+ * values (u32 mem[BPF_MEMWORDS] in sk_run_filter()).  Since mem[] is not
+ * cleared for each packet, reject any filter that could load a cell
+ * before storing to it on some path, so a malicious user cannot abuse us.
+ * Returns 0 if acceptable, -EINVAL on such a load, -ENOMEM if the mask
+ * array cannot be allocated.
+ * NOTE(review): jump targets are assumed already validated -- confirm.
+ */
+static int check_load_and_stores(struct sock_filter *filter, int flen)
+{
+	u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
+	int pc, ret = 0;
+
+	BUILD_BUG_ON(BPF_MEMWORDS > 16);	/* u16 holds one bit per cell */
+	masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
+	if (!masks)
+		return -ENOMEM;
+	memset(masks, 0xff, flen * sizeof(*masks));	/* all bits set */
+
+	for (pc = 0; pc < flen; pc++) {
+		memvalid &= masks[pc];	/* apply limits left by jumps here */
+
+		switch (filter[pc].code) {
+		case BPF_S_ST:
+		case BPF_S_STX:
+			memvalid |= (1 << filter[pc].k);	/* cell k written */
+			break;
+		case BPF_S_LD_MEM:
+		case BPF_S_LDX_MEM:
+			if (!(memvalid & (1 << filter[pc].k))) {
+				ret = -EINVAL;	/* load of unwritten cell */
+				goto error;
+			}
+			break;
+		case BPF_S_JMP_JA:
+			/* restrict the target's mask to cells valid here */
+			masks[pc + 1 + filter[pc].k] &= memvalid;
+			memvalid = ~0;	/* next insn is limited by masks[] */
+			break;
+		case BPF_S_JMP_JEQ_K:
+		case BPF_S_JMP_JEQ_X:
+		case BPF_S_JMP_JGE_K:
+		case BPF_S_JMP_JGE_X:
+		case BPF_S_JMP_JGT_K:
+		case BPF_S_JMP_JGT_X:
+		case BPF_S_JMP_JSET_X:
+		case BPF_S_JMP_JSET_K:
+			/* restrict both targets' masks to cells valid here */
+			masks[pc + 1 + filter[pc].jt] &= memvalid;
+			masks[pc + 1 + filter[pc].jf] &= memvalid;
+			memvalid = ~0;	/* next insn is limited by masks[] */
+			break;
+		}
+	}
+error:
+	kfree(masks);
+	return ret;
+}
+
 /**
  *	sk_chk_filter - verify socket filter code
  *	@filter: filter to verify
@@ -363,7 +494,57 @@ EXPORT_SYMBOL(sk_run_filter);
  */
 int sk_chk_filter(struct sock_filter *filter, int flen)
 {
-	struct sock_filter *ftest;
+	/*
+	 * Valid instructions are initialized to non-0.
+	 * Invalid instructions are initialized to 0.
+	 */
+	static const u8 codes[] = {
+		[BPF_ALU|BPF_ADD|BPF_K]  = BPF_S_ALU_ADD_K,
+		[BPF_ALU|BPF_ADD|BPF_X]  = BPF_S_ALU_ADD_X,
+		[BPF_ALU|BPF_SUB|BPF_K]  = BPF_S_ALU_SUB_K,
+		[BPF_ALU|BPF_SUB|BPF_X]  = BPF_S_ALU_SUB_X,
+		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K,
+		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X,
+		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X,
+		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K,
+		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X,
+		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K,
+		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X,
+		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K,
+		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X,
+		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K,
+		[BPF_ALU|BPF_RSH|BPF_X]  = BPF_S_ALU_RSH_X,
+		[BPF_ALU|BPF_NEG]        = BPF_S_ALU_NEG,
+		[BPF_LD|BPF_W|BPF_ABS]   = BPF_S_LD_W_ABS,
+		[BPF_LD|BPF_H|BPF_ABS]   = BPF_S_LD_H_ABS,
+		[BPF_LD|BPF_B|BPF_ABS]   = BPF_S_LD_B_ABS,
+		[BPF_LD|BPF_W|BPF_LEN]   = BPF_S_LD_W_LEN,
+		[BPF_LD|BPF_W|BPF_IND]   = BPF_S_LD_W_IND,
+		[BPF_LD|BPF_H|BPF_IND]   = BPF_S_LD_H_IND,
+		[BPF_LD|BPF_B|BPF_IND]   = BPF_S_LD_B_IND,
+		[BPF_LD|BPF_IMM]         = BPF_S_LD_IMM,
+		[BPF_LDX|BPF_W|BPF_LEN]  = BPF_S_LDX_W_LEN,
+		[BPF_LDX|BPF_B|BPF_MSH]  = BPF_S_LDX_B_MSH,
+		[BPF_LDX|BPF_IMM]        = BPF_S_LDX_IMM,
+		[BPF_MISC|BPF_TAX]       = BPF_S_MISC_TAX,
+		[BPF_MISC|BPF_TXA]       = BPF_S_MISC_TXA,
+		[BPF_RET|BPF_K]          = BPF_S_RET_K,
+		[BPF_RET|BPF_A]          = BPF_S_RET_A,
+		[BPF_ALU|BPF_DIV|BPF_K]  = BPF_S_ALU_DIV_K,
+		[BPF_LD|BPF_MEM]         = BPF_S_LD_MEM,
+		[BPF_LDX|BPF_MEM]        = BPF_S_LDX_MEM,
+		[BPF_ST]                 = BPF_S_ST,
+		[BPF_STX]                = BPF_S_STX,
+		[BPF_JMP|BPF_JA]         = BPF_S_JMP_JA,
+		[BPF_JMP|BPF_JEQ|BPF_K]  = BPF_S_JMP_JEQ_K,
+		[BPF_JMP|BPF_JEQ|BPF_X]  = BPF_S_JMP_JEQ_X,
+		[BPF_JMP|BPF_JGE|BPF_K]  = BPF_S_JMP_JGE_K,
+		[BPF_JMP|BPF_JGE|BPF_X]  = BPF_S_JMP_JGE_X,
+		[BPF_JMP|BPF_JGT|BPF_K]  = BPF_S_JMP_JGT_K,
+		[BPF_JMP|BPF_JGT|BPF_X]  = BPF_S_JMP_JGT_X,
+		[BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
+		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
+	};
 	int pc;
 
 	if (flen == 0 || flen > BPF_MAXINSNS)
@@ -371,61 +552,31 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 
 	/* check the filter code now */
 	for (pc = 0; pc < flen; pc++) {
-		ftest = &filter[pc];
-
-		/* Only allow valid instructions */
-		switch (ftest->code) {
-		case BPF_ALU|BPF_ADD|BPF_K:
-		case BPF_ALU|BPF_ADD|BPF_X:
-		case BPF_ALU|BPF_SUB|BPF_K:
-		case BPF_ALU|BPF_SUB|BPF_X:
-		case BPF_ALU|BPF_MUL|BPF_K:
-		case BPF_ALU|BPF_MUL|BPF_X:
-		case BPF_ALU|BPF_DIV|BPF_X:
-		case BPF_ALU|BPF_AND|BPF_K:
-		case BPF_ALU|BPF_AND|BPF_X:
-		case BPF_ALU|BPF_OR|BPF_K:
-		case BPF_ALU|BPF_OR|BPF_X:
-		case BPF_ALU|BPF_LSH|BPF_K:
-		case BPF_ALU|BPF_LSH|BPF_X:
-		case BPF_ALU|BPF_RSH|BPF_K:
-		case BPF_ALU|BPF_RSH|BPF_X:
-		case BPF_ALU|BPF_NEG:
-		case BPF_LD|BPF_W|BPF_ABS:
-		case BPF_LD|BPF_H|BPF_ABS:
-		case BPF_LD|BPF_B|BPF_ABS:
-		case BPF_LD|BPF_W|BPF_LEN:
-		case BPF_LD|BPF_W|BPF_IND:
-		case BPF_LD|BPF_H|BPF_IND:
-		case BPF_LD|BPF_B|BPF_IND:
-		case BPF_LD|BPF_IMM:
-		case BPF_LDX|BPF_W|BPF_LEN:
-		case BPF_LDX|BPF_B|BPF_MSH:
-		case BPF_LDX|BPF_IMM:
-		case BPF_MISC|BPF_TAX:
-		case BPF_MISC|BPF_TXA:
-		case BPF_RET|BPF_K:
-		case BPF_RET|BPF_A:
-			break;
+		struct sock_filter *ftest = &filter[pc];
+		u16 code = ftest->code;
 
+		if (code >= ARRAY_SIZE(codes))
+			return -EINVAL;
+		code = codes[code];
+		if (!code)
+			return -EINVAL;
 		/* Some instructions need special checks */
-
-		case BPF_ALU|BPF_DIV|BPF_K:
+		switch (code) {
+		case BPF_S_ALU_DIV_K:
 			/* check for division by zero */
 			if (ftest->k == 0)
 				return -EINVAL;
+			ftest->k = reciprocal_value(ftest->k);
 			break;
-
-		case BPF_LD|BPF_MEM:
-		case BPF_LDX|BPF_MEM:
-		case BPF_ST:
-		case BPF_STX:
+		case BPF_S_LD_MEM:
+		case BPF_S_LDX_MEM:
+		case BPF_S_ST:
+		case BPF_S_STX:
 			/* check for invalid memory addresses */
 			if (ftest->k >= BPF_MEMWORDS)
 				return -EINVAL;
 			break;
-
-		case BPF_JMP|BPF_JA:
+		case BPF_S_JMP_JA:
 			/*
 			 * Note, the large ftest->k might cause loops.
 			 * Compare this with conditional jumps below,
@@ -434,48 +585,62 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 			if (ftest->k >= (unsigned)(flen-pc-1))
 				return -EINVAL;
 			break;
-
-		case BPF_JMP|BPF_JEQ|BPF_K:
-		case BPF_JMP|BPF_JEQ|BPF_X:
-		case BPF_JMP|BPF_JGE|BPF_K:
-		case BPF_JMP|BPF_JGE|BPF_X:
-		case BPF_JMP|BPF_JGT|BPF_K:
-		case BPF_JMP|BPF_JGT|BPF_X:
-		case BPF_JMP|BPF_JSET|BPF_K:
-		case BPF_JMP|BPF_JSET|BPF_X:
+		case BPF_S_JMP_JEQ_K:
+		case BPF_S_JMP_JEQ_X:
+		case BPF_S_JMP_JGE_K:
+		case BPF_S_JMP_JGE_X:
+		case BPF_S_JMP_JGT_K:
+		case BPF_S_JMP_JGT_X:
+		case BPF_S_JMP_JSET_X:
+		case BPF_S_JMP_JSET_K:
 			/* for conditionals both must be safe */
 			if (pc + ftest->jt + 1 >= flen ||
 			    pc + ftest->jf + 1 >= flen)
 				return -EINVAL;
 			break;
-
-		default:
-			return -EINVAL;
+		case BPF_S_LD_W_ABS:
+		case BPF_S_LD_H_ABS:
+		case BPF_S_LD_B_ABS:
+#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE:	\
+				code = BPF_S_ANC_##CODE;	\
+				break
+			switch (ftest->k) {
+			ANCILLARY(PROTOCOL);
+			ANCILLARY(PKTTYPE);
+			ANCILLARY(IFINDEX);
+			ANCILLARY(NLATTR);
+			ANCILLARY(NLATTR_NEST);
+			ANCILLARY(MARK);
+			ANCILLARY(QUEUE);
+			ANCILLARY(HATYPE);
+			ANCILLARY(RXHASH);
+			ANCILLARY(CPU);
+			}
 		}
+		ftest->code = code;
 	}
 
-	return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
+	/* last instruction must be a RET code */
+	switch (filter[flen - 1].code) {
+	case BPF_S_RET_K:
+	case BPF_S_RET_A:
+		return check_load_and_stores(filter, flen);
+	}
+	return -EINVAL;
 }
 EXPORT_SYMBOL(sk_chk_filter);
 
 /**
- * 	sk_filter_rcu_release: Release a socket filter by rcu_head
+ * 	sk_filter_release_rcu - Release a socket filter by rcu_head
  *	@rcu: rcu_head that contains the sk_filter to free
  */
-static void sk_filter_rcu_release(struct rcu_head *rcu)
+void sk_filter_release_rcu(struct rcu_head *rcu)
 {
 	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
 
-	sk_filter_release(fp);
-}
-
-static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
-{
-	unsigned int size = sk_filter_len(fp);
-
-	atomic_sub(size, &sk->sk_omem_alloc);
-	call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
+	kfree(fp);
 }
+EXPORT_SYMBOL(sk_filter_release_rcu);
 
 /**
  *	sk_attach_filter - attach a socket filter
@@ -514,28 +679,28 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 		return err;
 	}
 
-	rcu_read_lock_bh();
-	old_fp = rcu_dereference(sk->sk_filter);
+	old_fp = rcu_dereference_protected(sk->sk_filter,
+					   sock_owned_by_user(sk));
 	rcu_assign_pointer(sk->sk_filter, fp);
-	rcu_read_unlock_bh();
 
 	if (old_fp)
-		sk_filter_delayed_uncharge(sk, old_fp);
+		sk_filter_uncharge(sk, old_fp);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(sk_attach_filter);
 
 int sk_detach_filter(struct sock *sk)
 {
 	int ret = -ENOENT;
 	struct sk_filter *filter;
 
-	rcu_read_lock_bh();
-	filter = rcu_dereference(sk->sk_filter);
+	filter = rcu_dereference_protected(sk->sk_filter,
+					   sock_owned_by_user(sk));
 	if (filter) {
 		rcu_assign_pointer(sk->sk_filter, NULL);
-		sk_filter_delayed_uncharge(sk, filter);
+		sk_filter_uncharge(sk, filter);
 		ret = 0;
 	}
-	rcu_read_unlock_bh();
 	return ret;
 }
+EXPORT_SYMBOL_GPL(sk_detach_filter);
diff --git a/net/core/flow.c b/net/core/flow.c
index 96015871ece..127c8a7ffd6 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -26,120 +26,161 @@
 #include <linux/security.h>
 
 struct flow_cache_entry {
-	struct flow_cache_entry	*next;
-	u16			family;
-	u8			dir;
-	u32			genid;
-	struct flowi		key;
-	void			*object;
-	atomic_t		*object_ref;
+	union {
+		struct hlist_node	hlist;
+		struct list_head	gc_list;
+	} u;
+	u16				family;
+	u8				dir;
+	u32				genid;
+	struct flowi			key;
+	struct flow_cache_object	*object;
 };
 
-atomic_t flow_cache_genid = ATOMIC_INIT(0);
-
-static u32 flow_hash_shift;
-#define flow_hash_size	(1 << flow_hash_shift)
-static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
-
-#define flow_table(cpu) (per_cpu(flow_tables, cpu))
-
-static struct kmem_cache *flow_cachep __read_mostly;
-
-static int flow_lwm, flow_hwm;
-
-struct flow_percpu_info {
-	int hash_rnd_recalc;
-	u32 hash_rnd;
-	int count;
+struct flow_cache_percpu {
+	struct hlist_head		*hash_table;
+	int				hash_count;
+	u32				hash_rnd;
+	int				hash_rnd_recalc;
+	struct tasklet_struct		flush_tasklet;
 };
-static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };
 
-#define flow_hash_rnd_recalc(cpu) \
-	(per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
-#define flow_hash_rnd(cpu) \
-	(per_cpu(flow_hash_info, cpu).hash_rnd)
-#define flow_count(cpu) \
-	(per_cpu(flow_hash_info, cpu).count)
+struct flow_flush_info {
+	struct flow_cache		*cache;
+	atomic_t			cpuleft;
+	struct completion		completion;
+};
 
-static struct timer_list flow_hash_rnd_timer;
+struct flow_cache {
+	u32				hash_shift;
+	struct flow_cache_percpu __percpu *percpu;
+	struct notifier_block		hotcpu_notifier;
+	int				low_watermark;
+	int				high_watermark;
+	struct timer_list		rnd_timer;
+};
 
-#define FLOW_HASH_RND_PERIOD	(10 * 60 * HZ)
+atomic_t flow_cache_genid = ATOMIC_INIT(0);
+EXPORT_SYMBOL(flow_cache_genid);
+static struct flow_cache flow_cache_global;
+static struct kmem_cache *flow_cachep __read_mostly;
 
-struct flow_flush_info {
-	atomic_t cpuleft;
-	struct completion completion;
-};
-static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };
+static DEFINE_SPINLOCK(flow_cache_gc_lock);
+static LIST_HEAD(flow_cache_gc_list);
 
-#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
+#define flow_cache_hash_size(cache)	(1 << (cache)->hash_shift)
+#define FLOW_HASH_RND_PERIOD		(10 * 60 * HZ)
 
 static void flow_cache_new_hashrnd(unsigned long arg)
 {
+	struct flow_cache *fc = (void *) arg;
 	int i;
 
 	for_each_possible_cpu(i)
-		flow_hash_rnd_recalc(i) = 1;
+		per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
 
-	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&flow_hash_rnd_timer);
+	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&fc->rnd_timer);
 }
 
-static void flow_entry_kill(int cpu, struct flow_cache_entry *fle)
+static int flow_entry_valid(struct flow_cache_entry *fle) /* 1 = still usable, 0 = stale */
+{
+	if (atomic_read(&flow_cache_genid) != fle->genid)
+		return 0;	/* entry's genid differs from the current global genid */
+	if (fle->object && !fle->object->ops->check(fle->object))
+		return 0;	/* the cached object's ->check() hook returned zero */
+	return 1;	/* an entry with no object only needs a matching genid */
+}
+
+static void flow_entry_kill(struct flow_cache_entry *fle)
 {
 	if (fle->object)
-		atomic_dec(fle->object_ref);
+		fle->object->ops->delete(fle->object);
 	kmem_cache_free(flow_cachep, fle);
-	flow_count(cpu)--;
 }
 
-static void __flow_cache_shrink(int cpu, int shrink_to)
+static void flow_cache_gc_task(struct work_struct *work)
 {
-	struct flow_cache_entry *fle, **flp;
-	int i;
+	struct list_head gc_list;
+	struct flow_cache_entry *fce, *n;
 
-	for (i = 0; i < flow_hash_size; i++) {
-		int k = 0;
+	INIT_LIST_HEAD(&gc_list);
+	spin_lock_bh(&flow_cache_gc_lock);
+	list_splice_tail_init(&flow_cache_gc_list, &gc_list);
+	spin_unlock_bh(&flow_cache_gc_lock);
 
-		flp = &flow_table(cpu)[i];
-		while ((fle = *flp) != NULL && k < shrink_to) {
-			k++;
-			flp = &fle->next;
-		}
-		while ((fle = *flp) != NULL) {
-			*flp = fle->next;
-			flow_entry_kill(cpu, fle);
-		}
+	list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
+		flow_entry_kill(fce);
+}
+static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);
+
+static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
+				     int deleted, struct list_head *gc_list)
+{
+	if (deleted) {
+		fcp->hash_count -= deleted;
+		spin_lock_bh(&flow_cache_gc_lock);
+		list_splice_tail(gc_list, &flow_cache_gc_list);
+		spin_unlock_bh(&flow_cache_gc_lock);
+		schedule_work(&flow_cache_gc_work);
 	}
 }
 
-static void flow_cache_shrink(int cpu)
+static void __flow_cache_shrink(struct flow_cache *fc,
+				struct flow_cache_percpu *fcp,
+				int shrink_to)
 {
-	int shrink_to = flow_lwm / flow_hash_size;
+	struct flow_cache_entry *fle;
+	struct hlist_node *entry, *tmp;
+	LIST_HEAD(gc_list);
+	int i, deleted = 0;
+
+	for (i = 0; i < flow_cache_hash_size(fc); i++) {
+		int saved = 0;
+
+		hlist_for_each_entry_safe(fle, entry, tmp,
+					  &fcp->hash_table[i], u.hlist) {
+			if (saved < shrink_to &&
+			    flow_entry_valid(fle)) {
+				saved++;
+			} else {
+				deleted++;
+				hlist_del(&fle->u.hlist);
+				list_add_tail(&fle->u.gc_list, &gc_list);
+			}
+		}
+	}
 
-	__flow_cache_shrink(cpu, shrink_to);
+	flow_cache_queue_garbage(fcp, deleted, &gc_list);
 }
 
-static void flow_new_hash_rnd(int cpu)
+static void flow_cache_shrink(struct flow_cache *fc,
+			      struct flow_cache_percpu *fcp)
 {
-	get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32));
-	flow_hash_rnd_recalc(cpu) = 0;
+	int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
+
+	__flow_cache_shrink(fc, fcp, shrink_to);
+}
 
-	__flow_cache_shrink(cpu, 0);
+static void flow_new_hash_rnd(struct flow_cache *fc,
+			      struct flow_cache_percpu *fcp)
+{
+	get_random_bytes(&fcp->hash_rnd, sizeof(u32));
+	fcp->hash_rnd_recalc = 0;
+	__flow_cache_shrink(fc, fcp, 0);
 }
 
-static u32 flow_hash_code(struct flowi *key, int cpu)
+static u32 flow_hash_code(struct flow_cache *fc,
+			  struct flow_cache_percpu *fcp,
+			  struct flowi *key)
 {
 	u32 *k = (u32 *) key;
 
-	return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) &
-		(flow_hash_size - 1));
+	return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
+		& (flow_cache_hash_size(fc) - 1);
 }
 
-#if (BITS_PER_LONG == 64)
-typedef u64 flow_compare_t;
-#else
-typedef u32 flow_compare_t;
-#endif
+typedef unsigned long flow_compare_t;
 
 /* I hear what you're saying, use memcmp.  But memcmp cannot make
  * important assumptions that we can here, such as alignment and
@@ -165,114 +206,118 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
 	return 0;
 }
 
-void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
-			flow_resolve_t resolver)
+struct flow_cache_object *
+flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+		  flow_resolve_t resolver, void *ctx)
 {
-	struct flow_cache_entry *fle, **head;
+	struct flow_cache *fc = &flow_cache_global;
+	struct flow_cache_percpu *fcp;
+	struct flow_cache_entry *fle, *tfle;
+	struct hlist_node *entry;
+	struct flow_cache_object *flo;
 	unsigned int hash;
-	int cpu;
 
 	local_bh_disable();
-	cpu = smp_processor_id();
+	fcp = this_cpu_ptr(fc->percpu);
 
 	fle = NULL;
+	flo = NULL;
 	/* Packet really early in init?  Making flow_cache_init a
 	 * pre-smp initcall would solve this.  --RR */
-	if (!flow_table(cpu))
+	if (!fcp->hash_table)
 		goto nocache;
 
-	if (flow_hash_rnd_recalc(cpu))
-		flow_new_hash_rnd(cpu);
-	hash = flow_hash_code(key, cpu);
-
-	head = &flow_table(cpu)[hash];
-	for (fle = *head; fle; fle = fle->next) {
-		if (fle->family == family &&
-		    fle->dir == dir &&
-		    flow_key_compare(key, &fle->key) == 0) {
-			if (fle->genid == atomic_read(&flow_cache_genid)) {
-				void *ret = fle->object;
+	if (fcp->hash_rnd_recalc)
+		flow_new_hash_rnd(fc, fcp);
 
-				if (ret)
-					atomic_inc(fle->object_ref);
-				local_bh_enable();
-
-				return ret;
-			}
+	hash = flow_hash_code(fc, fcp, key);
+	hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) {
+		if (tfle->family == family &&
+		    tfle->dir == dir &&
+		    flow_key_compare(key, &tfle->key) == 0) {
+			fle = tfle;
 			break;
 		}
 	}
 
-	if (!fle) {
-		if (flow_count(cpu) > flow_hwm)
-			flow_cache_shrink(cpu);
+	if (unlikely(!fle)) {
+		if (fcp->hash_count > fc->high_watermark)
+			flow_cache_shrink(fc, fcp);
 
 		fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
 		if (fle) {
-			fle->next = *head;
-			*head = fle;
 			fle->family = family;
 			fle->dir = dir;
 			memcpy(&fle->key, key, sizeof(*key));
 			fle->object = NULL;
-			flow_count(cpu)++;
+			hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
+			fcp->hash_count++;
 		}
+	} else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
+		flo = fle->object;
+		if (!flo)
+			goto ret_object;
+		flo = flo->ops->get(flo);
+		if (flo)
+			goto ret_object;
+	} else if (fle->object) {
+	        flo = fle->object;
+	        flo->ops->delete(flo);
+	        fle->object = NULL;
 	}
 
 nocache:
-	{
-		int err;
-		void *obj;
-		atomic_t *obj_ref;
-
-		err = resolver(net, key, family, dir, &obj, &obj_ref);
-
-		if (fle && !err) {
-			fle->genid = atomic_read(&flow_cache_genid);
-
-			if (fle->object)
-				atomic_dec(fle->object_ref);
-
-			fle->object = obj;
-			fle->object_ref = obj_ref;
-			if (obj)
-				atomic_inc(fle->object_ref);
-		}
-		local_bh_enable();
-
-		if (err)
-			obj = ERR_PTR(err);
-		return obj;
+	flo = NULL;
+	if (fle) {
+		flo = fle->object;
+		fle->object = NULL;
+	}
+	flo = resolver(net, key, family, dir, flo, ctx);
+	if (fle) {
+		fle->genid = atomic_read(&flow_cache_genid);
+		if (!IS_ERR(flo))
+			fle->object = flo;
+		else
+			fle->genid--;
+	} else {
+		if (flo && !IS_ERR(flo))
+			flo->ops->delete(flo);
 	}
+ret_object:
+	local_bh_enable();
+	return flo;
 }
+EXPORT_SYMBOL(flow_cache_lookup);
 
 static void flow_cache_flush_tasklet(unsigned long data)
 {
 	struct flow_flush_info *info = (void *)data;
-	int i;
-	int cpu;
-
-	cpu = smp_processor_id();
-	for (i = 0; i < flow_hash_size; i++) {
-		struct flow_cache_entry *fle;
-
-		fle = flow_table(cpu)[i];
-		for (; fle; fle = fle->next) {
-			unsigned genid = atomic_read(&flow_cache_genid);
-
-			if (!fle->object || fle->genid == genid)
+	struct flow_cache *fc = info->cache;
+	struct flow_cache_percpu *fcp;
+	struct flow_cache_entry *fle;
+	struct hlist_node *entry, *tmp;
+	LIST_HEAD(gc_list);
+	int i, deleted = 0;
+
+	fcp = this_cpu_ptr(fc->percpu);
+	for (i = 0; i < flow_cache_hash_size(fc); i++) {
+		hlist_for_each_entry_safe(fle, entry, tmp,
+					  &fcp->hash_table[i], u.hlist) {
+			if (flow_entry_valid(fle))
 				continue;
 
-			fle->object = NULL;
-			atomic_dec(fle->object_ref);
+			deleted++;
+			hlist_del(&fle->u.hlist);
+			list_add_tail(&fle->u.gc_list, &gc_list);
 		}
 	}
 
+	flow_cache_queue_garbage(fcp, deleted, &gc_list);
+
 	if (atomic_dec_and_test(&info->cpuleft))
 		complete(&info->completion);
 }
 
-static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
 static void flow_cache_flush_per_cpu(void *data)
 {
 	struct flow_flush_info *info = data;
@@ -280,8 +325,7 @@ static void flow_cache_flush_per_cpu(void *data)
 	struct tasklet_struct *tasklet;
 
 	cpu = smp_processor_id();
-
-	tasklet = flow_flush_tasklet(cpu);
+	tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
 	tasklet->data = (unsigned long)info;
 	tasklet_schedule(tasklet);
 }
@@ -294,6 +338,7 @@ void flow_cache_flush(void)
 	/* Don't want cpus going down or up during this. */
 	get_online_cpus();
 	mutex_lock(&flow_flush_sem);
+	info.cache = &flow_cache_global;
 	atomic_set(&info.cpuleft, num_online_cpus());
 	init_completion(&info.completion);
 
@@ -307,62 +352,83 @@ void flow_cache_flush(void)
 	put_online_cpus();
 }
 
-static void __init flow_cache_cpu_prepare(int cpu)
+static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
 {
-	struct tasklet_struct *tasklet;
-	unsigned long order;
-
-	for (order = 0;
-	     (PAGE_SIZE << order) <
-		     (sizeof(struct flow_cache_entry *)*flow_hash_size);
-	     order++)
-		/* NOTHING */;
-
-	flow_table(cpu) = (struct flow_cache_entry **)
-		__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
-	if (!flow_table(cpu))
-		panic("NET: failed to allocate flow cache order %lu\n", order);
-
-	flow_hash_rnd_recalc(cpu) = 1;
-	flow_count(cpu) = 0;
-
-	tasklet = flow_flush_tasklet(cpu);
-	tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
+	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
+	size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
+
+	if (!fcp->hash_table) {
+		fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
+		if (!fcp->hash_table) {
+			pr_err("NET: failed to allocate flow cache sz %zu\n", sz);
+			return -ENOMEM;
+		}
+		fcp->hash_rnd_recalc = 1;
+		fcp->hash_count = 0;
+		tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
+	}
+	return 0;
 }
 
-static int flow_cache_cpu(struct notifier_block *nfb,
+static int __cpuinit flow_cache_cpu(struct notifier_block *nfb,
 			  unsigned long action,
 			  void *hcpu)
 {
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
-		__flow_cache_shrink((unsigned long)hcpu, 0);
+	struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
+	int res, cpu = (unsigned long) hcpu;
+	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		res = flow_cache_cpu_prepare(fc, cpu);
+		if (res)
+			return notifier_from_errno(res);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		__flow_cache_shrink(fc, fcp, 0);
+		break;
+	}
 	return NOTIFY_OK;
 }
 
-static int __init flow_cache_init(void)
+static int __init flow_cache_init(struct flow_cache *fc)
 {
 	int i;
 
-	flow_cachep = kmem_cache_create("flow_cache",
-					sizeof(struct flow_cache_entry),
-					0, SLAB_PANIC,
-					NULL);
-	flow_hash_shift = 10;
-	flow_lwm = 2 * flow_hash_size;
-	flow_hwm = 4 * flow_hash_size;
+	fc->hash_shift = 10;
+	fc->low_watermark = 2 * flow_cache_hash_size(fc);
+	fc->high_watermark = 4 * flow_cache_hash_size(fc);
 
-	setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0);
-	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&flow_hash_rnd_timer);
+	fc->percpu = alloc_percpu(struct flow_cache_percpu);
+	if (!fc->percpu)
+		return -ENOMEM;
 
-	for_each_possible_cpu(i)
-		flow_cache_cpu_prepare(i);
+	for_each_online_cpu(i) {
+		if (flow_cache_cpu_prepare(fc, i))
+			return -ENOMEM;
+	}
+	fc->hotcpu_notifier = (struct notifier_block){
+		.notifier_call = flow_cache_cpu,
+	};
+	register_hotcpu_notifier(&fc->hotcpu_notifier);
+
+	setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
+		    (unsigned long) fc);
+	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&fc->rnd_timer);
 
-	hotcpu_notifier(flow_cache_cpu, 0);
 	return 0;
 }
 
-module_init(flow_cache_init);
+static int __init flow_cache_init_global(void)
+{
+	flow_cachep = kmem_cache_create("flow_cache",
+					sizeof(struct flow_cache_entry),
+					0, SLAB_PANIC, NULL);
 
-EXPORT_SYMBOL(flow_cache_genid);
-EXPORT_SYMBOL(flow_cache_lookup);
+	return flow_cache_init(&flow_cache_global);
+}
+
+module_init(flow_cache_init_global);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 78e5bfc454a..7c2373321b7 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -32,6 +32,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/init.h>
 #include <linux/rbtree.h>
+#include <linux/slab.h>
 #include <net/sock.h>
 #include <net/gen_stats.h>
 
@@ -81,7 +82,7 @@
 struct gen_estimator
 {
 	struct list_head	list;
-	struct gnet_stats_basic	*bstats;
+	struct gnet_stats_basic_packed	*bstats;
 	struct gnet_stats_rate_est	*rate_est;
 	spinlock_t		*stats_lock;
 	int			ewma_log;
@@ -106,6 +107,7 @@ static DEFINE_RWLOCK(est_lock);
 
 /* Protects against soft lockup during large deletion */
 static struct rb_root est_root = RB_ROOT;
+static DEFINE_SPINLOCK(est_tree_lock);
 
 static void est_timer(unsigned long arg)
 {
@@ -165,7 +167,7 @@ static void gen_add_node(struct gen_estimator *est)
 }
 
 static
-struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats,
+struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats,
 				    const struct gnet_stats_rate_est *rate_est)
 {
 	struct rb_node *p = est_root.rb_node;
@@ -200,9 +202,8 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats,
  *
  * Returns 0 on success or a negative error code.
  *
- * NOTE: Called under rtnl_mutex
  */
-int gen_new_estimator(struct gnet_stats_basic *bstats,
+int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 		      struct gnet_stats_rate_est *rate_est,
 		      spinlock_t *stats_lock,
 		      struct nlattr *opt)
@@ -231,6 +232,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
 	est->last_packets = bstats->packets;
 	est->avpps = rate_est->pps<<10;
 
+	spin_lock_bh(&est_tree_lock);
 	if (!elist[idx].timer.function) {
 		INIT_LIST_HEAD(&elist[idx].list);
 		setup_timer(&elist[idx].timer, est_timer, idx);
@@ -241,6 +243,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
 
 	list_add_rcu(&est->list, &elist[idx].list);
 	gen_add_node(est);
+	spin_unlock_bh(&est_tree_lock);
 
 	return 0;
 }
@@ -260,23 +263,25 @@ static void __gen_kill_estimator(struct rcu_head *head)
  *
  * Removes the rate estimator specified by &bstats and &rate_est.
  *
- * NOTE: Called under rtnl_mutex
+ * NOTE: The caller must wait for an RCU grace period before freeing stats_lock
  */
-void gen_kill_estimator(struct gnet_stats_basic *bstats,
+void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 			struct gnet_stats_rate_est *rate_est)
 {
 	struct gen_estimator *e;
 
+	spin_lock_bh(&est_tree_lock);
 	while ((e = gen_find_node(bstats, rate_est))) {
 		rb_erase(&e->node, &est_root);
 
-		write_lock_bh(&est_lock);
+		write_lock(&est_lock);
 		e->bstats = NULL;
-		write_unlock_bh(&est_lock);
+		write_unlock(&est_lock);
 
 		list_del_rcu(&e->list);
 		call_rcu(&e->e_rcu, __gen_kill_estimator);
 	}
+	spin_unlock_bh(&est_tree_lock);
 }
 EXPORT_SYMBOL(gen_kill_estimator);
 
@@ -292,7 +297,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
  *
  * Returns 0 on success or a negative error code.
  */
-int gen_replace_estimator(struct gnet_stats_basic *bstats,
+int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
 			  struct gnet_stats_rate_est *rate_est,
 			  spinlock_t *stats_lock, struct nlattr *opt)
 {
@@ -308,11 +313,17 @@ EXPORT_SYMBOL(gen_replace_estimator);
  *
  * Returns true if estimator is active, and false if not.
  */
-bool gen_estimator_active(const struct gnet_stats_basic *bstats,
+bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
 			  const struct gnet_stats_rate_est *rate_est)
 {
+	bool res;
+
 	ASSERT_RTNL();
 
-	return gen_find_node(bstats, rate_est) != NULL;
+	spin_lock_bh(&est_tree_lock);
+	res = gen_find_node(bstats, rate_est) != NULL;
+	spin_unlock_bh(&est_tree_lock);
+
+	return res;
 }
 EXPORT_SYMBOL(gen_estimator_active);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index c3d0ffeac24..0452eb27a27 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -73,6 +73,7 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
 
 	return 0;
 }
+EXPORT_SYMBOL(gnet_stats_start_copy_compat);
 
 /**
  * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
@@ -93,6 +94,7 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
 {
 	return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d);
 }
+EXPORT_SYMBOL(gnet_stats_start_copy);
 
 /**
  * gnet_stats_copy_basic - copy basic statistics into statistic TLV
@@ -106,22 +108,29 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
  * if the room in the socket buffer was not sufficient.
  */
 int
-gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic *b)
+gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
 {
 	if (d->compat_tc_stats) {
 		d->tc_stats.bytes = b->bytes;
 		d->tc_stats.packets = b->packets;
 	}
 
-	if (d->tail)
-		return gnet_stats_copy(d, TCA_STATS_BASIC, b, sizeof(*b));
+	if (d->tail) {
+		struct gnet_stats_basic sb;
 
+		memset(&sb, 0, sizeof(sb));
+		sb.bytes = b->bytes;
+		sb.packets = b->packets;
+		return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb));
+	}
 	return 0;
 }
+EXPORT_SYMBOL(gnet_stats_copy_basic);
 
 /**
  * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
  * @d: dumping handle
+ * @b: basic statistics
  * @r: rate estimator statistics
  *
  * Appends the rate estimator statistics to the top level TLV created by
@@ -131,8 +140,13 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic *b)
  * if the room in the socket buffer was not sufficient.
  */
 int
-gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r)
+gnet_stats_copy_rate_est(struct gnet_dump *d,
+			 const struct gnet_stats_basic_packed *b,
+			 struct gnet_stats_rate_est *r)
 {
+	if (b && !gen_estimator_active(b, r))
+		return 0;
+
 	if (d->compat_tc_stats) {
 		d->tc_stats.bps = r->bps;
 		d->tc_stats.pps = r->pps;
@@ -143,6 +157,7 @@ gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r)
 
 	return 0;
 }
+EXPORT_SYMBOL(gnet_stats_copy_rate_est);
 
 /**
  * gnet_stats_copy_queue - copy queue statistics into statistics TLV
@@ -170,6 +185,7 @@ gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q)
 
 	return 0;
 }
+EXPORT_SYMBOL(gnet_stats_copy_queue);
 
 /**
  * gnet_stats_copy_app - copy application specific statistics into statistics TLV
@@ -197,6 +213,7 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
 
 	return 0;
 }
+EXPORT_SYMBOL(gnet_stats_copy_app);
 
 /**
  * gnet_stats_finish_copy - finish dumping procedure
@@ -230,12 +247,4 @@ gnet_stats_finish_copy(struct gnet_dump *d)
 	spin_unlock_bh(d->lock);
 	return 0;
 }
-
-
-EXPORT_SYMBOL(gnet_stats_start_copy);
-EXPORT_SYMBOL(gnet_stats_start_copy_compat);
-EXPORT_SYMBOL(gnet_stats_copy_basic);
-EXPORT_SYMBOL(gnet_stats_copy_rate_est);
-EXPORT_SYMBOL(gnet_stats_copy_queue);
-EXPORT_SYMBOL(gnet_stats_copy_app);
 EXPORT_SYMBOL(gnet_stats_finish_copy);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 16ad45d4882..c40f27e7d20 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -20,7 +20,6 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/slab.h>
 #include <linux/net.h>
 #include <linux/in6.h>
 #include <asm/uaccess.h>
@@ -38,11 +37,13 @@
 
 int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
 {
-	int size, err, ct;
+	int size, ct, err;
 
 	if (m->msg_namelen) {
 		if (mode == VERIFY_READ) {
-			err = move_addr_to_kernel(m->msg_name, m->msg_namelen,
+			void __user *namep;
+			namep = (void __user __force *) m->msg_name;
+			err = move_addr_to_kernel(namep, m->msg_namelen,
 						  address);
 			if (err < 0)
 				return err;
@@ -53,21 +54,20 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
 	}
 
 	size = m->msg_iovlen * sizeof(struct iovec);
-	if (copy_from_user(iov, m->msg_iov, size))
+	if (copy_from_user(iov, (void __user __force *) m->msg_iov, size))
 		return -EFAULT;
 
 	m->msg_iov = iov;
 	err = 0;
 
 	for (ct = 0; ct < m->msg_iovlen; ct++) {
-		err += iov[ct].iov_len;
-		/*
-		 * Goal is not to verify user data, but to prevent returning
-		 * negative value, which is interpreted as errno.
-		 * Overflow is still possible, but it is harmless.
-		 */
-		if (err < 0)
-			return -EMSGSIZE;
+		size_t len = iov[ct].iov_len;
+
+		if (len > INT_MAX - err) {
+			len = INT_MAX - err;
+			iov[ct].iov_len = len;
+		}
+		err += len;
 	}
 
 	return err;
@@ -96,6 +96,7 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
 
 	return 0;
 }
+EXPORT_SYMBOL(memcpy_toiovec);
 
 /*
  *	Copy kernel to iovec. Returns -EFAULT on error.
@@ -121,6 +122,7 @@ int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
 
 	return 0;
 }
+EXPORT_SYMBOL(memcpy_toiovecend);
 
 /*
  *	Copy iovec to kernel. Returns -EFAULT on error.
@@ -145,6 +147,7 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
 
 	return 0;
 }
+EXPORT_SYMBOL(memcpy_fromiovec);
 
 /*
  *	Copy iovec from kernel. Returns -EFAULT on error.
@@ -173,6 +176,7 @@ int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
 
 	return 0;
 }
+EXPORT_SYMBOL(memcpy_fromiovecend);
 
 /*
  *	And now for the all-in-one: copy and checksum from a user iovec
@@ -257,9 +261,4 @@ out_fault:
 	err = -EFAULT;
 	goto out;
 }
-
 EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
-EXPORT_SYMBOL(memcpy_fromiovec);
-EXPORT_SYMBOL(memcpy_fromiovecend);
-EXPORT_SYMBOL(memcpy_toiovec);
-EXPORT_SYMBOL(memcpy_toiovecend);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index bf8f7af699d..01a1101b593 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -19,7 +19,6 @@
 #include <linux/rtnetlink.h>
 #include <linux/jiffies.h>
 #include <linux/spinlock.h>
-#include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
 #include <asm/types.h>
@@ -35,7 +34,7 @@ static unsigned long linkwatch_nextevent;
 static void linkwatch_event(struct work_struct *dummy);
 static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);
 
-static struct net_device *lweventlist;
+static LIST_HEAD(lweventlist);
 static DEFINE_SPINLOCK(lweventlist_lock);
 
 static unsigned char default_operstate(const struct net_device *dev)
@@ -89,8 +88,10 @@ static void linkwatch_add_event(struct net_device *dev)
 	unsigned long flags;
 
 	spin_lock_irqsave(&lweventlist_lock, flags);
-	dev->link_watch_next = lweventlist;
-	lweventlist = dev;
+	if (list_empty(&dev->link_watch_list)) {
+		list_add_tail(&dev->link_watch_list, &lweventlist);
+		dev_hold(dev);
+	}
 	spin_unlock_irqrestore(&lweventlist_lock, flags);
 }
 
@@ -133,9 +134,35 @@ static void linkwatch_schedule_work(int urgent)
 }
 
 
+static void linkwatch_do_dev(struct net_device *dev) /* process one dequeued link event */
+{
+	/*
+	 * Make sure reads done before this call (e.g. by our caller) are
+	 * complete, since that state can be rewritten once the bit is cleared.
+	 */
+	smp_mb__before_clear_bit();
+
+	/* We are about to handle this device,
+	 * so new events can be accepted
+	 */
+	clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
+
+	rfc2863_policy(dev);
+	if (dev->flags & IFF_UP) {	/* only touch the qdisc state of devices that are up */
+		if (netif_carrier_ok(dev))
+			dev_activate(dev);
+		else
+			dev_deactivate(dev);
+
+		netdev_state_change(dev);
+	}
+	dev_put(dev);	/* drop the reference taken in linkwatch_add_event() */
+}
+
 static void __linkwatch_run_queue(int urgent_only)
 {
-	struct net_device *next;
+	struct net_device *dev;
+	LIST_HEAD(wrk);
 
 	/*
 	 * Limit the number of linkwatch events to one
@@ -153,46 +180,40 @@ static void __linkwatch_run_queue(int urgent_only)
 	clear_bit(LW_URGENT, &linkwatch_flags);
 
 	spin_lock_irq(&lweventlist_lock);
-	next = lweventlist;
-	lweventlist = NULL;
-	spin_unlock_irq(&lweventlist_lock);
+	list_splice_init(&lweventlist, &wrk);
 
-	while (next) {
-		struct net_device *dev = next;
+	while (!list_empty(&wrk)) {
 
-		next = dev->link_watch_next;
+		dev = list_first_entry(&wrk, struct net_device, link_watch_list);
+		list_del_init(&dev->link_watch_list);
 
 		if (urgent_only && !linkwatch_urgent_event(dev)) {
-			linkwatch_add_event(dev);
+			list_add_tail(&dev->link_watch_list, &lweventlist);
 			continue;
 		}
-
-		/*
-		 * Make sure the above read is complete since it can be
-		 * rewritten as soon as we clear the bit below.
-		 */
-		smp_mb__before_clear_bit();
-
-		/* We are about to handle this device,
-		 * so new events can be accepted
-		 */
-		clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
-
-		rfc2863_policy(dev);
-		if (dev->flags & IFF_UP) {
-			if (netif_carrier_ok(dev))
-				dev_activate(dev);
-			else
-				dev_deactivate(dev);
-
-			netdev_state_change(dev);
-		}
-
-		dev_put(dev);
+		spin_unlock_irq(&lweventlist_lock);
+		linkwatch_do_dev(dev);
+		spin_lock_irq(&lweventlist_lock);
 	}
 
-	if (lweventlist)
+	if (!list_empty(&lweventlist))
 		linkwatch_schedule_work(0);
+	spin_unlock_irq(&lweventlist_lock);
+}
+
+/* Unqueue @dev if it has a pending event and handle that event at once. */
+void linkwatch_forget_dev(struct net_device *dev)
+{
+	unsigned long irqflags;
+	int was_queued;
+
+	spin_lock_irqsave(&lweventlist_lock, irqflags);
+	was_queued = !list_empty(&dev->link_watch_list);
+	if (was_queued)
+		list_del_init(&dev->link_watch_list);
+	spin_unlock_irqrestore(&lweventlist_lock, irqflags);
+	if (was_queued)
+		linkwatch_do_dev(dev);
 }
 
 
@@ -216,13 +237,10 @@ void linkwatch_fire_event(struct net_device *dev)
 	bool urgent = linkwatch_urgent_event(dev);
 
 	if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
-		dev_hold(dev);
-
 		linkwatch_add_event(dev);
 	} else if (!urgent)
 		return;
 
 	linkwatch_schedule_work(urgent);
 }
-
 EXPORT_SYMBOL(linkwatch_fire_event);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 163b4f5b036..60a90291342 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -15,6 +15,7 @@
  *	Harald Welte		Add neighbour cache statistics like rtstat
  */
 
+#include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -40,7 +41,6 @@
 
 #define NEIGH_PRINTK(x...) printk(x)
 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
-#define NEIGH_PRINTK0 NEIGH_PRINTK
 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
 
@@ -121,7 +121,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
 
 unsigned long neigh_rand_reach_time(unsigned long base)
 {
-	return (base ? (net_random() % base) + (base >> 1) : 0);
+	return base ? (net_random() % base) + (base >> 1) : 0;
 }
 EXPORT_SYMBOL(neigh_rand_reach_time);
 
@@ -130,15 +130,20 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 {
 	int shrunk = 0;
 	int i;
+	struct neigh_hash_table *nht;
 
 	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
 
 	write_lock_bh(&tbl->lock);
-	for (i = 0; i <= tbl->hash_mask; i++) {
-		struct neighbour *n, **np;
+	nht = rcu_dereference_protected(tbl->nht,
+					lockdep_is_held(&tbl->lock));
+	for (i = 0; i <= nht->hash_mask; i++) {
+		struct neighbour *n;
+		struct neighbour __rcu **np;
 
-		np = &tbl->hash_buckets[i];
-		while ((n = *np) != NULL) {
+		np = &nht->hash_buckets[i];
+		while ((n = rcu_dereference_protected(*np,
+					lockdep_is_held(&tbl->lock))) != NULL) {
 			/* Neighbour record may be discarded if:
 			 * - nobody refers to it.
 			 * - it is not permanent
@@ -146,7 +151,9 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 			write_lock(&n->lock);
 			if (atomic_read(&n->refcnt) == 1 &&
 			    !(n->nud_state & NUD_PERMANENT)) {
-				*np	= n->next;
+				rcu_assign_pointer(*np,
+					rcu_dereference_protected(n->next,
+						  lockdep_is_held(&tbl->lock)));
 				n->dead = 1;
 				shrunk	= 1;
 				write_unlock(&n->lock);
@@ -198,16 +205,24 @@ static void pneigh_queue_purge(struct sk_buff_head *list)
 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
 {
 	int i;
+	struct neigh_hash_table *nht;
 
-	for (i = 0; i <= tbl->hash_mask; i++) {
-		struct neighbour *n, **np = &tbl->hash_buckets[i];
+	nht = rcu_dereference_protected(tbl->nht,
+					lockdep_is_held(&tbl->lock));
 
-		while ((n = *np) != NULL) {
+	for (i = 0; i <= nht->hash_mask; i++) {
+		struct neighbour *n;
+		struct neighbour __rcu **np = &nht->hash_buckets[i];
+
+		while ((n = rcu_dereference_protected(*np,
+					lockdep_is_held(&tbl->lock))) != NULL) {
 			if (dev && n->dev != dev) {
 				np = &n->next;
 				continue;
 			}
-			*np = n->next;
+			rcu_assign_pointer(*np,
+				   rcu_dereference_protected(n->next,
+						lockdep_is_held(&tbl->lock)));
 			write_lock(&n->lock);
 			neigh_del_timer(n);
 			n->dead = 1;
@@ -278,6 +293,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
 
 	skb_queue_head_init(&n->arp_queue);
 	rwlock_init(&n->lock);
+	seqlock_init(&n->ha_lock);
 	n->updated	  = n->used = now;
 	n->nud_state	  = NUD_NONE;
 	n->output	  = neigh_blackhole;
@@ -296,64 +312,86 @@ out_entries:
 	goto out;
 }
 
-static struct neighbour **neigh_hash_alloc(unsigned int entries)
+static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
 {
-	unsigned long size = entries * sizeof(struct neighbour *);
-	struct neighbour **ret;
+	size_t size = entries * sizeof(struct neighbour *);
+	struct neigh_hash_table *ret;
+	struct neighbour **buckets;
 
-	if (size <= PAGE_SIZE) {
-		ret = kzalloc(size, GFP_ATOMIC);
-	} else {
-		ret = (struct neighbour **)
-		      __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size));
+	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
+	if (!ret)
+		return NULL;
+	if (size <= PAGE_SIZE)
+		buckets = kzalloc(size, GFP_ATOMIC);
+	else
+		buckets = (struct neighbour **)
+			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
+					   get_order(size));
+	if (!buckets) {
+		kfree(ret);
+		return NULL;
 	}
+	rcu_assign_pointer(ret->hash_buckets, buckets);
+	ret->hash_mask = entries - 1;
+	get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
 	return ret;
 }
 
-static void neigh_hash_free(struct neighbour **hash, unsigned int entries)
+static void neigh_hash_free_rcu(struct rcu_head *head)
 {
-	unsigned long size = entries * sizeof(struct neighbour *);
+	struct neigh_hash_table *nht = container_of(head,
+						    struct neigh_hash_table,
+						    rcu);
+	size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
+	struct neighbour **buckets = nht->hash_buckets;
 
 	if (size <= PAGE_SIZE)
-		kfree(hash);
+		kfree(buckets);
 	else
-		free_pages((unsigned long)hash, get_order(size));
+		free_pages((unsigned long)buckets, get_order(size));
+	kfree(nht);
 }
 
-static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
+static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
+						unsigned long new_entries)
 {
-	struct neighbour **new_hash, **old_hash;
-	unsigned int i, new_hash_mask, old_entries;
+	unsigned int i, hash;
+	struct neigh_hash_table *new_nht, *old_nht;
 
 	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
 
 	BUG_ON(!is_power_of_2(new_entries));
-	new_hash = neigh_hash_alloc(new_entries);
-	if (!new_hash)
-		return;
+	old_nht = rcu_dereference_protected(tbl->nht,
+					    lockdep_is_held(&tbl->lock));
+	new_nht = neigh_hash_alloc(new_entries);
+	if (!new_nht)
+		return old_nht;
 
-	old_entries = tbl->hash_mask + 1;
-	new_hash_mask = new_entries - 1;
-	old_hash = tbl->hash_buckets;
-
-	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
-	for (i = 0; i < old_entries; i++) {
+	for (i = 0; i <= old_nht->hash_mask; i++) {
 		struct neighbour *n, *next;
 
-		for (n = old_hash[i]; n; n = next) {
-			unsigned int hash_val = tbl->hash(n->primary_key, n->dev);
-
-			hash_val &= new_hash_mask;
-			next = n->next;
-
-			n->next = new_hash[hash_val];
-			new_hash[hash_val] = n;
+		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
+						   lockdep_is_held(&tbl->lock));
+		     n != NULL;
+		     n = next) {
+			hash = tbl->hash(n->primary_key, n->dev,
+					 new_nht->hash_rnd);
+
+			hash &= new_nht->hash_mask;
+			next = rcu_dereference_protected(n->next,
+						lockdep_is_held(&tbl->lock));
+
+			rcu_assign_pointer(n->next,
+					   rcu_dereference_protected(
+						new_nht->hash_buckets[hash],
+						lockdep_is_held(&tbl->lock)));
+			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
 		}
 	}
-	tbl->hash_buckets = new_hash;
-	tbl->hash_mask = new_hash_mask;
 
-	neigh_hash_free(old_hash, old_entries);
+	rcu_assign_pointer(tbl->nht, new_nht);
+	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
+	return new_nht;
 }
 
 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
@@ -362,19 +400,26 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
 	struct neighbour *n;
 	int key_len = tbl->key_len;
 	u32 hash_val;
+	struct neigh_hash_table *nht;
 
 	NEIGH_CACHE_STAT_INC(tbl, lookups);
 
-	read_lock_bh(&tbl->lock);
-	hash_val = tbl->hash(pkey, dev);
-	for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
+	rcu_read_lock_bh();
+	nht = rcu_dereference_bh(tbl->nht);
+	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
+
+	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
+	     n != NULL;
+	     n = rcu_dereference_bh(n->next)) {
 		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
-			neigh_hold(n);
+			if (!atomic_inc_not_zero(&n->refcnt))
+				n = NULL;
 			NEIGH_CACHE_STAT_INC(tbl, hits);
 			break;
 		}
 	}
-	read_unlock_bh(&tbl->lock);
+
+	rcu_read_unlock_bh();
 	return n;
 }
 EXPORT_SYMBOL(neigh_lookup);
@@ -385,20 +430,27 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
 	struct neighbour *n;
 	int key_len = tbl->key_len;
 	u32 hash_val;
+	struct neigh_hash_table *nht;
 
 	NEIGH_CACHE_STAT_INC(tbl, lookups);
 
-	read_lock_bh(&tbl->lock);
-	hash_val = tbl->hash(pkey, NULL);
-	for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
+	rcu_read_lock_bh();
+	nht = rcu_dereference_bh(tbl->nht);
+	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask;
+
+	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
+	     n != NULL;
+	     n = rcu_dereference_bh(n->next)) {
 		if (!memcmp(n->primary_key, pkey, key_len) &&
 		    net_eq(dev_net(n->dev), net)) {
-			neigh_hold(n);
+			if (!atomic_inc_not_zero(&n->refcnt))
+				n = NULL;
 			NEIGH_CACHE_STAT_INC(tbl, hits);
 			break;
 		}
 	}
-	read_unlock_bh(&tbl->lock);
+
+	rcu_read_unlock_bh();
 	return n;
 }
 EXPORT_SYMBOL(neigh_lookup_nodev);
@@ -410,6 +462,7 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
 	int key_len = tbl->key_len;
 	int error;
 	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
+	struct neigh_hash_table *nht;
 
 	if (!n) {
 		rc = ERR_PTR(-ENOBUFS);
@@ -436,18 +489,24 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
 	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
 
 	write_lock_bh(&tbl->lock);
+	nht = rcu_dereference_protected(tbl->nht,
+					lockdep_is_held(&tbl->lock));
 
-	if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))
-		neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);
+	if (atomic_read(&tbl->entries) > (nht->hash_mask + 1))
+		nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1);
 
-	hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
+	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
 
 	if (n->parms->dead) {
 		rc = ERR_PTR(-EINVAL);
 		goto out_tbl_unlock;
 	}
 
-	for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
+	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
+					    lockdep_is_held(&tbl->lock));
+	     n1 != NULL;
+	     n1 = rcu_dereference_protected(n1->next,
+			lockdep_is_held(&tbl->lock))) {
 		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
 			neigh_hold(n1);
 			rc = n1;
@@ -455,10 +514,12 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
 		}
 	}
 
-	n->next = tbl->hash_buckets[hash_val];
-	tbl->hash_buckets[hash_val] = n;
 	n->dead = 0;
 	neigh_hold(n);
+	rcu_assign_pointer(n->next,
+			   rcu_dereference_protected(nht->hash_buckets[hash_val],
+						     lockdep_is_held(&tbl->lock)));
+	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
 	write_unlock_bh(&tbl->lock);
 	NEIGH_PRINTK2("neigh %p is created.\n", n);
 	rc = n;
@@ -615,6 +676,12 @@ static inline void neigh_parms_put(struct neigh_parms *parms)
 		neigh_parms_destroy(parms);
 }
 
+static void neigh_destroy_rcu(struct rcu_head *head)
+{
+	struct neighbour *neigh = container_of(head, struct neighbour, rcu);
+
+	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
+}
 /*
  *	neighbour must already be out of the table;
  *
@@ -642,8 +709,7 @@ void neigh_destroy(struct neighbour *neigh)
 		write_seqlock_bh(&hh->hh_lock);
 		hh->hh_output = neigh_blackhole;
 		write_sequnlock_bh(&hh->hh_lock);
-		if (atomic_dec_and_test(&hh->hh_refcnt))
-			kfree(hh);
+		hh_cache_put(hh);
 	}
 
 	skb_queue_purge(&neigh->arp_queue);
@@ -654,7 +720,7 @@ void neigh_destroy(struct neighbour *neigh)
 	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
 
 	atomic_dec(&neigh->tbl->entries);
-	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
+	call_rcu(&neigh->rcu, neigh_destroy_rcu);
 }
 EXPORT_SYMBOL(neigh_destroy);
 
@@ -692,86 +758,92 @@ static void neigh_connect(struct neighbour *neigh)
 		hh->hh_output = neigh->ops->hh_output;
 }
 
-static void neigh_periodic_timer(unsigned long arg)
+static void neigh_periodic_work(struct work_struct *work)
 {
-	struct neigh_table *tbl = (struct neigh_table *)arg;
-	struct neighbour *n, **np;
-	unsigned long expire, now = jiffies;
+	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
+	struct neighbour *n;
+	struct neighbour __rcu **np;
+	unsigned int i;
+	struct neigh_hash_table *nht;
 
 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
 
-	write_lock(&tbl->lock);
+	write_lock_bh(&tbl->lock);
+	nht = rcu_dereference_protected(tbl->nht,
+					lockdep_is_held(&tbl->lock));
 
 	/*
 	 *	periodically recompute ReachableTime from random function
 	 */
 
-	if (time_after(now, tbl->last_rand + 300 * HZ)) {
+	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
 		struct neigh_parms *p;
-		tbl->last_rand = now;
+		tbl->last_rand = jiffies;
 		for (p = &tbl->parms; p; p = p->next)
 			p->reachable_time =
 				neigh_rand_reach_time(p->base_reachable_time);
 	}
 
-	np = &tbl->hash_buckets[tbl->hash_chain_gc];
-	tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask);
+	for (i = 0 ; i <= nht->hash_mask; i++) {
+		np = &nht->hash_buckets[i];
 
-	while ((n = *np) != NULL) {
-		unsigned int state;
+		while ((n = rcu_dereference_protected(*np,
+				lockdep_is_held(&tbl->lock))) != NULL) {
+			unsigned int state;
 
-		write_lock(&n->lock);
+			write_lock(&n->lock);
 
-		state = n->nud_state;
-		if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
-			write_unlock(&n->lock);
-			goto next_elt;
-		}
+			state = n->nud_state;
+			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
+				write_unlock(&n->lock);
+				goto next_elt;
+			}
 
-		if (time_before(n->used, n->confirmed))
-			n->used = n->confirmed;
+			if (time_before(n->used, n->confirmed))
+				n->used = n->confirmed;
 
-		if (atomic_read(&n->refcnt) == 1 &&
-		    (state == NUD_FAILED ||
-		     time_after(now, n->used + n->parms->gc_staletime))) {
-			*np = n->next;
-			n->dead = 1;
+			if (atomic_read(&n->refcnt) == 1 &&
+			    (state == NUD_FAILED ||
+			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
+				*np = n->next;
+				n->dead = 1;
+				write_unlock(&n->lock);
+				neigh_cleanup_and_release(n);
+				continue;
+			}
 			write_unlock(&n->lock);
-			neigh_cleanup_and_release(n);
-			continue;
-		}
-		write_unlock(&n->lock);
 
 next_elt:
-		np = &n->next;
+			np = &n->next;
+		}
+		/* The table may grow while the lock is dropped and the old one
+		 * is freed after a grace period, so reload nht afterwards. */
+		write_unlock_bh(&tbl->lock);
+		cond_resched();
+		write_lock_bh(&tbl->lock);
+		nht = rcu_dereference_protected(tbl->nht,
+						lockdep_is_held(&tbl->lock));
 	}
-
 	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
 	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
 	 * base_reachable_time.
 	 */
-	expire = tbl->parms.base_reachable_time >> 1;
-	expire /= (tbl->hash_mask + 1);
-	if (!expire)
-		expire = 1;
-
-	if (expire>HZ)
-		mod_timer(&tbl->gc_timer, round_jiffies(now + expire));
-	else
-		mod_timer(&tbl->gc_timer, now + expire);
-
-	write_unlock(&tbl->lock);
+	schedule_delayed_work(&tbl->gc_work,
+			      tbl->parms.base_reachable_time >> 1);
+	write_unlock_bh(&tbl->lock);
 }
 
 static __inline__ int neigh_max_probes(struct neighbour *n)
 {
 	struct neigh_parms *p = n->parms;
-	return (n->nud_state & NUD_PROBE ?
+	return (n->nud_state & NUD_PROBE) ?
 		p->ucast_probes :
-		p->ucast_probes + p->app_probes + p->mcast_probes);
+		p->ucast_probes + p->app_probes + p->mcast_probes;
 }
 
 static void neigh_invalidate(struct neighbour *neigh)
+	__releases(neigh->lock)
+	__acquires(neigh->lock)
 {
 	struct sk_buff *skb;
 
@@ -932,6 +1004,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 				kfree_skb(buff);
 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
 			}
+			skb_dst_force(skb);
 			__skb_queue_tail(&neigh->arp_queue, skb);
 		}
 		rc = 1;
@@ -942,11 +1015,14 @@ out_unlock_bh:
 }
 EXPORT_SYMBOL(__neigh_event_send);
 
-static void neigh_update_hhs(struct neighbour *neigh)
+static void neigh_update_hhs(const struct neighbour *neigh)
 {
 	struct hh_cache *hh;
 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
-		= neigh->dev->header_ops->cache_update;
+		= NULL;
+
+	if (neigh->dev->header_ops)
+		update = neigh->dev->header_ops->cache_update;
 
 	if (update) {
 		for (hh = neigh->hh; hh; hh = hh->hh_next) {
@@ -1075,7 +1151,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 	}
 
 	if (lladdr != neigh->ha) {
+		write_seqlock(&neigh->ha_lock);
 		memcpy(&neigh->ha, lladdr, dev->addr_len);
+		write_sequnlock(&neigh->ha_lock);
 		neigh_update_hhs(neigh);
 		if (!(new & NUD_CONNECTED))
 			neigh->confirmed = jiffies -
@@ -1133,44 +1211,73 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
 }
 EXPORT_SYMBOL(neigh_event_ns);
 
+static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
+				   __be16 protocol)
+{
+	struct hh_cache *entry = NULL;
+
+	smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
+	for (entry = n->hh; entry; entry = entry->hh_next) {
+		if (entry->hh_type != protocol)
+			continue;
+		atomic_inc(&entry->hh_refcnt);
+		if (unlikely(cmpxchg(&dst->hh, NULL, entry) != NULL))
+			hh_cache_put(entry);
+		return true;
+	}
+	return false;
+}
+
+/* takes write_lock_bh(&n->lock) itself; call without n->lock held */
 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
 			  __be16 protocol)
 {
 	struct hh_cache	*hh;
 	struct net_device *dev = dst->dev;
 
-	for (hh = n->hh; hh; hh = hh->hh_next)
-		if (hh->hh_type == protocol)
-			break;
+	if (likely(neigh_hh_lookup(n, dst, protocol)))
+		return;
 
-	if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
-		seqlock_init(&hh->hh_lock);
-		hh->hh_type = protocol;
-		atomic_set(&hh->hh_refcnt, 0);
-		hh->hh_next = NULL;
+	/* slow path */
+	hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
+	if (!hh)
+		return;
 
-		if (dev->header_ops->cache(n, hh)) {
-			kfree(hh);
-			hh = NULL;
-		} else {
-			atomic_inc(&hh->hh_refcnt);
-			hh->hh_next = n->hh;
-			n->hh	    = hh;
-			if (n->nud_state & NUD_CONNECTED)
-				hh->hh_output = n->ops->hh_output;
-			else
-				hh->hh_output = n->ops->output;
-		}
+	seqlock_init(&hh->hh_lock);
+	hh->hh_type = protocol;
+	atomic_set(&hh->hh_refcnt, 2);
+
+	if (dev->header_ops->cache(n, hh)) {
+		kfree(hh);
+		return;
 	}
-	if (hh)	{
-		atomic_inc(&hh->hh_refcnt);
-		dst->hh = hh;
+
+	write_lock_bh(&n->lock);
+
+	/* must check if another thread already did the insert */
+	if (neigh_hh_lookup(n, dst, protocol)) {
+		kfree(hh);
+		goto end;
 	}
+
+	if (n->nud_state & NUD_CONNECTED)
+		hh->hh_output = n->ops->hh_output;
+	else
+		hh->hh_output = n->ops->output;
+
+	hh->hh_next = n->hh;
+	smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
+	n->hh	    = hh;
+
+	if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
+		hh_cache_put(hh);
+end:
+	write_unlock_bh(&n->lock);
 }
 
 /* This function can be used in contexts, where only old dev_queue_xmit
-   worked, f.e. if you want to override normal output path (eql, shaper),
-   but resolution is not made yet.
+ * worked, f.e. if you want to override normal output path (eql, shaper),
+ * but resolution is not made yet.
  */
 
 int neigh_compat_output(struct sk_buff *skb)
@@ -1204,19 +1311,19 @@ int neigh_resolve_output(struct sk_buff *skb)
 	if (!neigh_event_send(neigh, skb)) {
 		int err;
 		struct net_device *dev = neigh->dev;
-		if (dev->header_ops->cache && !dst->hh) {
-			write_lock_bh(&neigh->lock);
-			if (!dst->hh)
-				neigh_hh_init(neigh, dst, dst->ops->protocol);
-			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
-					      neigh->ha, NULL, skb->len);
-			write_unlock_bh(&neigh->lock);
-		} else {
-			read_lock_bh(&neigh->lock);
+		unsigned int seq;
+
+		if (dev->header_ops->cache &&
+		    !dst->hh &&
+		    !(dst->flags & DST_NOCACHE))
+			neigh_hh_init(neigh, dst, dst->ops->protocol);
+
+		do {
+			seq = read_seqbegin(&neigh->ha_lock);
 			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
 					      neigh->ha, NULL, skb->len);
-			read_unlock_bh(&neigh->lock);
-		}
+		} while (read_seqretry(&neigh->ha_lock, seq));
+
 		if (err >= 0)
 			rc = neigh->ops->queue_xmit(skb);
 		else
@@ -1242,13 +1349,16 @@ int neigh_connected_output(struct sk_buff *skb)
 	struct dst_entry *dst = skb_dst(skb);
 	struct neighbour *neigh = dst->neighbour;
 	struct net_device *dev = neigh->dev;
+	unsigned int seq;
 
 	__skb_pull(skb, skb_network_offset(skb));
 
-	read_lock_bh(&neigh->lock);
-	err = dev_hard_header(skb, dev, ntohs(skb->protocol),
-			      neigh->ha, NULL, skb->len);
-	read_unlock_bh(&neigh->lock);
+	do {
+		seq = read_seqbegin(&neigh->ha_lock);
+		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+				      neigh->ha, NULL, skb->len);
+	} while (read_seqretry(&neigh->ha_lock, seq));
+
 	if (err >= 0)
 		err = neigh->ops->queue_xmit(skb);
 	else {
@@ -1316,7 +1426,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
 }
 EXPORT_SYMBOL(pneigh_enqueue);
 
-static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
+static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
 						      struct net *net, int ifindex)
 {
 	struct neigh_parms *p;
@@ -1337,7 +1447,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 	struct net *net = dev_net(dev);
 	const struct net_device_ops *ops = dev->netdev_ops;
 
-	ref = lookup_neigh_params(tbl, net, 0);
+	ref = lookup_neigh_parms(tbl, net, 0);
 	if (!ref)
 		return NULL;
 
@@ -1430,22 +1540,17 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
 		panic("cannot create neighbour proc dir entry");
 #endif
 
-	tbl->hash_mask = 1;
-	tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);
+	tbl->nht = neigh_hash_alloc(8);
 
 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
 
-	if (!tbl->hash_buckets || !tbl->phash_buckets)
+	if (!tbl->nht || !tbl->phash_buckets)
 		panic("cannot allocate neighbour cache hashes");
 
-	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
-
 	rwlock_init(&tbl->lock);
-	setup_timer(&tbl->gc_timer, neigh_periodic_timer, (unsigned long)tbl);
-	tbl->gc_timer.expires  = now + 1;
-	add_timer(&tbl->gc_timer);
-
+	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
+	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
 	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
 	skb_queue_head_init_class(&tbl->proxy_queue,
 			&neigh_table_proxy_queue_class);
@@ -1482,7 +1587,7 @@ int neigh_table_clear(struct neigh_table *tbl)
 	struct neigh_table **tp;
 
 	/* It is not clean... Fix it to unload IPv6 module safely */
-	del_timer_sync(&tbl->gc_timer);
+	cancel_delayed_work_sync(&tbl->gc_work);
 	del_timer_sync(&tbl->proxy_timer);
 	pneigh_queue_purge(&tbl->proxy_queue);
 	neigh_ifdown(tbl, NULL);
@@ -1497,8 +1602,8 @@ int neigh_table_clear(struct neigh_table *tbl)
 	}
 	write_unlock(&neigh_tbl_lock);
 
-	neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
-	tbl->hash_buckets = NULL;
+	call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu);
+	tbl->nht = NULL;
 
 	kfree(tbl->phash_buckets);
 	tbl->phash_buckets = NULL;
@@ -1524,6 +1629,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct net_device *dev = NULL;
 	int err = -EINVAL;
 
+	ASSERT_RTNL();
 	if (nlmsg_len(nlh) < sizeof(*ndm))
 		goto out;
 
@@ -1533,7 +1639,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 	ndm = nlmsg_data(nlh);
 	if (ndm->ndm_ifindex) {
-		dev = dev_get_by_index(net, ndm->ndm_ifindex);
+		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
 		if (dev == NULL) {
 			err = -ENODEV;
 			goto out;
@@ -1549,34 +1655,31 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		read_unlock(&neigh_tbl_lock);
 
 		if (nla_len(dst_attr) < tbl->key_len)
-			goto out_dev_put;
+			goto out;
 
 		if (ndm->ndm_flags & NTF_PROXY) {
 			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
-			goto out_dev_put;
+			goto out;
 		}
 
 		if (dev == NULL)
-			goto out_dev_put;
+			goto out;
 
 		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
 		if (neigh == NULL) {
 			err = -ENOENT;
-			goto out_dev_put;
+			goto out;
 		}
 
 		err = neigh_update(neigh, NULL, NUD_FAILED,
 				   NEIGH_UPDATE_F_OVERRIDE |
 				   NEIGH_UPDATE_F_ADMIN);
 		neigh_release(neigh);
-		goto out_dev_put;
+		goto out;
 	}
 	read_unlock(&neigh_tbl_lock);
 	err = -EAFNOSUPPORT;
 
-out_dev_put:
-	if (dev)
-		dev_put(dev);
 out:
 	return err;
 }
@@ -1590,6 +1693,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct net_device *dev = NULL;
 	int err;
 
+	ASSERT_RTNL();
 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
 	if (err < 0)
 		goto out;
@@ -1600,14 +1704,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 	ndm = nlmsg_data(nlh);
 	if (ndm->ndm_ifindex) {
-		dev = dev_get_by_index(net, ndm->ndm_ifindex);
+		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
 		if (dev == NULL) {
 			err = -ENODEV;
 			goto out;
 		}
 
 		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
-			goto out_dev_put;
+			goto out;
 	}
 
 	read_lock(&neigh_tbl_lock);
@@ -1621,7 +1725,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		read_unlock(&neigh_tbl_lock);
 
 		if (nla_len(tb[NDA_DST]) < tbl->key_len)
-			goto out_dev_put;
+			goto out;
 		dst = nla_data(tb[NDA_DST]);
 		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
 
@@ -1634,29 +1738,29 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 				pn->flags = ndm->ndm_flags;
 				err = 0;
 			}
-			goto out_dev_put;
+			goto out;
 		}
 
 		if (dev == NULL)
-			goto out_dev_put;
+			goto out;
 
 		neigh = neigh_lookup(tbl, dst, dev);
 		if (neigh == NULL) {
 			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
 				err = -ENOENT;
-				goto out_dev_put;
+				goto out;
 			}
 
 			neigh = __neigh_lookup_errno(tbl, dst, dev);
 			if (IS_ERR(neigh)) {
 				err = PTR_ERR(neigh);
-				goto out_dev_put;
+				goto out;
 			}
 		} else {
 			if (nlh->nlmsg_flags & NLM_F_EXCL) {
 				err = -EEXIST;
 				neigh_release(neigh);
-				goto out_dev_put;
+				goto out;
 			}
 
 			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
@@ -1669,15 +1773,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		} else
 			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
 		neigh_release(neigh);
-		goto out_dev_put;
+		goto out;
 	}
 
 	read_unlock(&neigh_tbl_lock);
 	err = -EAFNOSUPPORT;
-
-out_dev_put:
-	if (dev)
-		dev_put(dev);
 out:
 	return err;
 }
@@ -1743,19 +1843,22 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
 		unsigned long now = jiffies;
 		unsigned int flush_delta = now - tbl->last_flush;
 		unsigned int rand_delta = now - tbl->last_rand;
-
+		struct neigh_hash_table *nht;
 		struct ndt_config ndc = {
 			.ndtc_key_len		= tbl->key_len,
 			.ndtc_entry_size	= tbl->entry_size,
 			.ndtc_entries		= atomic_read(&tbl->entries),
 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
-			.ndtc_hash_rnd		= tbl->hash_rnd,
-			.ndtc_hash_mask		= tbl->hash_mask,
-			.ndtc_hash_chain_gc	= tbl->hash_chain_gc,
 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
 		};
 
+		rcu_read_lock_bh();
+		nht = rcu_dereference_bh(tbl->nht);
+		ndc.ndtc_hash_rnd = nht->hash_rnd;
+		ndc.ndtc_hash_mask = nht->hash_mask;
+		rcu_read_unlock_bh();
+
 		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
 	}
 
@@ -1906,7 +2009,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		if (tbp[NDTPA_IFINDEX])
 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
 
-		p = lookup_neigh_params(tbl, net, ifindex);
+		p = lookup_neigh_parms(tbl, net, ifindex);
 		if (p == NULL) {
 			err = -ENOENT;
 			goto errout_tbl_lock;
@@ -2052,10 +2155,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
 
 	read_lock_bh(&neigh->lock);
 	ndm->ndm_state	 = neigh->nud_state;
-	if ((neigh->nud_state & NUD_VALID) &&
-	    nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
-		read_unlock_bh(&neigh->lock);
-		goto nla_put_failure;
+	if (neigh->nud_state & NUD_VALID) {
+		char haddr[MAX_ADDR_LEN];
+
+		neigh_ha_snapshot(haddr, neigh, neigh->dev);
+		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
+			read_unlock_bh(&neigh->lock);
+			goto nla_put_failure;
+		}
 	}
 
 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
@@ -2083,19 +2190,24 @@ static void neigh_update_notify(struct neighbour *neigh)
 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 			    struct netlink_callback *cb)
 {
-	struct net * net = sock_net(skb->sk);
+	struct net *net = sock_net(skb->sk);
 	struct neighbour *n;
 	int rc, h, s_h = cb->args[1];
 	int idx, s_idx = idx = cb->args[2];
+	struct neigh_hash_table *nht;
 
-	read_lock_bh(&tbl->lock);
-	for (h = 0; h <= tbl->hash_mask; h++) {
+	rcu_read_lock_bh();
+	nht = rcu_dereference_bh(tbl->nht);
+
+	for (h = 0; h <= nht->hash_mask; h++) {
 		if (h < s_h)
 			continue;
 		if (h > s_h)
 			s_idx = 0;
-		for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
-			if (dev_net(n->dev) != net)
+		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
+		     n != NULL;
+		     n = rcu_dereference_bh(n->next)) {
+			if (!net_eq(dev_net(n->dev), net))
 				continue;
 			if (idx < s_idx)
 				goto next;
@@ -2103,17 +2215,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 					    cb->nlh->nlmsg_seq,
 					    RTM_NEWNEIGH,
 					    NLM_F_MULTI) <= 0) {
-				read_unlock_bh(&tbl->lock);
 				rc = -1;
 				goto out;
 			}
-		next:
+next:
 			idx++;
 		}
 	}
-	read_unlock_bh(&tbl->lock);
 	rc = skb->len;
 out:
+	rcu_read_unlock_bh();
 	cb->args[1] = h;
 	cb->args[2] = idx;
 	return rc;
@@ -2146,15 +2257,22 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
 {
 	int chain;
+	struct neigh_hash_table *nht;
 
-	read_lock_bh(&tbl->lock);
-	for (chain = 0; chain <= tbl->hash_mask; chain++) {
+	rcu_read_lock_bh();
+	nht = rcu_dereference_bh(tbl->nht);
+
+	read_lock(&tbl->lock); /* avoid resizes */
+	for (chain = 0; chain <= nht->hash_mask; chain++) {
 		struct neighbour *n;
 
-		for (n = tbl->hash_buckets[chain]; n; n = n->next)
+		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
+		     n != NULL;
+		     n = rcu_dereference_bh(n->next))
 			cb(n, cookie);
 	}
-	read_unlock_bh(&tbl->lock);
+	read_unlock(&tbl->lock);
+	rcu_read_unlock_bh();
 }
 EXPORT_SYMBOL(neigh_for_each);
 
@@ -2163,18 +2281,25 @@ void __neigh_for_each_release(struct neigh_table *tbl,
 			      int (*cb)(struct neighbour *))
 {
 	int chain;
+	struct neigh_hash_table *nht;
 
-	for (chain = 0; chain <= tbl->hash_mask; chain++) {
-		struct neighbour *n, **np;
+	nht = rcu_dereference_protected(tbl->nht,
+					lockdep_is_held(&tbl->lock));
+	for (chain = 0; chain <= nht->hash_mask; chain++) {
+		struct neighbour *n;
+		struct neighbour __rcu **np;
 
-		np = &tbl->hash_buckets[chain];
-		while ((n = *np) != NULL) {
+		np = &nht->hash_buckets[chain];
+		while ((n = rcu_dereference_protected(*np,
+					lockdep_is_held(&tbl->lock))) != NULL) {
 			int release;
 
 			write_lock(&n->lock);
 			release = cb(n);
 			if (release) {
-				*np = n->next;
+				rcu_assign_pointer(*np,
+					rcu_dereference_protected(n->next,
+						lockdep_is_held(&tbl->lock)));
 				n->dead = 1;
 			} else
 				np = &n->next;
@@ -2192,13 +2317,13 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
 {
 	struct neigh_seq_state *state = seq->private;
 	struct net *net = seq_file_net(seq);
-	struct neigh_table *tbl = state->tbl;
+	struct neigh_hash_table *nht = state->nht;
 	struct neighbour *n = NULL;
 	int bucket = state->bucket;
 
 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
-	for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
-		n = tbl->hash_buckets[bucket];
+	for (bucket = 0; bucket <= nht->hash_mask; bucket++) {
+		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
 
 		while (n) {
 			if (!net_eq(dev_net(n->dev), net))
@@ -2215,8 +2340,8 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
 				break;
 			if (n->nud_state & ~NUD_NOARP)
 				break;
-		next:
-			n = n->next;
+next:
+			n = rcu_dereference_bh(n->next);
 		}
 
 		if (n)
@@ -2233,14 +2358,14 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
 {
 	struct neigh_seq_state *state = seq->private;
 	struct net *net = seq_file_net(seq);
-	struct neigh_table *tbl = state->tbl;
+	struct neigh_hash_table *nht = state->nht;
 
 	if (state->neigh_sub_iter) {
 		void *v = state->neigh_sub_iter(state, n, pos);
 		if (v)
 			return n;
 	}
-	n = n->next;
+	n = rcu_dereference_bh(n->next);
 
 	while (1) {
 		while (n) {
@@ -2257,17 +2382,17 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
 
 			if (n->nud_state & ~NUD_NOARP)
 				break;
-		next:
-			n = n->next;
+next:
+			n = rcu_dereference_bh(n->next);
 		}
 
 		if (n)
 			break;
 
-		if (++state->bucket > tbl->hash_mask)
+		if (++state->bucket > nht->hash_mask)
 			break;
 
-		n = tbl->hash_buckets[state->bucket];
+		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
 	}
 
 	if (n && pos)
@@ -2365,7 +2490,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
 }
 
 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
-	__acquires(tbl->lock)
+	__acquires(rcu_bh)
 {
 	struct neigh_seq_state *state = seq->private;
 
@@ -2373,7 +2498,8 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl
 	state->bucket = 0;
 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
 
-	read_lock_bh(&tbl->lock);
+	rcu_read_lock_bh();
+	state->nht = rcu_dereference_bh(tbl->nht);
 
 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
 }
@@ -2407,12 +2533,9 @@ out:
 EXPORT_SYMBOL(neigh_seq_next);
 
 void neigh_seq_stop(struct seq_file *seq, void *v)
-	__releases(tbl->lock)
+	__releases(rcu_bh)
 {
-	struct neigh_seq_state *state = seq->private;
-	struct neigh_table *tbl = state->tbl;
-
-	read_unlock_bh(&tbl->lock);
+	rcu_read_unlock_bh();
 }
 EXPORT_SYMBOL(neigh_seq_stop);
 
@@ -2420,8 +2543,7 @@ EXPORT_SYMBOL(neigh_seq_stop);
 
 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	struct proc_dir_entry *pde = seq->private;
-	struct neigh_table *tbl = pde->data;
+	struct neigh_table *tbl = seq->private;
 	int cpu;
 
 	if (*pos == 0)
@@ -2438,8 +2560,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
 
 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct proc_dir_entry *pde = seq->private;
-	struct neigh_table *tbl = pde->data;
+	struct neigh_table *tbl = seq->private;
 	int cpu;
 
 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
@@ -2458,8 +2579,7 @@ static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
 
 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
 {
-	struct proc_dir_entry *pde = seq->private;
-	struct neigh_table *tbl = pde->data;
+	struct neigh_table *tbl = seq->private;
 	struct neigh_statistics *st = v;
 
 	if (v == SEQ_START_TOKEN) {
@@ -2504,7 +2624,7 @@ static int neigh_stat_seq_open(struct inode *inode, struct file *file)
 
 	if (!ret) {
 		struct seq_file *sf = file->private_data;
-		sf->private = PDE(inode);
+		sf->private = PDE(inode)->data;
 	}
 	return ret;
 };
@@ -2562,28 +2682,27 @@ EXPORT_SYMBOL(neigh_app_ns);
 
 #ifdef CONFIG_SYSCTL
 
+#define NEIGH_VARS_MAX 19
+
 static struct neigh_sysctl_table {
 	struct ctl_table_header *sysctl_header;
-	struct ctl_table neigh_vars[__NET_NEIGH_MAX];
+	struct ctl_table neigh_vars[NEIGH_VARS_MAX];
 	char *dev_name;
 } neigh_sysctl_template __read_mostly = {
 	.neigh_vars = {
 		{
-			.ctl_name	= NET_NEIGH_MCAST_SOLICIT,
 			.procname	= "mcast_solicit",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
 		{
-			.ctl_name	= NET_NEIGH_UCAST_SOLICIT,
 			.procname	= "ucast_solicit",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
 		{
-			.ctl_name	= NET_NEIGH_APP_SOLICIT,
 			.procname	= "app_solicit",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
@@ -2596,38 +2715,30 @@ static struct neigh_sysctl_table {
 			.proc_handler	= proc_dointvec_userhz_jiffies,
 		},
 		{
-			.ctl_name	= NET_NEIGH_REACHABLE_TIME,
 			.procname	= "base_reachable_time",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_jiffies,
-			.strategy	= sysctl_jiffies,
 		},
 		{
-			.ctl_name	= NET_NEIGH_DELAY_PROBE_TIME,
 			.procname	= "delay_first_probe_time",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_jiffies,
-			.strategy	= sysctl_jiffies,
 		},
 		{
-			.ctl_name	= NET_NEIGH_GC_STALE_TIME,
 			.procname	= "gc_stale_time",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_jiffies,
-			.strategy	= sysctl_jiffies,
 		},
 		{
-			.ctl_name	= NET_NEIGH_UNRES_QLEN,
 			.procname	= "unres_qlen",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
 		{
-			.ctl_name	= NET_NEIGH_PROXY_QLEN,
 			.procname	= "proxy_qlen",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
@@ -2652,45 +2763,36 @@ static struct neigh_sysctl_table {
 			.proc_handler	= proc_dointvec_userhz_jiffies,
 		},
 		{
-			.ctl_name	= NET_NEIGH_RETRANS_TIME_MS,
 			.procname	= "retrans_time_ms",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_ms_jiffies,
-			.strategy	= sysctl_ms_jiffies,
 		},
 		{
-			.ctl_name	= NET_NEIGH_REACHABLE_TIME_MS,
 			.procname	= "base_reachable_time_ms",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_ms_jiffies,
-			.strategy	= sysctl_ms_jiffies,
 		},
 		{
-			.ctl_name	= NET_NEIGH_GC_INTERVAL,
 			.procname	= "gc_interval",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_jiffies,
-			.strategy	= sysctl_jiffies,
 		},
 		{
-			.ctl_name	= NET_NEIGH_GC_THRESH1,
 			.procname	= "gc_thresh1",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
 		{
-			.ctl_name	= NET_NEIGH_GC_THRESH2,
 			.procname	= "gc_thresh2",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
 		{
-			.ctl_name	= NET_NEIGH_GC_THRESH3,
 			.procname	= "gc_thresh3",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
@@ -2701,8 +2803,7 @@ static struct neigh_sysctl_table {
 };
 
 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
-			  int p_id, int pdev_id, char *p_name,
-			  proc_handler *handler, ctl_handler *strategy)
+			  char *p_name, proc_handler *handler)
 {
 	struct neigh_sysctl_table *t;
 	const char *dev_name_source = NULL;
@@ -2713,10 +2814,10 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 #define NEIGH_CTL_PATH_DEV	3
 
 	struct ctl_path neigh_path[] = {
-		{ .procname = "net",	 .ctl_name = CTL_NET, },
-		{ .procname = "proto",	 .ctl_name = 0, },
-		{ .procname = "neigh",	 .ctl_name = 0, },
-		{ .procname = "default", .ctl_name = NET_PROTO_CONF_DEFAULT, },
+		{ .procname = "net",	 },
+		{ .procname = "proto",	 },
+		{ .procname = "neigh",	 },
+		{ .procname = "default", },
 		{ },
 	};
 
@@ -2741,7 +2842,6 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 
 	if (dev) {
 		dev_name_source = dev->name;
-		neigh_path[NEIGH_CTL_PATH_DEV].ctl_name = dev->ifindex;
 		/* Terminate the table early */
 		memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
 	} else {
@@ -2753,31 +2853,19 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 	}
 
 
-	if (handler || strategy) {
+	if (handler) {
 		/* RetransTime */
 		t->neigh_vars[3].proc_handler = handler;
-		t->neigh_vars[3].strategy = strategy;
 		t->neigh_vars[3].extra1 = dev;
-		if (!strategy)
-			t->neigh_vars[3].ctl_name = CTL_UNNUMBERED;
 		/* ReachableTime */
 		t->neigh_vars[4].proc_handler = handler;
-		t->neigh_vars[4].strategy = strategy;
 		t->neigh_vars[4].extra1 = dev;
-		if (!strategy)
-			t->neigh_vars[4].ctl_name = CTL_UNNUMBERED;
 		/* RetransTime (in milliseconds)*/
 		t->neigh_vars[12].proc_handler = handler;
-		t->neigh_vars[12].strategy = strategy;
 		t->neigh_vars[12].extra1 = dev;
-		if (!strategy)
-			t->neigh_vars[12].ctl_name = CTL_UNNUMBERED;
 		/* ReachableTime (in milliseconds) */
 		t->neigh_vars[13].proc_handler = handler;
-		t->neigh_vars[13].strategy = strategy;
 		t->neigh_vars[13].extra1 = dev;
-		if (!strategy)
-			t->neigh_vars[13].ctl_name = CTL_UNNUMBERED;
 	}
 
 	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
@@ -2785,9 +2873,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 		goto free;
 
 	neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
-	neigh_path[NEIGH_CTL_PATH_NEIGH].ctl_name = pdev_id;
 	neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
-	neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id;
 
 	t->sysctl_header =
 		register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 3994680c08b..e23c01be5a5 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -13,10 +13,14 @@
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
+#include <linux/slab.h>
+#include <linux/nsproxy.h>
 #include <net/sock.h>
+#include <net/net_namespace.h>
 #include <linux/rtnetlink.h>
 #include <linux/wireless.h>
-#include <net/iw_handler.h>
+#include <linux/vmalloc.h>
+#include <net/wext.h>
 
 #include "net-sysfs.h"
 
@@ -25,6 +29,7 @@ static const char fmt_hex[] = "%#x\n";
 static const char fmt_long_hex[] = "%#lx\n";
 static const char fmt_dec[] = "%d\n";
 static const char fmt_ulong[] = "%lu\n";
+static const char fmt_u64[] = "%llu\n";
 
 static inline int dev_isalive(const struct net_device *dev)
 {
@@ -90,6 +95,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
 }
 
 NETDEVICE_SHOW(dev_id, fmt_hex);
+NETDEVICE_SHOW(addr_assign_type, fmt_dec);
 NETDEVICE_SHOW(addr_len, fmt_dec);
 NETDEVICE_SHOW(iflink, fmt_dec);
 NETDEVICE_SHOW(ifindex, fmt_dec);
@@ -130,6 +136,48 @@ static ssize_t show_carrier(struct device *dev,
 	return -EINVAL;
 }
 
+/* sysfs "speed": report the ethtool link speed, or -EINVAL if unavailable. */
+static ssize_t show_speed(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+	struct ethtool_cmd cmd = { ETHTOOL_GSET };
+	int ret = -EINVAL;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (!netif_running(netdev) || !netdev->ethtool_ops ||
+	    !netdev->ethtool_ops->get_settings)
+		goto unlock;
+	if (netdev->ethtool_ops->get_settings(netdev, &cmd) == 0)
+		ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd));
+unlock:
+	rtnl_unlock();
+	return ret;
+}
+
+/* sysfs "duplex": report "full" or "half" from ethtool, -EINVAL on failure. */
+static ssize_t show_duplex(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+	struct ethtool_cmd cmd = { ETHTOOL_GSET };
+	int ret = -EINVAL;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (!netif_running(netdev) || !netdev->ethtool_ops ||
+	    !netdev->ethtool_ops->get_settings)
+		goto unlock;
+	if (netdev->ethtool_ops->get_settings(netdev, &cmd) == 0)
+		ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half");
+unlock:
+	rtnl_unlock();
+	return ret;
+}
+
 static ssize_t show_dormant(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
@@ -141,7 +189,7 @@ static ssize_t show_dormant(struct device *dev,
 	return -EINVAL;
 }
 
-static const char *operstates[] = {
+static const char *const operstates[] = {
 	"unknown",
 	"notpresent", /* currently unused */
 	"down",
@@ -248,6 +296,7 @@ static ssize_t show_ifalias(struct device *dev,
 }
 
 static struct device_attribute net_class_attributes[] = {
+	__ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
 	__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
 	__ATTR(dev_id, S_IRUGO, show_dev_id, NULL),
 	__ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias),
@@ -259,6 +308,8 @@ static struct device_attribute net_class_attributes[] = {
 	__ATTR(address, S_IRUGO, show_address, NULL),
 	__ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
 	__ATTR(carrier, S_IRUGO, show_carrier, NULL),
+	__ATTR(speed, S_IRUGO, show_speed, NULL),
+	__ATTR(duplex, S_IRUGO, show_duplex, NULL),
 	__ATTR(dormant, S_IRUGO, show_dormant, NULL),
 	__ATTR(operstate, S_IRUGO, show_operstate, NULL),
 	__ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu),
@@ -276,14 +327,15 @@ static ssize_t netstat_show(const struct device *d,
 	struct net_device *dev = to_net_dev(d);
 	ssize_t ret = -EINVAL;
 
-	WARN_ON(offset > sizeof(struct net_device_stats) ||
-			offset % sizeof(unsigned long) != 0);
+	WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
+			offset % sizeof(u64) != 0);
 
 	read_lock(&dev_base_lock);
 	if (dev_isalive(dev)) {
-		const struct net_device_stats *stats = dev_get_stats(dev);
-		ret = sprintf(buf, fmt_ulong,
-			      *(unsigned long *)(((u8 *) stats) + offset));
+		struct rtnl_link_stats64 temp;
+		const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
+
+		ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset));
 	}
 	read_unlock(&dev_base_lock);
 	return ret;
@@ -295,7 +347,7 @@ static ssize_t show_##name(struct device *d,				\
 			   struct device_attribute *attr, char *buf) 	\
 {									\
 	return netstat_show(d, attr, buf,				\
-			    offsetof(struct net_device_stats, name));	\
+			    offsetof(struct rtnl_link_stats64, name));	\
 }									\
 static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
 
@@ -363,18 +415,17 @@ static ssize_t wireless_show(struct device *d, char *buf,
 					       char *))
 {
 	struct net_device *dev = to_net_dev(d);
-	const struct iw_statistics *iw = NULL;
+	const struct iw_statistics *iw;
 	ssize_t ret = -EINVAL;
 
-	read_lock(&dev_base_lock);
+	if (!rtnl_trylock())
+		return restart_syscall();
 	if (dev_isalive(dev)) {
-		if (dev->wireless_handlers &&
-		    dev->wireless_handlers->get_wireless_stats)
-			iw = dev->wireless_handlers->get_wireless_stats(dev);
-		if (iw != NULL)
+		iw = get_wireless_stats(dev);
+		if (iw)
 			ret = (*format)(iw, buf);
 	}
-	read_unlock(&dev_base_lock);
+	rtnl_unlock();
 
 	return ret;
 }
@@ -422,18 +473,762 @@ static struct attribute_group wireless_group = {
 	.attrs = wireless_attrs,
 };
 #endif
-
 #endif /* CONFIG_SYSFS */
 
+#ifdef CONFIG_RPS
+/*
+ * RX queue sysfs structures and functions.
+ */
+struct rx_queue_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct netdev_rx_queue *queue,
+	    struct rx_queue_attribute *attr, char *buf);
+	ssize_t (*store)(struct netdev_rx_queue *queue,
+	    struct rx_queue_attribute *attr, const char *buf, size_t len);
+};
+#define to_rx_queue_attr(_attr) container_of(_attr,		\
+    struct rx_queue_attribute, attr)
+
+#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)
+
+static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
+				  char *buf)
+{
+	struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(queue, attribute, buf);
+}
+
+static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+
+	if (!attribute->store)
+		return -EIO;
+
+	return attribute->store(queue, attribute, buf, count);
+}
+
+static const struct sysfs_ops rx_queue_sysfs_ops = {
+	.show = rx_queue_attr_show,
+	.store = rx_queue_attr_store,
+};
+
+static ssize_t show_rps_map(struct netdev_rx_queue *queue,
+			    struct rx_queue_attribute *attribute, char *buf)
+{
+	struct rps_map *map;
+	cpumask_var_t mask;
+	size_t len = 0;
+	int i;
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	rcu_read_lock();
+	map = rcu_dereference(queue->rps_map);
+	if (map)
+		for (i = 0; i < map->len; i++)
+			cpumask_set_cpu(map->cpus[i], mask);
+
+	len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
+	if (PAGE_SIZE - len < 3) {
+		rcu_read_unlock();
+		free_cpumask_var(mask);
+		return -EINVAL;
+	}
+	rcu_read_unlock();
+
+	free_cpumask_var(mask);
+	len += sprintf(buf + len, "\n");
+	return len;
+}
+
+static void rps_map_release(struct rcu_head *rcu)
+{
+	struct rps_map *map = container_of(rcu, struct rps_map, rcu);
+
+	kfree(map);
+}
+
+static ssize_t store_rps_map(struct netdev_rx_queue *queue,
+		      struct rx_queue_attribute *attribute,
+		      const char *buf, size_t len)
+{
+	struct rps_map *old_map, *map;
+	cpumask_var_t mask;
+	int err, cpu, i;
+	static DEFINE_SPINLOCK(rps_map_lock);
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+	if (err) {
+		free_cpumask_var(mask);
+		return err;
+	}
+
+	map = kzalloc(max_t(unsigned,
+	    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
+	    GFP_KERNEL);
+	if (!map) {
+		free_cpumask_var(mask);
+		return -ENOMEM;
+	}
+
+	i = 0;
+	for_each_cpu_and(cpu, mask, cpu_online_mask)
+		map->cpus[i++] = cpu;
+
+	if (i)
+		map->len = i;
+	else {
+		kfree(map);
+		map = NULL;
+	}
+
+	spin_lock(&rps_map_lock);
+	old_map = rcu_dereference_protected(queue->rps_map,
+					    lockdep_is_held(&rps_map_lock));
+	rcu_assign_pointer(queue->rps_map, map);
+	spin_unlock(&rps_map_lock);
+
+	if (old_map)
+		call_rcu(&old_map->rcu, rps_map_release);
+
+	free_cpumask_var(mask);
+	return len;
+}
+
+/* Show rps_flow_cnt: entries in the queue's flow table, 0 if there is none. */
+static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
+					   struct rx_queue_attribute *attr,
+					   char *buf)
+{
+	struct rps_dev_flow_table *tbl;
+	unsigned int entries;
+
+	rcu_read_lock();
+	tbl = rcu_dereference(queue->rps_flow_table);
+	entries = tbl ? tbl->mask + 1 : 0;
+	rcu_read_unlock();
+
+	return sprintf(buf, "%u\n", entries);
+}
+
+static void rps_dev_flow_table_release_work(struct work_struct *work)
+{
+	struct rps_dev_flow_table *table = container_of(work,
+	    struct rps_dev_flow_table, free_work);
+
+	vfree(table);
+}
+
+static void rps_dev_flow_table_release(struct rcu_head *rcu)
+{
+	struct rps_dev_flow_table *table = container_of(rcu,
+	    struct rps_dev_flow_table, rcu);
+	/* vfree() is deferred to a workqueue, not run in this RCU callback. */
+	INIT_WORK(&table->free_work, rps_dev_flow_table_release_work);
+	schedule_work(&table->free_work);
+}
+
+/* Store rps_flow_cnt: install a new flow table, or none when 0 is written. */
+static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
+				     struct rx_queue_attribute *attr,
+				     const char *buf, size_t len)
+{
+	unsigned int count;
+	char *endp;
+	struct rps_dev_flow_table *table, *old_table;
+	static DEFINE_SPINLOCK(rps_dev_flow_lock);
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	count = simple_strtoul(buf, &endp, 0);
+	if (endp == buf)
+		return -EINVAL;
+
+	if (count) {
+		int i;
+
+		if (count > 1<<30) {
+			/* Enforce a limit to prevent overflow */
+			return -EINVAL;
+		}
+		count = roundup_pow_of_two(count);
+		/* NOTE(review): confirm this size cannot wrap on 32-bit. */
+		table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count));
+		if (!table)
+			return -ENOMEM;
+		table->mask = count - 1;
+		for (i = 0; i < count; i++)
+			table->flows[i].cpu = RPS_NO_CPU;
+	} else
+		table = NULL;
+
+	spin_lock(&rps_dev_flow_lock);
+	old_table = rcu_dereference_protected(queue->rps_flow_table,
+					      lockdep_is_held(&rps_dev_flow_lock));
+	rcu_assign_pointer(queue->rps_flow_table, table);
+	spin_unlock(&rps_dev_flow_lock);
+
+	if (old_table)
+		call_rcu(&old_table->rcu, rps_dev_flow_table_release);
+	return len;
+}
+
+static struct rx_queue_attribute rps_cpus_attribute =
+	__ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
+
+
+static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
+	__ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
+	    show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
+
+static struct attribute *rx_queue_default_attrs[] = {
+	&rps_cpus_attribute.attr,
+	&rps_dev_flow_table_cnt_attribute.attr,
+	NULL
+};
+
+/* Release an rx queue kobject: free RPS state via RCU, drop the device ref. */
+static void rx_queue_release(struct kobject *kobj)
+{
+	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+	struct rps_map *map;
+	struct rps_dev_flow_table *flow_table;
+
+	map = rcu_dereference_raw(queue->rps_map);
+	if (map) {
+		RCU_INIT_POINTER(queue->rps_map, NULL);
+		call_rcu(&map->rcu, rps_map_release);
+	}
+
+	flow_table = rcu_dereference_raw(queue->rps_flow_table);
+	if (flow_table) {
+		RCU_INIT_POINTER(queue->rps_flow_table, NULL);
+		call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
+	}
+
+	memset(kobj, 0, sizeof(*kobj));
+	dev_put(queue->dev);	/* pairs with dev_hold() at add time */
+}
+
+static struct kobj_type rx_queue_ktype = {
+	.sysfs_ops = &rx_queue_sysfs_ops,
+	.release = rx_queue_release,
+	.default_attrs = rx_queue_default_attrs,
+};
+
+/* Add kobject "rx-<index>" for rx queue @index to the queues kset of @net. */
+static int rx_queue_add_kobject(struct net_device *net, int index)
+{
+	struct netdev_rx_queue *queue = net->_rx + index;
+	struct kobject *kobj = &queue->kobj;
+	int error;
+
+	/* rx_queue_release() does dev_put() even when the add below fails. */
+	dev_hold(queue->dev);
+	kobj->kset = net->queues_kset;
+	error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
+	    "rx-%u", index);
+	if (error) {
+		kobject_put(kobj);
+		return error;
+	}
+	kobject_uevent(kobj, KOBJ_ADD);
+	return 0;
+}
+#endif /* CONFIG_RPS */
+
+/* Grow or shrink the rx queue kobjects of @net from @old_num to @new_num. */
+int
+net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
+{
+#ifdef CONFIG_RPS
+	int error = 0;
+	int idx = old_num;
+
+	while (idx < new_num) {
+		error = rx_queue_add_kobject(net, idx);
+		if (error) {
+			new_num = old_num;	/* undo the queues added so far */
+			break;
+		}
+		idx++;
+	}
+	for (idx--; idx >= new_num; idx--)
+		kobject_put(&net->_rx[idx].kobj);
+	return error;
+#else
+	return 0;
+#endif
+}
+
+#ifdef CONFIG_XPS
+/*
+ * netdev_queue sysfs structures and functions.
+ */
+struct netdev_queue_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct netdev_queue *queue,
+	    struct netdev_queue_attribute *attr, char *buf);
+	ssize_t (*store)(struct netdev_queue *queue,
+	    struct netdev_queue_attribute *attr, const char *buf, size_t len);
+};
+#define to_netdev_queue_attr(_attr) container_of(_attr,		\
+    struct netdev_queue_attribute, attr)
+
+#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)
+
+static ssize_t netdev_queue_attr_show(struct kobject *kobj,
+				      struct attribute *attr, char *buf)
+{
+	struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+	struct netdev_queue *queue = to_netdev_queue(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(queue, attribute, buf);
+}
+
+static ssize_t netdev_queue_attr_store(struct kobject *kobj,
+				       struct attribute *attr,
+				       const char *buf, size_t count)
+{
+	struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+	struct netdev_queue *queue = to_netdev_queue(kobj);
+
+	if (!attribute->store)
+		return -EIO;
+
+	return attribute->store(queue, attribute, buf, count);
+}
+
+static const struct sysfs_ops netdev_queue_sysfs_ops = {
+	.show = netdev_queue_attr_show,
+	.store = netdev_queue_attr_store,
+};
+
+/* Return the index of @queue in its device's _tx array; BUG if not found. */
+static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
+{
+	struct net_device *netdev = queue->dev;
+	int idx = 0;
+
+	while (idx < netdev->num_tx_queues && &netdev->_tx[idx] != queue)
+		idx++;
+
+	BUG_ON(idx >= netdev->num_tx_queues);
+
+	return idx;
+}
+
+
+static ssize_t show_xps_map(struct netdev_queue *queue,
+			    struct netdev_queue_attribute *attribute, char *buf)
+{
+	struct net_device *dev = queue->dev;
+	struct xps_dev_maps *dev_maps;
+	cpumask_var_t mask;
+	unsigned long index;
+	size_t len = 0;
+	int i;
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	index = get_netdev_queue_index(queue);
+
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_maps);
+	if (dev_maps) {
+		for_each_possible_cpu(i) {
+			struct xps_map *map =
+			    rcu_dereference(dev_maps->cpu_map[i]);
+			if (map) {
+				int j;
+				for (j = 0; j < map->len; j++) {
+					if (map->queues[j] == index) {
+						cpumask_set_cpu(i, mask);
+						break;
+					}
+				}
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
+	if (PAGE_SIZE - len < 3) {
+		free_cpumask_var(mask);
+		return -EINVAL;
+	}
+
+	free_cpumask_var(mask);
+	len += sprintf(buf + len, "\n");
+	return len;
+}
+
+static void xps_map_release(struct rcu_head *rcu)
+{
+	struct xps_map *map = container_of(rcu, struct xps_map, rcu);
+
+	kfree(map);
+}
+
+static void xps_dev_maps_release(struct rcu_head *rcu)
+{
+	struct xps_dev_maps *dev_maps =
+	    container_of(rcu, struct xps_dev_maps, rcu);
+
+	kfree(dev_maps);
+}
+
+static DEFINE_MUTEX(xps_map_mutex);
+#define xmap_dereference(P)		\
+	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
+
+/* Store handler for xps_cpus: parse a CPU mask from @buf and rebuild the
+ * device's XPS maps so that this queue is listed for exactly the online CPUs
+ * set in that mask.  Returns @len on success or a negative errno. */
+static ssize_t store_xps_map(struct netdev_queue *queue,
+		      struct netdev_queue_attribute *attribute,
+		      const char *buf, size_t len)
+{
+	struct net_device *dev = queue->dev;
+	cpumask_var_t mask;
+	int err, i, cpu, pos, map_len, alloc_len, need_set;
+	unsigned long index;
+	struct xps_map *map, *new_map;
+	struct xps_dev_maps *dev_maps, *new_dev_maps;
+	int nonempty = 0, numa_node = -2;	/* -2: unset, -1: mixed */
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	index = get_netdev_queue_index(queue);
+	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+	if (err) {
+		free_cpumask_var(mask);
+		return err;
+	}
+
+	new_dev_maps = kzalloc(max_t(unsigned,
+	    XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL);
+	if (!new_dev_maps) {
+		free_cpumask_var(mask);
+		return -ENOMEM;
+	}
+
+	mutex_lock(&xps_map_mutex);
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	for_each_possible_cpu(cpu) {
+		map = dev_maps ?
+			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
+		new_map = map;
+		if (map) {
+			for (pos = 0; pos < map->len; pos++)
+				if (map->queues[pos] == index)
+					break;
+			map_len = map->len;
+			alloc_len = map->alloc_len;
+		} else
+			pos = map_len = alloc_len = 0;
+
+		need_set = cpu_isset(cpu, *mask) && cpu_online(cpu);
+#ifdef CONFIG_NUMA
+		if (need_set) {
+			if (numa_node == -2)
+				numa_node = cpu_to_node(cpu);
+			else if (numa_node != cpu_to_node(cpu))
+				numa_node = -1;
+		}
+#endif
+		if (need_set && pos >= map_len) {
+			/* Need to add queue to this CPU's map */
+			if (map_len >= alloc_len) {
+				alloc_len = alloc_len ?
+				    2 * alloc_len : XPS_MIN_MAP_ALLOC;
+				new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len),
+						       GFP_KERNEL,
+						       cpu_to_node(cpu));
+				if (!new_map)
+					goto error;
+				new_map->alloc_len = alloc_len;
+				for (i = 0; i < map_len; i++)
+					new_map->queues[i] = map->queues[i];
+				new_map->len = map_len;
+			}
+			new_map->queues[new_map->len++] = index;
+		} else if (!need_set && pos < map_len) {
+			/* Need to remove queue from this CPU's map */
+			if (map_len > 1)
+				new_map->queues[pos] =
+				    new_map->queues[--new_map->len];
+			else
+				new_map = NULL;
+		}
+		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map);
+	}
+
+	/* Cleanup old maps */
+	for_each_possible_cpu(cpu) {
+		map = dev_maps ?
+			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
+		if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
+			call_rcu(&map->rcu, xps_map_release);
+		if (new_dev_maps->cpu_map[cpu])
+			nonempty = 1;
+	}
+
+	if (nonempty)
+		rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+	else {
+		kfree(new_dev_maps);
+		rcu_assign_pointer(dev->xps_maps, NULL);
+	}
+
+	if (dev_maps)
+		call_rcu(&dev_maps->rcu, xps_dev_maps_release);
+
+	netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node :
+					    NUMA_NO_NODE);
+	mutex_unlock(&xps_map_mutex);
+
+	free_cpumask_var(mask);
+	return len;
+
+error:
+	/* Free only maps allocated here; others are still live in dev_maps */
+	for_each_possible_cpu(i) {
+		new_map = xmap_dereference(new_dev_maps->cpu_map[i]);
+		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[i]) : NULL;
+		if (new_map != map)
+			kfree(new_map);
+	}
+	mutex_unlock(&xps_map_mutex);
+	kfree(new_dev_maps);
+	free_cpumask_var(mask);
+	return -ENOMEM;
+}
+
+static struct netdev_queue_attribute xps_cpus_attribute =
+    __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+
+static struct attribute *netdev_queue_default_attrs[] = {
+	&xps_cpus_attribute.attr,
+	NULL
+};
+
+/* Release a tx queue kobject: purge it from XPS maps, put the device. */
+static void netdev_queue_release(struct kobject *kobj)
+{
+	struct netdev_queue *queue = to_netdev_queue(kobj);
+	struct net_device *dev = queue->dev;
+	struct xps_dev_maps *dev_maps;
+	struct xps_map *map;
+	unsigned long index;
+	int i, pos, nonempty = 0;
+
+	index = get_netdev_queue_index(queue);
+	mutex_lock(&xps_map_mutex);
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	if (dev_maps) {
+		for_each_possible_cpu(i) {
+			map = xmap_dereference(dev_maps->cpu_map[i]);
+			if (!map)
+				continue;
+
+			for (pos = 0; pos < map->len; pos++)
+				if (map->queues[pos] == index)
+					break;
+			/* Found: fill slot from tail, or RCU-free the map */
+			if (pos < map->len) {
+				if (map->len > 1)
+					map->queues[pos] =
+					    map->queues[--map->len];
+				else {
+					RCU_INIT_POINTER(dev_maps->cpu_map[i],
+					    NULL);
+					call_rcu(&map->rcu, xps_map_release);
+					map = NULL;
+				}
+			}
+			if (map)
+				nonempty = 1;
+		}
+		/* No CPU has a map left: retire the whole table */
+		if (!nonempty) {
+			RCU_INIT_POINTER(dev->xps_maps, NULL);
+			call_rcu(&dev_maps->rcu, xps_dev_maps_release);
+		}
+	}
+
+	mutex_unlock(&xps_map_mutex);
+
+	memset(kobj, 0, sizeof(*kobj));
+	dev_put(queue->dev);	/* pairs with dev_hold() at add time */
+}
+
+static struct kobj_type netdev_queue_ktype = {
+	.sysfs_ops = &netdev_queue_sysfs_ops,
+	.release = netdev_queue_release,
+	.default_attrs = netdev_queue_default_attrs,
+};
+
+/* Add kobject "tx-<index>" for tx queue @index to the queues kset of @net. */
+static int netdev_queue_add_kobject(struct net_device *net, int index)
+{
+	struct netdev_queue *queue = net->_tx + index;
+	struct kobject *kobj = &queue->kobj;
+	int error;
+
+	/* netdev_queue_release() does dev_put() even when the add fails. */
+	dev_hold(queue->dev);
+	kobj->kset = net->queues_kset;
+	error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
+	    "tx-%u", index);
+	if (error) {
+		kobject_put(kobj);
+		return error;
+	}
+	kobject_uevent(kobj, KOBJ_ADD);
+	return 0;
+}
+#endif /* CONFIG_XPS */
+
+/* Grow or shrink the tx queue kobjects of @net from @old_num to @new_num. */
+int
+netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
+{
+#ifdef CONFIG_XPS
+	int error = 0;
+	int idx = old_num;
+
+	while (idx < new_num) {
+		error = netdev_queue_add_kobject(net, idx);
+		if (error) {
+			new_num = old_num;	/* undo the queues added so far */
+			break;
+		}
+		idx++;
+	}
+	for (idx--; idx >= new_num; idx--)
+		kobject_put(&net->_tx[idx].kobj);
+	return error;
+#else
+	return 0;
+#endif
+}
+
+/* Create the "queues" kset and the rx/tx queue kobjects of @net. */
+static int register_queue_kobjects(struct net_device *net)
+{
+	int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
+
+#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+	net->queues_kset = kset_create_and_add("queues", NULL, &net->dev.kobj);
+	if (!net->queues_kset)
+		return -ENOMEM;
+#endif
+#ifdef CONFIG_RPS
+	real_rx = net->real_num_rx_queues;
+#endif
+	real_tx = net->real_num_tx_queues;
+
+	error = net_rx_queue_update_kobjects(net, 0, real_rx);
+	if (error)
+		goto error;
+	rxq = real_rx;
+	error = netdev_queue_update_kobjects(net, 0, real_tx);
+	if (error)
+		goto error;
+	txq = real_tx;
+	return 0;
+
+error:
+	netdev_queue_update_kobjects(net, txq, 0);
+	net_rx_queue_update_kobjects(net, rxq, 0);
+#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+	kset_unregister(net->queues_kset);	/* don't leak the kset */
+#endif
+	return error;
+}
+
+/* Drop the rx/tx queue kobjects of @net, then unregister its "queues" kset. */
+static void remove_queue_kobjects(struct net_device *net)
+{
+	int real_rx = 0, real_tx = 0;
+
+#ifdef CONFIG_RPS
+	real_rx = net->real_num_rx_queues;
+#endif
+	real_tx = net->real_num_tx_queues;
+	net_rx_queue_update_kobjects(net, real_rx, 0);
+	netdev_queue_update_kobjects(net, real_tx, 0);
+#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+	kset_unregister(net->queues_kset);
+#endif
+}
+
+static const void *net_current_ns(void)
+{
+	return current->nsproxy->net_ns;
+}
+
+static const void *net_initial_ns(void)
+{
+	return &init_net;
+}
+
+static const void *net_netlink_ns(struct sock *sk)
+{
+	return sock_net(sk);
+}
+
+struct kobj_ns_type_operations net_ns_type_operations = {
+	.type = KOBJ_NS_TYPE_NET,
+	.current_ns = net_current_ns,
+	.netlink_ns = net_netlink_ns,
+	.initial_ns = net_initial_ns,
+};
+EXPORT_SYMBOL_GPL(net_ns_type_operations);
+
+static void net_kobj_ns_exit(struct net *net)
+{
+	kobj_ns_exit(KOBJ_NS_TYPE_NET, net);
+}
+
+static struct pernet_operations kobj_net_ops = {
+	.exit = net_kobj_ns_exit,
+};
+
+
 #ifdef CONFIG_HOTPLUG
 static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
 {
 	struct net_device *dev = to_net_dev(d);
 	int retval;
 
-	if (!net_eq(dev_net(dev), &init_net))
-		return 0;
-
 	/* pass interface to uevent. */
 	retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
 	if (retval)
@@ -463,6 +1258,13 @@ static void netdev_release(struct device *d)
 	kfree((char *)dev - dev->padded);
 }
 
+static const void *net_namespace(struct device *d)
+{
+	struct net_device *ndev = container_of(d, struct net_device, dev);
+
+	return dev_net(ndev);
+}
+
 static struct class net_class = {
 	.name = "net",
 	.dev_release = netdev_release,
@@ -472,6 +1274,8 @@ static struct class net_class = {
 #ifdef CONFIG_HOTPLUG
 	.dev_uevent = netdev_uevent,
 #endif
+	.ns_type = &net_ns_type_operations,
+	.namespace = net_namespace,
 };
 
 /* Delete sysfs entries but hold kobject reference until after all
@@ -483,8 +1287,7 @@ void netdev_unregister_kobject(struct net_device * net)
 
 	kobject_get(&dev->kobj);
 
-	if (dev_net(net) != &init_net)
-		return;
+	remove_queue_kobjects(net);
 
 	device_del(dev);
 }
@@ -493,8 +1296,10 @@ void netdev_unregister_kobject(struct net_device * net)
 int netdev_register_kobject(struct net_device *net)
 {
 	struct device *dev = &(net->dev);
-	struct attribute_group **groups = net->sysfs_groups;
+	const struct attribute_group **groups = net->sysfs_groups;
+	int error = 0;
 
+	device_initialize(dev);
 	dev->class = &net_class;
 	dev->platform_data = net;
 	dev->groups = groups;
@@ -502,40 +1307,49 @@ int netdev_register_kobject(struct net_device *net)
 	dev_set_name(dev, "%s", net->name);
 
 #ifdef CONFIG_SYSFS
-	*groups++ = &netstat_group;
+	/* Allow for a device specific group */
+	if (*groups)
+		groups++;
 
+	*groups++ = &netstat_group;
 #ifdef CONFIG_WIRELESS_EXT_SYSFS
-	if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats)
+	if (net->ieee80211_ptr)
 		*groups++ = &wireless_group;
+#ifdef CONFIG_WIRELESS_EXT
+	else if (net->wireless_handlers)
+		*groups++ = &wireless_group;
+#endif
 #endif
 #endif /* CONFIG_SYSFS */
 
-	if (dev_net(net) != &init_net)
-		return 0;
+	error = device_add(dev);
+	if (error)
+		return error;
 
-	return device_add(dev);
+	error = register_queue_kobjects(net);
+	if (error) {
+		device_del(dev);
+		return error;
+	}
+
+	return error;
 }
 
 int netdev_class_create_file(struct class_attribute *class_attr)
 {
 	return class_create_file(&net_class, class_attr);
 }
+EXPORT_SYMBOL(netdev_class_create_file);
 
 void netdev_class_remove_file(struct class_attribute *class_attr)
 {
 	class_remove_file(&net_class, class_attr);
 }
-
-EXPORT_SYMBOL(netdev_class_create_file);
 EXPORT_SYMBOL(netdev_class_remove_file);
 
-void netdev_initialize_kobject(struct net_device *net)
-{
-	struct device *device = &(net->dev);
-	device_initialize(device);
-}
-
 int netdev_kobject_init(void)
 {
+	kobj_ns_type_register(&net_ns_type_operations);
+	register_pernet_subsys(&kobj_net_ops);
 	return class_register(&net_class);
 }
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 14e7524260b..bd7751ec1c4 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,5 +4,8 @@
 int netdev_kobject_init(void);
 int netdev_register_kobject(struct net_device *);
 void netdev_unregister_kobject(struct net_device *);
-void netdev_initialize_kobject(struct net_device *);
+int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
+int netdev_queue_update_kobjects(struct net_device *net,
+				 int old_num, int new_num);
+
 #endif
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index f1e982c508b..7f1bb2aba03 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -19,12 +19,14 @@
 #include <linux/workqueue.h>
 #include <linux/netlink.h>
 #include <linux/net_dropmon.h>
+#include <linux/slab.h>
 
 #include <asm/unaligned.h>
 #include <asm/bitops.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/skb.h>
+#include <trace/events/net.h>
 #include <trace/events/napi.h>
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b7292a2719d..3f860261c5e 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -6,6 +6,8 @@
 #include <linux/delay.h>
 #include <linux/sched.h>
 #include <linux/idr.h>
+#include <linux/rculist.h>
+#include <linux/nsproxy.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
@@ -25,14 +27,111 @@ EXPORT_SYMBOL(init_net);
 
 #define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */
 
+/* RCU callback: free the net_generic array that embeds @rcu. */
+static void net_generic_release(struct rcu_head *rcu)
+{
+	struct net_generic *stale = container_of(rcu, struct net_generic, rcu);
+
+	kfree(stale);
+}
+
+static int net_assign_generic(struct net *net, int id, void *data)
+{
+	struct net_generic *gen, *prev;
+
+	BUG_ON(!mutex_is_locked(&net_mutex));
+	BUG_ON(id == 0);
+
+	prev = rcu_dereference_protected(net->gen,
+					 lockdep_is_held(&net_mutex));
+	gen = prev;
+
+	if (prev->len < id) {
+		/*
+		 * The current array has no slot for this id, so build a
+		 * larger one.
+		 *
+		 * Synchronisation notes: readers walk net->gen inside an
+		 * rcu read section, and once net->gen->ptr[x] is set it
+		 * never changes (see rules in netns/generic.h).  It is
+		 * therefore enough to duplicate the array, publish the
+		 * copy, and kfree the old one after a grace period.
+		 */
+		gen = kzalloc(sizeof(struct net_generic) +
+			      id * sizeof(void *), GFP_KERNEL);
+		if (gen == NULL)
+			return -ENOMEM;
+
+		gen->len = id;
+		memcpy(&gen->ptr, &prev->ptr, prev->len * sizeof(void*));
+
+		rcu_assign_pointer(net->gen, gen);
+		call_rcu(&prev->rcu, net_generic_release);
+	}
+
+	/* ids start at 1, slots at 0 */
+	gen->ptr[id - 1] = data;
+	return 0;
+}
+
+static int ops_init(const struct pernet_operations *ops, struct net *net)
+{
+	void *priv;
+	int rc;
+
+	/* Allocate and attach per-net private data when the ops ask for it. */
+	if (ops->id && ops->size) {
+		priv = kzalloc(ops->size, GFP_KERNEL);
+		if (priv == NULL)
+			return -ENOMEM;
+		rc = net_assign_generic(net, *ops->id, priv);
+		if (rc != 0) {
+			kfree(priv);
+			return rc;
+		}
+	}
+	return ops->init ? ops->init(net) : 0;
+}
+
+static void ops_free(const struct pernet_operations *ops, struct net *net)
+{
+	/* Only ops with both an id and a size have private data to free. */
+	if (!ops->id || !ops->size)
+		return;
+	kfree(net_generic(net, *ops->id));
+}
+
+/* Call ops->exit for every listed net, then ops->exit_batch for the list. */
+static void ops_exit_list(const struct pernet_operations *ops,
+			  struct list_head *net_exit_list)
+{
+	struct net *cur;
+	if (ops->exit)
+		list_for_each_entry(cur, net_exit_list, exit_list)
+			ops->exit(cur);
+	if (ops->exit_batch)
+		ops->exit_batch(net_exit_list);
+}
+
+static void ops_free_list(const struct pernet_operations *ops,
+			  struct list_head *net_exit_list)
+{
+	struct net *cur;
+	if (!ops->size || !ops->id)
+		return;
+	list_for_each_entry(cur, net_exit_list, exit_list)
+		ops_free(ops, cur);
+}
+
 /*
  * setup_net runs the initializers for the network namespace object.
  */
 static __net_init int setup_net(struct net *net)
 {
 	/* Must be called with net_mutex held */
-	struct pernet_operations *ops;
+	const struct pernet_operations *ops, *saved_ops;
 	int error = 0;
+	LIST_HEAD(net_exit_list);
 
 	atomic_set(&net->count, 1);
 
@@ -41,11 +140,9 @@ static __net_init int setup_net(struct net *net)
 #endif
 
 	list_for_each_entry(ops, &pernet_list, list) {
-		if (ops->init) {
-			error = ops->init(net);
-			if (error < 0)
-				goto out_undo;
-		}
+		error = ops_init(ops, net);
+		if (error < 0)
+			goto out_undo;
 	}
 out:
 	return error;
@@ -54,10 +151,14 @@ out_undo:
 	/* Walk through the list backwards calling the exit functions
 	 * for the pernet modules whose init functions did not fail.
 	 */
-	list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
-		if (ops->exit)
-			ops->exit(net);
-	}
+	list_add(&net->exit_list, &net_exit_list);
+	saved_ops = ops;
+	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
+		ops_exit_list(ops, &net_exit_list);
+
+	ops = saved_ops;
+	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
+		ops_free_list(ops, &net_exit_list);
 
 	rcu_barrier();
 	goto out;
@@ -127,7 +228,7 @@ static struct net *net_create(void)
 	rv = setup_net(net);
 	if (rv == 0) {
 		rtnl_lock();
-		list_add_tail(&net->list, &net_namespace_list);
+		list_add_tail_rcu(&net->list, &net_namespace_list);
 		rtnl_unlock();
 	}
 	mutex_unlock(&net_mutex);
@@ -145,25 +246,45 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 	return net_create();
 }
 
+static DEFINE_SPINLOCK(cleanup_list_lock);
+static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */
+
 static void cleanup_net(struct work_struct *work)
 {
-	struct pernet_operations *ops;
-	struct net *net;
+	const struct pernet_operations *ops;
+	struct net *net, *tmp;
+	LIST_HEAD(net_kill_list);
+	LIST_HEAD(net_exit_list);
 
-	net = container_of(work, struct net, work);
+	/* Atomically snapshot the list of namespaces to cleanup */
+	spin_lock_irq(&cleanup_list_lock);
+	list_replace_init(&cleanup_list, &net_kill_list);
+	spin_unlock_irq(&cleanup_list_lock);
 
 	mutex_lock(&net_mutex);
 
 	/* Don't let anyone else find us. */
 	rtnl_lock();
-	list_del(&net->list);
+	list_for_each_entry(net, &net_kill_list, cleanup_list) {
+		list_del_rcu(&net->list);
+		list_add_tail(&net->exit_list, &net_exit_list);
+	}
 	rtnl_unlock();
 
+	/*
+	 * Another CPU might be rcu-iterating the list, wait for it.
+	 * This needs to be before calling the exit() notifiers, so
+	 * the rcu_barrier() below isn't sufficient alone.
+	 */
+	synchronize_rcu();
+
 	/* Run all of the network namespace exit methods */
-	list_for_each_entry_reverse(ops, &pernet_list, list) {
-		if (ops->exit)
-			ops->exit(net);
-	}
+	list_for_each_entry_reverse(ops, &pernet_list, list)
+		ops_exit_list(ops, &net_exit_list);
+
+	/* Free the net generic variables */
+	list_for_each_entry_reverse(ops, &pernet_list, list)
+		ops_free_list(ops, &net_exit_list);
 
 	mutex_unlock(&net_mutex);
 
@@ -173,14 +294,23 @@ static void cleanup_net(struct work_struct *work)
 	rcu_barrier();
 
 	/* Finally it is safe to free my network namespace structure */
-	net_free(net);
+	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
+		list_del_init(&net->exit_list);
+		net_free(net);
+	}
 }
+static DECLARE_WORK(net_cleanup_work, cleanup_net);
 
 void __put_net(struct net *net)
 {
 	/* Cleanup the network namespace in process context */
-	INIT_WORK(&net->work, cleanup_net);
-	queue_work(netns_wq, &net->work);
+	unsigned long flags;
+
+	spin_lock_irqsave(&cleanup_list_lock, flags);
+	list_add(&net->cleanup_list, &cleanup_list);
+	spin_unlock_irqrestore(&cleanup_list_lock, flags);
+
+	queue_work(netns_wq, &net_cleanup_work);
 }
 EXPORT_SYMBOL_GPL(__put_net);
 
@@ -193,6 +323,26 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 }
 #endif
 
+/*
+ * Look up the network namespace of the task with (virtual) pid @pid.
+ * Returns get_net() on it, or ERR_PTR(-ESRCH) if no task/nsproxy is found.
+ */
+struct net *get_net_ns_by_pid(pid_t pid)
+{
+	struct net *found = ERR_PTR(-ESRCH);
+	struct task_struct *task;
+	struct nsproxy *nsp;
+
+	rcu_read_lock();
+	task = find_task_by_vpid(pid);
+	nsp = task ? task_nsproxy(task) : NULL;
+	if (nsp)
+		found = get_net(nsp->net_ns);
+	rcu_read_unlock();
+	return found;
+}
+EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
+
 static int __init net_ns_init(void)
 {
 	struct net_generic *ng;
@@ -219,7 +369,7 @@ static int __init net_ns_init(void)
 		panic("Could not setup the initial network namespace");
 
 	rtnl_lock();
-	list_add_tail(&init_net.list, &net_namespace_list);
+	list_add_tail_rcu(&init_net.list, &net_namespace_list);
 	rtnl_unlock();
 
 	mutex_unlock(&net_mutex);
@@ -230,18 +380,20 @@ static int __init net_ns_init(void)
 pure_initcall(net_ns_init);
 
 #ifdef CONFIG_NET_NS
-static int register_pernet_operations(struct list_head *list,
-				      struct pernet_operations *ops)
+static int __register_pernet_operations(struct list_head *list,
+					struct pernet_operations *ops)
 {
-	struct net *net, *undo_net;
+	struct net *net;
 	int error;
+	LIST_HEAD(net_exit_list);
 
 	list_add_tail(&ops->list, list);
-	if (ops->init) {
+	if (ops->init || (ops->id && ops->size)) {
 		for_each_net(net) {
-			error = ops->init(net);
+			error = ops_init(ops, net);
 			if (error)
 				goto out_undo;
+			list_add_tail(&net->exit_list, &net_exit_list);
 		}
 	}
 	return 0;
@@ -249,45 +401,82 @@ static int register_pernet_operations(struct list_head *list,
 out_undo:
 	/* If I have an error cleanup all namespaces I initialized */
 	list_del(&ops->list);
-	if (ops->exit) {
-		for_each_net(undo_net) {
-			if (undo_net == net)
-				goto undone;
-			ops->exit(undo_net);
-		}
-	}
-undone:
+	ops_exit_list(ops, &net_exit_list);
+	ops_free_list(ops, &net_exit_list);
 	return error;
 }
 
-static void unregister_pernet_operations(struct pernet_operations *ops)
+static void __unregister_pernet_operations(struct pernet_operations *ops)
 {
 	struct net *net;
+	LIST_HEAD(net_exit_list);
 
 	list_del(&ops->list);
-	if (ops->exit)
-		for_each_net(net)
-			ops->exit(net);
+	for_each_net(net)
+		list_add_tail(&net->exit_list, &net_exit_list);
+	ops_exit_list(ops, &net_exit_list);
+	ops_free_list(ops, &net_exit_list);
 }
 
 #else
 
+static int __register_pernet_operations(struct list_head *list,
+					struct pernet_operations *ops)
+{
+	int rc = ops_init(ops, &init_net);
+
+	/* On failure, release whatever private data is attached. */
+	if (rc)
+		ops_free(ops, &init_net);
+	return rc;
+}
+
+static void __unregister_pernet_operations(struct pernet_operations *ops)
+{
+	LIST_HEAD(net_exit_list);
+	list_add(&init_net.exit_list, &net_exit_list);
+	ops_exit_list(ops, &net_exit_list);
+	ops_free_list(ops, &net_exit_list);
+}
+
+#endif /* CONFIG_NET_NS */
+
+static DEFINE_IDA(net_generic_ids);
+
 static int register_pernet_operations(struct list_head *list,
 				      struct pernet_operations *ops)
 {
-	if (ops->init == NULL)
-		return 0;
-	return ops->init(&init_net);
+	int error;
+
+	if (ops->id) {
+again:
+		error = ida_get_new_above(&net_generic_ids, 1, ops->id);
+		if (error < 0) {
+			if (error == -EAGAIN) {
+				ida_pre_get(&net_generic_ids, GFP_KERNEL);
+				goto again;
+			}
+			return error;
+		}
+	}
+	error = __register_pernet_operations(list, ops);
+	if (error) {
+		rcu_barrier();
+		if (ops->id)
+			ida_remove(&net_generic_ids, *ops->id);
+	}
+
+	return error;
 }
 
 static void unregister_pernet_operations(struct pernet_operations *ops)
 {
-	if (ops->exit)
-		ops->exit(&init_net);
+	
+	__unregister_pernet_operations(ops);
+	rcu_barrier();
+	if (ops->id)
+		ida_remove(&net_generic_ids, *ops->id);
 }
-#endif
-
-static DEFINE_IDA(net_generic_ids);
 
 /**
  *      register_pernet_subsys - register a network namespace subsystem
@@ -327,45 +516,13 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
  *	addition run the exit method for all existing network
  *	namespaces.
  */
-void unregister_pernet_subsys(struct pernet_operations *module)
-{
-	mutex_lock(&net_mutex);
-	unregister_pernet_operations(module);
-	mutex_unlock(&net_mutex);
-}
-EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
-
-int register_pernet_gen_subsys(int *id, struct pernet_operations *ops)
-{
-	int rv;
-
-	mutex_lock(&net_mutex);
-again:
-	rv = ida_get_new_above(&net_generic_ids, 1, id);
-	if (rv < 0) {
-		if (rv == -EAGAIN) {
-			ida_pre_get(&net_generic_ids, GFP_KERNEL);
-			goto again;
-		}
-		goto out;
-	}
-	rv = register_pernet_operations(first_device, ops);
-	if (rv < 0)
-		ida_remove(&net_generic_ids, *id);
-out:
-	mutex_unlock(&net_mutex);
-	return rv;
-}
-EXPORT_SYMBOL_GPL(register_pernet_gen_subsys);
-
-void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops)
+void unregister_pernet_subsys(struct pernet_operations *ops)
 {
 	mutex_lock(&net_mutex);
 	unregister_pernet_operations(ops);
-	ida_remove(&net_generic_ids, id);
 	mutex_unlock(&net_mutex);
 }
-EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys);
+EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
 
 /**
  *      register_pernet_device - register a network namespace device
@@ -398,30 +555,6 @@ int register_pernet_device(struct pernet_operations *ops)
 }
 EXPORT_SYMBOL_GPL(register_pernet_device);
 
-int register_pernet_gen_device(int *id, struct pernet_operations *ops)
-{
-	int error;
-	mutex_lock(&net_mutex);
-again:
-	error = ida_get_new_above(&net_generic_ids, 1, id);
-	if (error) {
-		if (error == -EAGAIN) {
-			ida_pre_get(&net_generic_ids, GFP_KERNEL);
-			goto again;
-		}
-		goto out;
-	}
-	error = register_pernet_operations(&pernet_list, ops);
-	if (error)
-		ida_remove(&net_generic_ids, *id);
-	else if (first_device == &pernet_list)
-		first_device = &ops->list;
-out:
-	mutex_unlock(&net_mutex);
-	return error;
-}
-EXPORT_SYMBOL_GPL(register_pernet_gen_device);
-
 /**
  *      unregister_pernet_device - unregister a network namespace netdevice
  *	@ops: pernet operations structure to manipulate
@@ -440,60 +573,3 @@ void unregister_pernet_device(struct pernet_operations *ops)
 	mutex_unlock(&net_mutex);
 }
 EXPORT_SYMBOL_GPL(unregister_pernet_device);
-
-void unregister_pernet_gen_device(int id, struct pernet_operations *ops)
-{
-	mutex_lock(&net_mutex);
-	if (&ops->list == first_device)
-		first_device = first_device->next;
-	unregister_pernet_operations(ops);
-	ida_remove(&net_generic_ids, id);
-	mutex_unlock(&net_mutex);
-}
-EXPORT_SYMBOL_GPL(unregister_pernet_gen_device);
-
-static void net_generic_release(struct rcu_head *rcu)
-{
-	struct net_generic *ng;
-
-	ng = container_of(rcu, struct net_generic, rcu);
-	kfree(ng);
-}
-
-int net_assign_generic(struct net *net, int id, void *data)
-{
-	struct net_generic *ng, *old_ng;
-
-	BUG_ON(!mutex_is_locked(&net_mutex));
-	BUG_ON(id == 0);
-
-	ng = old_ng = net->gen;
-	if (old_ng->len >= id)
-		goto assign;
-
-	ng = kzalloc(sizeof(struct net_generic) +
-			id * sizeof(void *), GFP_KERNEL);
-	if (ng == NULL)
-		return -ENOMEM;
-
-	/*
-	 * Some synchronisation notes:
-	 *
-	 * The net_generic explores the net->gen array inside rcu
-	 * read section. Besides once set the net->gen->ptr[x]
-	 * pointer never changes (see rules in netns/generic.h).
-	 *
-	 * That said, we simply duplicate this array and schedule
-	 * the old copy for kfree after a grace period.
-	 */
-
-	ng->len = id;
-	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len);
-
-	rcu_assign_pointer(net->gen, ng);
-	call_rcu(&old_ng->rcu, net_generic_release);
-assign:
-	ng->ptr[id - 1] = data;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(net_assign_generic);
diff --git a/net/core/netevent.c b/net/core/netevent.c
index 95f81de8750..865f0ceb81f 100644
--- a/net/core/netevent.c
+++ b/net/core/netevent.c
@@ -35,6 +35,7 @@ int register_netevent_notifier(struct notifier_block *nb)
 	err = atomic_notifier_chain_register(&netevent_notif_chain, nb);
 	return err;
 }
+EXPORT_SYMBOL_GPL(register_netevent_notifier);
 
 /**
  *	netevent_unregister_notifier - unregister a netevent notifier block
@@ -50,6 +51,7 @@ int unregister_netevent_notifier(struct notifier_block *nb)
 {
 	return atomic_notifier_chain_unregister(&netevent_notif_chain, nb);
 }
+EXPORT_SYMBOL_GPL(unregister_netevent_notifier);
 
 /**
  *	call_netevent_notifiers - call all netevent notifier blocks
@@ -64,7 +66,4 @@ int call_netevent_notifiers(unsigned long val, void *v)
 {
 	return atomic_notifier_call_chain(&netevent_notif_chain, val, v);
 }
-
-EXPORT_SYMBOL_GPL(register_netevent_notifier);
-EXPORT_SYMBOL_GPL(unregister_netevent_notifier);
 EXPORT_SYMBOL_GPL(call_netevent_notifiers);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 9675f312830..02dc2cbcbe8 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -9,6 +9,7 @@
  * Copyright (C) 2002  Red Hat, Inc.
  */
 
+#include <linux/moduleparam.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/string.h>
@@ -21,6 +22,7 @@
 #include <linux/delay.h>
 #include <linux/rcupdate.h>
 #include <linux/workqueue.h>
+#include <linux/slab.h>
 #include <net/tcp.h>
 #include <net/udp.h>
 #include <asm/unaligned.h>
@@ -33,7 +35,6 @@
 
 #define MAX_UDP_CHUNK 1460
 #define MAX_SKBS 32
-#define MAX_QUEUE_DEPTH (MAX_SKBS / 2)
 
 static struct sk_buff_head skb_pool;
 
@@ -50,6 +51,9 @@ static atomic_t trapped;
 static void zap_completion_queue(void);
 static void arp_reply(struct sk_buff *skb);
 
+static unsigned int carrier_timeout = 4;
+module_param(carrier_timeout, uint, 0644);
+
 static void queue_process(struct work_struct *work)
 {
 	struct netpoll_info *npinfo =
@@ -71,8 +75,7 @@ static void queue_process(struct work_struct *work)
 
 		local_irq_save(flags);
 		__netif_tx_lock(txq, smp_processor_id());
-		if (netif_tx_queue_stopped(txq) ||
-		    netif_tx_queue_frozen(txq) ||
+		if (netif_tx_queue_frozen_or_stopped(txq) ||
 		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
 			skb_queue_head(&npinfo->txq, skb);
 			__netif_tx_unlock(txq);
@@ -174,9 +177,8 @@ static void service_arp_queue(struct netpoll_info *npi)
 	}
 }
 
-void netpoll_poll(struct netpoll *np)
+void netpoll_poll_dev(struct net_device *dev)
 {
-	struct net_device *dev = np->dev;
 	const struct net_device_ops *ops;
 
 	if (!dev || !netif_running(dev))
@@ -195,6 +197,13 @@ void netpoll_poll(struct netpoll *np)
 
 	zap_completion_queue();
 }
+EXPORT_SYMBOL(netpoll_poll_dev);
+
+void netpoll_poll(struct netpoll *np)
+{
+	netpoll_poll_dev(np->dev);
+}
+EXPORT_SYMBOL(netpoll_poll);
 
 static void refill_skbs(void)
 {
@@ -277,12 +286,13 @@ static int netpoll_owner_active(struct net_device *dev)
 	return 0;
 }
 
-static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
+			     struct net_device *dev)
 {
 	int status = NETDEV_TX_BUSY;
 	unsigned long tries;
-	struct net_device *dev = np->dev;
 	const struct net_device_ops *ops = dev->netdev_ops;
+	/* It is up to the caller to keep npinfo alive. */
 	struct netpoll_info *npinfo = np->dev->npinfo;
 
 	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
@@ -303,7 +313,9 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 		     tries > 0; --tries) {
 			if (__netif_tx_trylock(txq)) {
 				if (!netif_tx_queue_stopped(txq)) {
+					dev->priv_flags |= IFF_IN_NETPOLL;
 					status = ops->ndo_start_xmit(skb, dev);
+					dev->priv_flags &= ~IFF_IN_NETPOLL;
 					if (status == NETDEV_TX_OK)
 						txq_trans_update(txq);
 				}
@@ -319,6 +331,11 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 
 			udelay(USEC_PER_POLL);
 		}
+
+		WARN_ONCE(!irqs_disabled(),
+			"netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n",
+			dev->name, ops->ndo_start_xmit);
+
 		local_irq_restore(flags);
 	}
 
@@ -327,6 +344,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 		schedule_delayed_work(&npinfo->tx_work,0);
 	}
 }
+EXPORT_SYMBOL(netpoll_send_skb_on_dev);
 
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 {
@@ -388,6 +406,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 
 	netpoll_send_skb(np, skb);
 }
+EXPORT_SYMBOL(netpoll_send_udp);
 
 static void arp_reply(struct sk_buff *skb)
 {
@@ -398,11 +417,24 @@ static void arp_reply(struct sk_buff *skb)
 	__be32 sip, tip;
 	unsigned char *sha;
 	struct sk_buff *send_skb;
-	struct netpoll *np = NULL;
+	struct netpoll *np, *tmp;
+	unsigned long flags;
+	int hits = 0;
+
+	if (list_empty(&npinfo->rx_np))
+		return;
+
+	/* Before checking the packet, we do some early
+	   inspection whether this is interesting at all */
+	spin_lock_irqsave(&npinfo->rx_lock, flags);
+	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+		if (np->dev == skb->dev)
+			hits++;
+	}
+	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 
-	if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev)
-		np = npinfo->rx_np;
-	if (!np)
+	/* No netpoll struct is using this dev */
+	if (!hits)
 		return;
 
 	/* No arp on this interface */
@@ -428,77 +460,91 @@ static void arp_reply(struct sk_buff *skb)
 	arp_ptr += skb->dev->addr_len;
 	memcpy(&sip, arp_ptr, 4);
 	arp_ptr += 4;
-	/* if we actually cared about dst hw addr, it would get copied here */
+	/* If we actually cared about dst hw addr,
+	   it would get copied here */
 	arp_ptr += skb->dev->addr_len;
 	memcpy(&tip, arp_ptr, 4);
 
 	/* Should we ignore arp? */
-	if (tip != np->local_ip ||
-	    ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
+	if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
 		return;
 
 	size = arp_hdr_len(skb->dev);
-	send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev),
-			    LL_RESERVED_SPACE(np->dev));
 
-	if (!send_skb)
-		return;
-
-	skb_reset_network_header(send_skb);
-	arp = (struct arphdr *) skb_put(send_skb, size);
-	send_skb->dev = skb->dev;
-	send_skb->protocol = htons(ETH_P_ARP);
+	spin_lock_irqsave(&npinfo->rx_lock, flags);
+	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+		if (tip != np->local_ip)
+			continue;
 
-	/* Fill the device header for the ARP frame */
-	if (dev_hard_header(send_skb, skb->dev, ptype,
-			    sha, np->dev->dev_addr,
-			    send_skb->len) < 0) {
-		kfree_skb(send_skb);
-		return;
-	}
+		send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev),
+				    LL_RESERVED_SPACE(np->dev));
+		if (!send_skb)
+			continue;
 
-	/*
-	 * Fill out the arp protocol part.
-	 *
-	 * we only support ethernet device type,
-	 * which (according to RFC 1390) should always equal 1 (Ethernet).
-	 */
+		skb_reset_network_header(send_skb);
+		arp = (struct arphdr *) skb_put(send_skb, size);
+		send_skb->dev = skb->dev;
+		send_skb->protocol = htons(ETH_P_ARP);
 
-	arp->ar_hrd = htons(np->dev->type);
-	arp->ar_pro = htons(ETH_P_IP);
-	arp->ar_hln = np->dev->addr_len;
-	arp->ar_pln = 4;
-	arp->ar_op = htons(type);
+		/* Fill the device header for the ARP frame */
+		if (dev_hard_header(send_skb, skb->dev, ptype,
+				    sha, np->dev->dev_addr,
+				    send_skb->len) < 0) {
+			kfree_skb(send_skb);
+			continue;
+		}
 
-	arp_ptr=(unsigned char *)(arp + 1);
-	memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
-	arp_ptr += np->dev->addr_len;
-	memcpy(arp_ptr, &tip, 4);
-	arp_ptr += 4;
-	memcpy(arp_ptr, sha, np->dev->addr_len);
-	arp_ptr += np->dev->addr_len;
-	memcpy(arp_ptr, &sip, 4);
+		/*
+		 * Fill out the arp protocol part.
+		 *
+		 * we only support ethernet device type,
+		 * which (according to RFC 1390) should
+		 * always equal 1 (Ethernet).
+		 */
 
-	netpoll_send_skb(np, send_skb);
+		arp->ar_hrd = htons(np->dev->type);
+		arp->ar_pro = htons(ETH_P_IP);
+		arp->ar_hln = np->dev->addr_len;
+		arp->ar_pln = 4;
+		arp->ar_op = htons(type);
+
+		arp_ptr = (unsigned char *)(arp + 1);
+		memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
+		arp_ptr += np->dev->addr_len;
+		memcpy(arp_ptr, &tip, 4);
+		arp_ptr += 4;
+		memcpy(arp_ptr, sha, np->dev->addr_len);
+		arp_ptr += np->dev->addr_len;
+		memcpy(arp_ptr, &sip, 4);
+
+		netpoll_send_skb(np, send_skb);
+
+		/* If there are several rx_hooks for the same address,
+		   we're fine by sending a single reply */
+		break;
+	}
+	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 }
 
 int __netpoll_rx(struct sk_buff *skb)
 {
 	int proto, len, ulen;
+	int hits = 0;
 	struct iphdr *iph;
 	struct udphdr *uh;
-	struct netpoll_info *npi = skb->dev->npinfo;
-	struct netpoll *np = npi->rx_np;
+	struct netpoll_info *npinfo = skb->dev->npinfo;
+	struct netpoll *np, *tmp;
 
-	if (!np)
+	if (list_empty(&npinfo->rx_np))
 		goto out;
+
 	if (skb->dev->type != ARPHRD_ETHER)
 		goto out;
 
 	/* check if netpoll clients need ARP */
 	if (skb->protocol == htons(ETH_P_ARP) &&
 	    atomic_read(&trapped)) {
-		skb_queue_tail(&npi->arp_tx, skb);
+		skb_queue_tail(&npinfo->arp_tx, skb);
 		return 1;
 	}
 
@@ -542,16 +588,23 @@ int __netpoll_rx(struct sk_buff *skb)
 		goto out;
 	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
 		goto out;
-	if (np->local_ip && np->local_ip != iph->daddr)
-		goto out;
-	if (np->remote_ip && np->remote_ip != iph->saddr)
-		goto out;
-	if (np->local_port && np->local_port != ntohs(uh->dest))
-		goto out;
 
-	np->rx_hook(np, ntohs(uh->source),
-		    (char *)(uh+1),
-		    ulen - sizeof(struct udphdr));
+	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+		if (np->local_ip && np->local_ip != iph->daddr)
+			continue;
+		if (np->remote_ip && np->remote_ip != iph->saddr)
+			continue;
+		if (np->local_port && np->local_port != ntohs(uh->dest))
+			continue;
+
+		np->rx_hook(np, ntohs(uh->source),
+			       (char *)(uh+1),
+			       ulen - sizeof(struct udphdr));
+		hits++;
+	}
+
+	if (!hits)
+		goto out;
 
 	kfree_skb(skb);
 	return 1;
@@ -571,7 +624,7 @@ void netpoll_print_options(struct netpoll *np)
 			 np->name, np->local_port);
 	printk(KERN_INFO "%s: local IP %pI4\n",
 			 np->name, &np->local_ip);
-	printk(KERN_INFO "%s: interface %s\n",
+	printk(KERN_INFO "%s: interface '%s'\n",
 			 np->name, np->dev_name);
 	printk(KERN_INFO "%s: remote port %d\n",
 			 np->name, np->remote_port);
@@ -580,6 +633,7 @@ void netpoll_print_options(struct netpoll *np)
 	printk(KERN_INFO "%s: remote ethernet address %pM\n",
 	                 np->name, np->remote_mac);
 }
+EXPORT_SYMBOL(netpoll_print_options);
 
 int netpoll_parse_options(struct netpoll *np, char *opt)
 {
@@ -618,6 +672,9 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 		if ((delim = strchr(cur, '@')) == NULL)
 			goto parse_failed;
 		*delim = 0;
+		if (*cur == ' ' || *cur == '\t')
+			printk(KERN_INFO "%s: warning: whitespace "
+					"is not allowed\n", np->name);
 		np->remote_port = simple_strtol(cur, NULL, 10);
 		cur = delim;
 	}
@@ -665,37 +722,37 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 	return 0;
 
  parse_failed:
-	printk(KERN_INFO "%s: couldn't parse config at %s!\n",
+	printk(KERN_INFO "%s: couldn't parse config at '%s'!\n",
 	       np->name, cur);
 	return -1;
 }
+EXPORT_SYMBOL(netpoll_parse_options);
 
-int netpoll_setup(struct netpoll *np)
+int __netpoll_setup(struct netpoll *np)
 {
-	struct net_device *ndev = NULL;
-	struct in_device *in_dev;
+	struct net_device *ndev = np->dev;
 	struct netpoll_info *npinfo;
+	const struct net_device_ops *ops;
 	unsigned long flags;
 	int err;
 
-	if (np->dev_name)
-		ndev = dev_get_by_name(&init_net, np->dev_name);
-	if (!ndev) {
-		printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
+	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
+	    !ndev->netdev_ops->ndo_poll_controller) {
+		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
 		       np->name, np->dev_name);
-		return -ENODEV;
+		err = -ENOTSUPP;
+		goto out;
 	}
 
-	np->dev = ndev;
 	if (!ndev->npinfo) {
 		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
 		if (!npinfo) {
 			err = -ENOMEM;
-			goto release;
+			goto out;
 		}
 
 		npinfo->rx_flags = 0;
-		npinfo->rx_np = NULL;
+		INIT_LIST_HEAD(&npinfo->rx_np);
 
 		spin_lock_init(&npinfo->rx_lock);
 		skb_queue_head_init(&npinfo->arp_tx);
@@ -703,16 +760,51 @@ int netpoll_setup(struct netpoll *np)
 		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
 
 		atomic_set(&npinfo->refcnt, 1);
+
+		ops = np->dev->netdev_ops;
+		if (ops->ndo_netpoll_setup) {
+			err = ops->ndo_netpoll_setup(ndev, npinfo);
+			if (err)
+				goto free_npinfo;
+		}
 	} else {
 		npinfo = ndev->npinfo;
 		atomic_inc(&npinfo->refcnt);
 	}
 
-	if (!ndev->netdev_ops->ndo_poll_controller) {
-		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
+	npinfo->netpoll = np;
+
+	if (np->rx_hook) {
+		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
+		list_add_tail(&np->rx, &npinfo->rx_np);
+		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+	}
+
+	/* last thing to do is link it to the net device structure */
+	rcu_assign_pointer(ndev->npinfo, npinfo);
+
+	return 0;
+
+free_npinfo:
+	kfree(npinfo);
+out:
+	return err;
+}
+EXPORT_SYMBOL_GPL(__netpoll_setup);
+
+int netpoll_setup(struct netpoll *np)
+{
+	struct net_device *ndev = NULL;
+	struct in_device *in_dev;
+	int err;
+
+	if (np->dev_name)
+		ndev = dev_get_by_name(&init_net, np->dev_name);
+	if (!ndev) {
+		printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
 		       np->name, np->dev_name);
-		err = -ENOTSUPP;
-		goto release;
+		return -ENODEV;
 	}
 
 	if (!netif_running(ndev)) {
@@ -728,11 +820,11 @@ int netpoll_setup(struct netpoll *np)
 		if (err) {
 			printk(KERN_ERR "%s: failed to open %s\n",
 			       np->name, ndev->name);
-			goto release;
+			goto put;
 		}
 
 		atleast = jiffies + HZ/10;
-		atmost = jiffies + 4*HZ;
+		atmost = jiffies + carrier_timeout * HZ;
 		while (!netif_carrier_ok(ndev)) {
 			if (time_after(jiffies, atmost)) {
 				printk(KERN_NOTICE
@@ -740,7 +832,7 @@ int netpoll_setup(struct netpoll *np)
 				       np->name);
 				break;
 			}
-			cond_resched();
+			msleep(1);
 		}
 
 		/* If carrier appears to come up instantly, we don't
@@ -765,7 +857,7 @@ int netpoll_setup(struct netpoll *np)
 			printk(KERN_ERR "%s: no IP address for %s, aborting\n",
 			       np->name, np->dev_name);
 			err = -EDESTADDRREQ;
-			goto release;
+			goto put;
 		}
 
 		np->local_ip = in_dev->ifa_list->ifa_local;
@@ -773,31 +865,25 @@ int netpoll_setup(struct netpoll *np)
 		printk(KERN_INFO "%s: local IP %pI4\n", np->name, &np->local_ip);
 	}
 
-	if (np->rx_hook) {
-		spin_lock_irqsave(&npinfo->rx_lock, flags);
-		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
-		npinfo->rx_np = np;
-		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-	}
+	np->dev = ndev;
 
 	/* fill up the skb queue */
 	refill_skbs();
 
-	/* last thing to do is link it to the net device structure */
-	ndev->npinfo = npinfo;
+	rtnl_lock();
+	err = __netpoll_setup(np);
+	rtnl_unlock();
 
-	/* avoid racing with NAPI reading npinfo */
-	synchronize_rcu();
+	if (err)
+		goto put;
 
 	return 0;
 
- release:
-	if (!ndev->npinfo)
-		kfree(npinfo);
-	np->dev = NULL;
+put:
 	dev_put(ndev);
 	return err;
 }
+EXPORT_SYMBOL(netpoll_setup);
 
 static int __init netpoll_init(void)
 {
@@ -806,43 +892,65 @@ static int __init netpoll_init(void)
 }
 core_initcall(netpoll_init);
 
-void netpoll_cleanup(struct netpoll *np)
+void __netpoll_cleanup(struct netpoll *np)
 {
 	struct netpoll_info *npinfo;
 	unsigned long flags;
 
-	if (np->dev) {
-		npinfo = np->dev->npinfo;
-		if (npinfo) {
-			if (npinfo->rx_np == np) {
-				spin_lock_irqsave(&npinfo->rx_lock, flags);
-				npinfo->rx_np = NULL;
-				npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
-				spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-			}
+	npinfo = np->dev->npinfo;
+	if (!npinfo)
+		return;
+
+	if (!list_empty(&npinfo->rx_np)) {
+		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		list_del(&np->rx);
+		if (list_empty(&npinfo->rx_np))
+			npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
+		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+	}
 
-			if (atomic_dec_and_test(&npinfo->refcnt)) {
-				skb_queue_purge(&npinfo->arp_tx);
-				skb_queue_purge(&npinfo->txq);
-				cancel_rearming_delayed_work(&npinfo->tx_work);
+	if (atomic_dec_and_test(&npinfo->refcnt)) {
+		const struct net_device_ops *ops;
 
-				/* clean after last, unfinished work */
-				__skb_queue_purge(&npinfo->txq);
-				kfree(npinfo);
-				np->dev->npinfo = NULL;
-			}
-		}
+		ops = np->dev->netdev_ops;
+		if (ops->ndo_netpoll_cleanup)
+			ops->ndo_netpoll_cleanup(np->dev);
+
+		rcu_assign_pointer(np->dev->npinfo, NULL);
 
-		dev_put(np->dev);
+		/* avoid racing with NAPI reading npinfo */
+		synchronize_rcu_bh();
+
+		skb_queue_purge(&npinfo->arp_tx);
+		skb_queue_purge(&npinfo->txq);
+		cancel_delayed_work_sync(&npinfo->tx_work);
+
+		/* clean after last, unfinished work */
+		__skb_queue_purge(&npinfo->txq);
+		kfree(npinfo);
 	}
+}
+EXPORT_SYMBOL_GPL(__netpoll_cleanup);
 
+void netpoll_cleanup(struct netpoll *np)
+{
+	if (!np->dev)
+		return;
+
+	rtnl_lock();
+	__netpoll_cleanup(np);
+	rtnl_unlock();
+
+	dev_put(np->dev);
 	np->dev = NULL;
 }
+EXPORT_SYMBOL(netpoll_cleanup);
 
 int netpoll_trap(void)
 {
 	return atomic_read(&trapped);
 }
+EXPORT_SYMBOL(netpoll_trap);
 
 void netpoll_set_trap(int trap)
 {
@@ -851,12 +959,4 @@ void netpoll_set_trap(int trap)
 	else
 		atomic_dec(&trapped);
 }
-
 EXPORT_SYMBOL(netpoll_set_trap);
-EXPORT_SYMBOL(netpoll_trap);
-EXPORT_SYMBOL(netpoll_print_options);
-EXPORT_SYMBOL(netpoll_parse_options);
-EXPORT_SYMBOL(netpoll_setup);
-EXPORT_SYMBOL(netpoll_cleanup);
-EXPORT_SYMBOL(netpoll_send_udp);
-EXPORT_SYMBOL(netpoll_poll);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 19b8c20e98a..a9e7fc4c461 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -115,6 +115,9 @@
  * command by Adit Ranadive <adit.262@gmail.com>
  *
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/sys.h>
 #include <linux/types.h>
 #include <linux/module.h>
@@ -131,6 +134,7 @@
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
 #include <linux/capability.h>
+#include <linux/hrtimer.h>
 #include <linux/freezer.h>
 #include <linux/delay.h>
 #include <linux/timer.h>
@@ -162,18 +166,19 @@
 #include <asm/byteorder.h>
 #include <linux/rcupdate.h>
 #include <linux/bitops.h>
-#include <asm/io.h>
+#include <linux/io.h>
+#include <linux/timex.h>
+#include <linux/uaccess.h>
 #include <asm/dma.h>
-#include <asm/uaccess.h>
 #include <asm/div64.h>		/* do_div */
-#include <asm/timex.h>
-
-#define VERSION  "pktgen v2.70: Packet Generator for packet performance testing.\n"
 
+#define VERSION	"2.74"
 #define IP_NAME_SZ 32
 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
 #define MPLS_STACK_BOTTOM htonl(0x00000100)
 
+#define func_enter() pr_debug("entering %s\n", __func__);
+
 /* Device flag bits */
 #define F_IPSRC_RND   (1<<0)	/* IP-Src Random  */
 #define F_IPDST_RND   (1<<1)	/* IP-Dst Random  */
@@ -190,13 +195,13 @@
 #define F_IPSEC_ON    (1<<12)	/* ipsec on for flows */
 #define F_QUEUE_MAP_RND (1<<13)	/* queue map Random */
 #define F_QUEUE_MAP_CPU (1<<14)	/* queue map mirrors smp_processor_id() */
+#define F_NODE          (1<<15)	/* Node memory alloc */
 
 /* Thread control flag bits */
-#define T_TERMINATE   (1<<0)
-#define T_STOP        (1<<1)	/* Stop run */
-#define T_RUN         (1<<2)	/* Start run */
-#define T_REMDEVALL   (1<<3)	/* Remove all devs */
-#define T_REMDEV      (1<<4)	/* Remove one dev */
+#define T_STOP        (1<<0)	/* Stop run */
+#define T_RUN         (1<<1)	/* Start run */
+#define T_REMDEVALL   (1<<2)	/* Remove all devs */
+#define T_REMDEV      (1<<3)	/* Remove one dev */
 
 /* If lock -- can be removed after some work */
 #define   if_lock(t)           spin_lock(&(t->if_lock));
@@ -206,7 +211,7 @@
 #define PKTGEN_MAGIC 0xbe9be955
 #define PG_PROC_DIR "pktgen"
 #define PGCTRL	    "pgctrl"
-static struct proc_dir_entry *pg_proc_dir = NULL;
+static struct proc_dir_entry *pg_proc_dir;
 
 #define MAX_CFLOWS  65536
 
@@ -231,9 +236,9 @@ struct pktgen_dev {
 	 */
 	struct proc_dir_entry *entry;	/* proc file */
 	struct pktgen_thread *pg_thread;/* the owner */
-	struct list_head list;		/* Used for chaining in the thread's run-queue */
+	struct list_head list;		/* chaining in the thread's run-queue */
 
-	int running;		/* if this changes to false, the test will stop */
+	int running;		/* if false, the test will stop */
 
 	/* If min != max, then we will either do a linear iteration, or
 	 * we will do a random selection from within the range.
@@ -246,33 +251,36 @@ struct pktgen_dev {
 	int max_pkt_size;	/* = ETH_ZLEN; */
 	int pkt_overhead;	/* overhead for MPLS, VLANs, IPSEC etc */
 	int nfrags;
-	__u32 delay_us;		/* Default delay */
-	__u32 delay_ns;
+	u64 delay;		/* nano-seconds */
+
 	__u64 count;		/* Default No packets to send */
 	__u64 sofar;		/* How many pkts we've sent so far */
 	__u64 tx_bytes;		/* How many bytes we've transmitted */
-	__u64 errors;		/* Errors when trying to transmit, pkts will be re-sent */
+	__u64 errors;		/* Errors when trying to transmit */
 
 	/* runtime counters relating to clone_skb */
-	__u64 next_tx_us;	/* timestamp of when to tx next */
-	__u32 next_tx_ns;
 
 	__u64 allocated_skbs;
 	__u32 clone_count;
 	int last_ok;		/* Was last skb sent?
-				 * Or a failed transmit of some sort?  This will keep
-				 * sequence numbers in order, for example.
+				 * Or a failed transmit of some sort?
+				 * This will keep sequence numbers in order
 				 */
-	__u64 started_at;	/* micro-seconds */
-	__u64 stopped_at;	/* micro-seconds */
-	__u64 idle_acc;		/* micro-seconds */
+	ktime_t next_tx;
+	ktime_t started_at;
+	ktime_t stopped_at;
+	u64	idle_acc;	/* nano-seconds */
+
 	__u32 seq_num;
 
-	int clone_skb;		/* Use multiple SKBs during packet gen.  If this number
-				 * is greater than 1, then that many copies of the same
-				 * packet will be sent before a new packet is allocated.
-				 * For instance, if you want to send 1024 identical packets
-				 * before creating a new packet, set clone_skb to 1024.
+	int clone_skb;		/*
+				 * Use multiple SKBs during packet gen.
+				 * If this number is greater than 1, then
+				 * that many copies of the same packet will be
+				 * sent before a new packet is allocated.
+				 * If you want to send 1024 identical packets
+				 * before creating a new packet,
+				 * set clone_skb to 1024.
 				 */
 
 	char dst_min[IP_NAME_SZ];	/* IP, ie 1.2.3.4 */
@@ -304,8 +312,10 @@ struct pktgen_dev {
 	__u16 udp_dst_max;	/* exclusive, dest UDP port */
 
 	/* DSCP + ECN */
-	__u8 tos;            /* six most significant bits of (former) IPv4 TOS are for dscp codepoint */
-	__u8 traffic_class;  /* ditto for the (former) Traffic Class in IPv6 (see RFC 3260, sec. 4) */
+	__u8 tos;            /* six MSB of (former) IPv4 TOS
+				are for dscp codepoint */
+	__u8 traffic_class;  /* ditto for the (former) Traffic Class in IPv6
+				(see RFC 3260, sec. 4) */
 
 	/* MPLS */
 	unsigned nr_labels;	/* Depth of stack, 0 = no MPLS */
@@ -330,10 +340,12 @@ struct pktgen_dev {
 	__u32 cur_src_mac_offset;
 	__be32 cur_saddr;
 	__be32 cur_daddr;
+	__u16 ip_id;
 	__u16 cur_udp_dst;
 	__u16 cur_udp_src;
 	__u16 cur_queue_map;
 	__u32 cur_pkt_size;
+	__u32 last_pkt_size;
 
 	__u8 hh[14];
 	/* = {
@@ -346,15 +358,18 @@ struct pktgen_dev {
 	 */
 	__u16 pad;		/* pad out the hh struct to an even 16 bytes */
 
-	struct sk_buff *skb;	/* skb we are to transmit next, mainly used for when we
+	struct sk_buff *skb;	/* skb we are to transmit next, used for when we
 				 * are transmitting the same one multiple times
 				 */
-	struct net_device *odev;	/* The out-going device.  Note that the device should
-					 * have it's pg_info pointer pointing back to this
-					 * device.  This will be set when the user specifies
-					 * the out-going device name (not when the inject is
-					 * started as it used to do.)
-					 */
+	struct net_device *odev; /* The out-going device.
+				  * Note that the device should have its
+				  * pg_info pointer pointing back to this
+				  * device.
+				  * Set when the user specifies the out-going
+				  * device name (not when the inject is
+				  * started as it used to do.)
+				  */
+	char odevname[32];
 	struct flow_state *flows;
 	unsigned cflows;	/* Concurrent flows (config) */
 	unsigned lflow;		/* Flow length  (config) */
@@ -363,6 +378,8 @@ struct pktgen_dev {
 
 	u16 queue_map_min;
 	u16 queue_map_max;
+	__u32 skb_priority;	/* skb priority field */
+	int node;               /* Memory node */
 
 #ifdef CONFIG_XFRM
 	__u8	ipsmode;		/* IPSEC mode (config) */
@@ -378,14 +395,17 @@ struct pktgen_hdr {
 	__be32 tv_usec;
 };
 
+static bool pktgen_exiting __read_mostly;
+
 struct pktgen_thread {
-	spinlock_t if_lock;
+	spinlock_t if_lock;		/* for list of devices */
 	struct list_head if_list;	/* All device here */
 	struct list_head th_list;
 	struct task_struct *tsk;
 	char result[512];
 
-	/* Field for thread to receive "posted" events terminate, stop ifs etc. */
+	/* Field for thread to receive "posted" events terminate,
+	   stop ifs etc. */
 
 	u32 control;
 	int cpu;
@@ -397,34 +417,33 @@ struct pktgen_thread {
 #define REMOVE 1
 #define FIND   0
 
-/** Convert to micro-seconds */
-static inline __u64 tv_to_us(const struct timeval *tv)
+static inline ktime_t ktime_now(void)
 {
-	__u64 us = tv->tv_usec;
-	us += (__u64) tv->tv_sec * (__u64) 1000000;
-	return us;
+	struct timespec ts;
+	ktime_get_ts(&ts);
+
+	return timespec_to_ktime(ts);
 }
 
-static __u64 getCurUs(void)
+/* This works even if 32 bit because of careful byte order choice */
+static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2)
 {
-	struct timeval tv;
-	do_gettimeofday(&tv);
-	return tv_to_us(&tv);
+	return cmp1.tv64 < cmp2.tv64;
 }
 
-/* old include end */
-
-static char version[] __initdata = VERSION;
+static const char version[] =
+	"Packet Generator for packet performance testing. "
+	"Version: " VERSION "\n";
 
 static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i);
 static int pktgen_add_device(struct pktgen_thread *t, const char *ifname);
 static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
-					  const char *ifname);
+					  const char *ifname, bool exact);
 static int pktgen_device_event(struct notifier_block *, unsigned long, void *);
 static void pktgen_run_all_threads(void);
 static void pktgen_reset_all_threads(void);
 static void pktgen_stop_all_threads_ifs(void);
-static int pktgen_stop_device(struct pktgen_dev *pkt_dev);
+
 static void pktgen_stop(struct pktgen_thread *t);
 static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
 
@@ -432,10 +451,10 @@ static unsigned int scan_ip6(const char *s, char ip[16]);
 static unsigned int fmt_ip6(char *s, const char ip[16]);
 
 /* Module parameters, defaults. */
-static int pg_count_d = 1000;	/* 1000 pkts by default */
-static int pg_delay_d;
-static int pg_clone_skb_d;
-static int debug;
+static int pg_count_d __read_mostly = 1000;
+static int pg_delay_d __read_mostly;
+static int pg_clone_skb_d  __read_mostly;
+static int debug  __read_mostly;
 
 static DEFINE_MUTEX(pktgen_thread_lock);
 static LIST_HEAD(pktgen_threads);
@@ -451,12 +470,12 @@ static struct notifier_block pktgen_notifier_block = {
 
 static int pgctrl_show(struct seq_file *seq, void *v)
 {
-	seq_puts(seq, VERSION);
+	seq_puts(seq, version);
 	return 0;
 }
 
-static ssize_t pgctrl_write(struct file *file, const char __user * buf,
-			    size_t count, loff_t * ppos)
+static ssize_t pgctrl_write(struct file *file, const char __user *buf,
+			    size_t count, loff_t *ppos)
 {
 	int err = 0;
 	char data[128];
@@ -485,7 +504,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user * buf,
 		pktgen_reset_all_threads();
 
 	else
-		printk(KERN_WARNING "pktgen: Unknown command: %s\n", data);
+		pr_warning("Unknown command: %s\n", data);
 
 	err = count;
 
@@ -509,10 +528,9 @@ static const struct file_operations pktgen_fops = {
 
 static int pktgen_if_show(struct seq_file *seq, void *v)
 {
-	struct pktgen_dev *pkt_dev = seq->private;
-	__u64 sa;
-	__u64 stopped;
-	__u64 now = getCurUs();
+	const struct pktgen_dev *pkt_dev = seq->private;
+	ktime_t stopped;
+	u64 idle;
 
 	seq_printf(seq,
 		   "Params: count %llu  min_pkt_size: %u  max_pkt_size: %u\n",
@@ -520,10 +538,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 		   pkt_dev->max_pkt_size);
 
 	seq_printf(seq,
-		   "     frags: %d  delay: %u  clone_skb: %d  ifname: %s\n",
-		   pkt_dev->nfrags,
-		   1000 * pkt_dev->delay_us + pkt_dev->delay_ns,
-		   pkt_dev->clone_skb, pkt_dev->odev->name);
+		   "     frags: %d  delay: %llu  clone_skb: %d  ifname: %s\n",
+		   pkt_dev->nfrags, (unsigned long long) pkt_dev->delay,
+		   pkt_dev->clone_skb, pkt_dev->odevname);
 
 	seq_printf(seq, "     flows: %u flowlen: %u\n", pkt_dev->cflows,
 		   pkt_dev->lflow);
@@ -533,6 +550,10 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 		   pkt_dev->queue_map_min,
 		   pkt_dev->queue_map_max);
 
+	if (pkt_dev->skb_priority)
+		seq_printf(seq, "     skb_priority: %u\n",
+			   pkt_dev->skb_priority);
+
 	if (pkt_dev->flags & F_IPV6) {
 		char b1[128], b2[128], b3[128];
 		fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr);
@@ -549,11 +570,14 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 			   "     daddr: %s  min_daddr: %s  max_daddr: %s\n", b1,
 			   b2, b3);
 
-	} else
+	} else {
+		seq_printf(seq,
+			   "     dst_min: %s  dst_max: %s\n",
+			   pkt_dev->dst_min, pkt_dev->dst_max);
 		seq_printf(seq,
-			   "     dst_min: %s  dst_max: %s\n     src_min: %s  src_max: %s\n",
-			   pkt_dev->dst_min, pkt_dev->dst_max, pkt_dev->src_min,
-			   pkt_dev->src_max);
+			   "        src_min: %s  src_max: %s\n",
+			   pkt_dev->src_min, pkt_dev->src_max);
+	}
 
 	seq_puts(seq, "     src_mac: ");
 
@@ -565,7 +589,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "%pM\n", pkt_dev->dst_mac);
 
 	seq_printf(seq,
-		   "     udp_src_min: %d  udp_src_max: %d  udp_dst_min: %d  udp_dst_max: %d\n",
+		   "     udp_src_min: %d  udp_src_max: %d"
+		   "  udp_dst_min: %d  udp_dst_max: %d\n",
 		   pkt_dev->udp_src_min, pkt_dev->udp_src_max,
 		   pkt_dev->udp_dst_min, pkt_dev->udp_dst_max);
 
@@ -581,23 +606,24 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 				   i == pkt_dev->nr_labels-1 ? "\n" : ", ");
 	}
 
-	if (pkt_dev->vlan_id != 0xffff) {
+	if (pkt_dev->vlan_id != 0xffff)
 		seq_printf(seq, "     vlan_id: %u  vlan_p: %u  vlan_cfi: %u\n",
-			   pkt_dev->vlan_id, pkt_dev->vlan_p, pkt_dev->vlan_cfi);
-	}
+			   pkt_dev->vlan_id, pkt_dev->vlan_p,
+			   pkt_dev->vlan_cfi);
 
-	if (pkt_dev->svlan_id != 0xffff) {
+	if (pkt_dev->svlan_id != 0xffff)
 		seq_printf(seq, "     svlan_id: %u  vlan_p: %u  vlan_cfi: %u\n",
-			   pkt_dev->svlan_id, pkt_dev->svlan_p, pkt_dev->svlan_cfi);
-	}
+			   pkt_dev->svlan_id, pkt_dev->svlan_p,
+			   pkt_dev->svlan_cfi);
 
-	if (pkt_dev->tos) {
+	if (pkt_dev->tos)
 		seq_printf(seq, "     tos: 0x%02x\n", pkt_dev->tos);
-	}
 
-	if (pkt_dev->traffic_class) {
+	if (pkt_dev->traffic_class)
 		seq_printf(seq, "     traffic_class: 0x%02x\n", pkt_dev->traffic_class);
-	}
+
+	if (pkt_dev->node >= 0)
+		seq_printf(seq, "     node: %d\n", pkt_dev->node);
 
 	seq_printf(seq, "     Flags: ");
 
@@ -652,19 +678,26 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 	if (pkt_dev->flags & F_SVID_RND)
 		seq_printf(seq, "SVID_RND  ");
 
+	if (pkt_dev->flags & F_NODE)
+		seq_printf(seq, "NODE_ALLOC  ");
+
 	seq_puts(seq, "\n");
 
-	sa = pkt_dev->started_at;
-	stopped = pkt_dev->stopped_at;
-	if (pkt_dev->running)
-		stopped = now;	/* not really stopped, more like last-running-at */
+	/* not really stopped, more like last-running-at */
+	stopped = pkt_dev->running ? ktime_now() : pkt_dev->stopped_at;
+	idle = pkt_dev->idle_acc;
+	do_div(idle, NSEC_PER_USEC);
 
 	seq_printf(seq,
-		   "Current:\n     pkts-sofar: %llu  errors: %llu\n     started: %lluus  stopped: %lluus idle: %lluus\n",
+		   "Current:\n     pkts-sofar: %llu  errors: %llu\n",
 		   (unsigned long long)pkt_dev->sofar,
-		   (unsigned long long)pkt_dev->errors, (unsigned long long)sa,
-		   (unsigned long long)stopped,
-		   (unsigned long long)pkt_dev->idle_acc);
+		   (unsigned long long)pkt_dev->errors);
+
+	seq_printf(seq,
+		   "     started: %lluus  stopped: %lluus idle: %lluus\n",
+		   (unsigned long long) ktime_to_us(pkt_dev->started_at),
+		   (unsigned long long) ktime_to_us(stopped),
+		   (unsigned long long) idle);
 
 	seq_printf(seq,
 		   "     seq_num: %d  cur_dst_mac_offset: %d  cur_src_mac_offset: %d\n",
@@ -696,22 +729,21 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 }
 
 
-static int hex32_arg(const char __user *user_buffer, unsigned long maxlen, __u32 *num)
+static int hex32_arg(const char __user *user_buffer, unsigned long maxlen,
+		     __u32 *num)
 {
 	int i = 0;
 	*num = 0;
 
 	for (; i < maxlen; i++) {
+		int value;
 		char c;
 		*num <<= 4;
 		if (get_user(c, &user_buffer[i]))
 			return -EFAULT;
-		if ((c >= '0') && (c <= '9'))
-			*num |= c - '0';
-		else if ((c >= 'a') && (c <= 'f'))
-			*num |= c - 'a' + 10;
-		else if ((c >= 'A') && (c <= 'F'))
-			*num |= c - 'A' + 10;
+		value = hex_to_bin(c);
+		if (value >= 0)
+			*num |= value;
 		else
 			break;
 	}
@@ -746,10 +778,10 @@ done:
 static unsigned long num_arg(const char __user * user_buffer,
 			     unsigned long maxlen, unsigned long *num)
 {
-	int i = 0;
+	int i;
 	*num = 0;
 
-	for (; i < maxlen; i++) {
+	for (i = 0; i < maxlen; i++) {
 		char c;
 		if (get_user(c, &user_buffer[i]))
 			return -EFAULT;
@@ -764,9 +796,9 @@ static unsigned long num_arg(const char __user * user_buffer,
 
 static int strn_len(const char __user * user_buffer, unsigned int maxlen)
 {
-	int i = 0;
+	int i;
 
-	for (; i < maxlen; i++) {
+	for (i = 0; i < maxlen; i++) {
 		char c;
 		if (get_user(c, &user_buffer[i]))
 			return -EFAULT;
@@ -819,9 +851,9 @@ static ssize_t pktgen_if_write(struct file *file,
 			       const char __user * user_buffer, size_t count,
 			       loff_t * offset)
 {
-	struct seq_file *seq = (struct seq_file *)file->private_data;
+	struct seq_file *seq = file->private_data;
 	struct pktgen_dev *pkt_dev = seq->private;
-	int i = 0, max, len;
+	int i, max, len;
 	char name[16], valstr[32];
 	unsigned long value = 0;
 	char *pg_result = NULL;
@@ -831,24 +863,24 @@ static ssize_t pktgen_if_write(struct file *file,
 	pg_result = &(pkt_dev->result[0]);
 
 	if (count < 1) {
-		printk(KERN_WARNING "pktgen: wrong command format\n");
+		pr_warning("wrong command format\n");
 		return -EINVAL;
 	}
 
-	max = count - i;
-	tmp = count_trail_chars(&user_buffer[i], max);
+	max = count;
+	tmp = count_trail_chars(user_buffer, max);
 	if (tmp < 0) {
-		printk(KERN_WARNING "pktgen: illegal format\n");
+		pr_warning("illegal format\n");
 		return tmp;
 	}
-	i += tmp;
+	i = tmp;
 
 	/* Read variable name */
 
 	len = strn_len(&user_buffer[i], sizeof(name) - 1);
-	if (len < 0) {
+	if (len < 0)
 		return len;
-	}
+
 	memset(name, 0, sizeof(name));
 	if (copy_from_user(name, &user_buffer[i], len))
 		return -EFAULT;
@@ -862,19 +894,20 @@ static ssize_t pktgen_if_write(struct file *file,
 	i += len;
 
 	if (debug) {
-		char tb[count + 1];
-		if (copy_from_user(tb, user_buffer, count))
+		size_t copy = min_t(size_t, count, 1023);
+		char tb[copy + 1];
+		if (copy_from_user(tb, user_buffer, copy))
 			return -EFAULT;
-		tb[count] = 0;
+		tb[copy] = 0;
 		printk(KERN_DEBUG "pktgen: %s,%lu  buffer -:%s:-\n", name,
 		       (unsigned long)count, tb);
 	}
 
 	if (!strcmp(name, "min_pkt_size")) {
 		len = num_arg(&user_buffer[i], 10, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if (value < 14 + 20 + 8)
 			value = 14 + 20 + 8;
@@ -889,9 +922,9 @@ static ssize_t pktgen_if_write(struct file *file,
 
 	if (!strcmp(name, "max_pkt_size")) {
 		len = num_arg(&user_buffer[i], 10, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if (value < 14 + 20 + 8)
 			value = 14 + 20 + 8;
@@ -908,9 +941,9 @@ static ssize_t pktgen_if_write(struct file *file,
 
 	if (!strcmp(name, "pkt_size")) {
 		len = num_arg(&user_buffer[i], 10, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if (value < 14 + 20 + 8)
 			value = 14 + 20 + 8;
@@ -925,9 +958,9 @@ static ssize_t pktgen_if_write(struct file *file,
 
 	if (!strcmp(name, "debug")) {
 		len = num_arg(&user_buffer[i], 10, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		debug = value;
 		sprintf(pg_result, "OK: debug=%u", debug);
@@ -936,9 +969,9 @@ static ssize_t pktgen_if_write(struct file *file,
 
 	if (!strcmp(name, "frags")) {
 		len = num_arg(&user_buffer[i], 10, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		pkt_dev->nfrags = value;
 		sprintf(pg_result, "OK: frags=%u", pkt_dev->nfrags);
@@ -946,26 +979,54 @@ static ssize_t pktgen_if_write(struct file *file,
 	}
 	if (!strcmp(name, "delay")) {
 		len = num_arg(&user_buffer[i], 10, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
-		if (value == 0x7FFFFFFF) {
-			pkt_dev->delay_us = 0x7FFFFFFF;
-			pkt_dev->delay_ns = 0;
-		} else {
-			pkt_dev->delay_us = value / 1000;
-			pkt_dev->delay_ns = value % 1000;
-		}
-		sprintf(pg_result, "OK: delay=%u",
-			1000 * pkt_dev->delay_us + pkt_dev->delay_ns);
+		if (value == 0x7FFFFFFF)
+			pkt_dev->delay = ULLONG_MAX;
+		else
+			pkt_dev->delay = (u64)value;
+
+		sprintf(pg_result, "OK: delay=%llu",
+			(unsigned long long) pkt_dev->delay);
+		return count;
+	}
+	if (!strcmp(name, "rate")) {	/* target bit rate, Mbit/s */
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (!value)	/* zero would divide by zero below */
+			return -EINVAL;
+		pkt_dev->delay = pkt_dev->min_pkt_size*8*NSEC_PER_USEC/value;
+		if (debug)
+			pr_info("Delay set at: %llu ns\n", pkt_dev->delay);
+
+		sprintf(pg_result, "OK: rate=%lu", value);
+		return count;
+	}
+	if (!strcmp(name, "ratep")) {	/* target packet rate, pkts/s */
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (!value)	/* zero would divide by zero below */
+			return -EINVAL;
+		pkt_dev->delay = NSEC_PER_SEC/value;
+		if (debug)
+			pr_info("Delay set at: %llu ns\n", pkt_dev->delay);
+
+		sprintf(pg_result, "OK: rate=%lu", value);
 		return count;
 	}
 	if (!strcmp(name, "udp_src_min")) {
 		len = num_arg(&user_buffer[i], 10, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if (value != pkt_dev->udp_src_min) {
 			pkt_dev->udp_src_min = value;
@@ -976,9 +1037,9 @@ static ssize_t pktgen_if_write(struct file *file,
 	}
 	if (!strcmp(name, "udp_dst_min")) {
 		len = num_arg(&user_buffer[i], 10, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if (value != pkt_dev->udp_dst_min) {
 			pkt_dev->udp_dst_min = value;
@@ -989,9 +1050,9 @@ static ssize_t pktgen_if_write(struct file *file,
 	}
 	if (!strcmp(name, "udp_src_max")) {
 		len = num_arg(&user_buffer[i], 10, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if (value != pkt_dev->udp_src_max) {
 			pkt_dev->udp_src_max = value;
@@ -1002,9 +1063,9 @@ static ssize_t pktgen_if_write(struct file *file,
 	}
 	if (!strcmp(name, "udp_dst_max")) {
 		len = num_arg(&user_buffer[i], 10, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if (value != pkt_dev->udp_dst_max) {
 			pkt_dev->udp_dst_max = value;
@@ -1015,9 +1076,9 @@ static ssize_t pktgen_if_write(struct file *file,
 	}
 	if (!strcmp(name, "clone_skb")) {
 		len = num_arg(&user_buffer[i], 10, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		pkt_dev->clone_skb = value;
 
@@ -1026,9 +1087,9 @@ static ssize_t pktgen_if_write(struct file *file,
 	}
 	if (!strcmp(name, "count")) {
 		len = num_arg(&user_buffer[i], 10, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		pkt_dev->count = value;
 		sprintf(pg_result, "OK: count=%llu",
@@ -1037,9 +1098,9 @@ static ssize_t pktgen_if_write(struct file *file,
 	}
 	if (!strcmp(name, "src_mac_count")) {
 		len = num_arg(&user_buffer[i], 10, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if (pkt_dev->src_mac_count != value) {
 			pkt_dev->src_mac_count = value;
@@ -1051,9 +1112,9 @@ static ssize_t pktgen_if_write(struct file *file,
 	}
 	if (!strcmp(name, "dst_mac_count")) {
 		len = num_arg(&user_buffer[i], 10, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if (pkt_dev->dst_mac_count != value) {
 			pkt_dev->dst_mac_count = value;
@@ -1063,13 +1124,28 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->dst_mac_count);
 		return count;
 	}
+	if (!strcmp(name, "node")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+
+		if (node_possible(value)) {
+			pkt_dev->node = value;
+			sprintf(pg_result, "OK: node=%d", pkt_dev->node);
+		}
+		else
+			sprintf(pg_result, "ERROR: node not possible");
+		return count;
+	}
 	if (!strcmp(name, "flag")) {
 		char f[32];
 		memset(f, 0, 32);
 		len = strn_len(&user_buffer[i], sizeof(f) - 1);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		if (copy_from_user(f, &user_buffer[i], len))
 			return -EFAULT;
 		i += len;
@@ -1155,12 +1231,18 @@ static ssize_t pktgen_if_write(struct file *file,
 		else if (strcmp(f, "!IPV6") == 0)
 			pkt_dev->flags &= ~F_IPV6;
 
+		else if (strcmp(f, "NODE_ALLOC") == 0)
+			pkt_dev->flags |= F_NODE;
+
+		else if (strcmp(f, "!NODE_ALLOC") == 0)
+			pkt_dev->flags &= ~F_NODE;
+
 		else {
 			sprintf(pg_result,
 				"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
 				f,
 				"IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, "
-				"MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n");
+				"MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n");
 			return count;
 		}
 		sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
@@ -1168,9 +1250,8 @@ static ssize_t pktgen_if_write(struct file *file,
 	}
 	if (!strcmp(name, "dst_min") || !strcmp(name, "dst")) {
 		len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_min) - 1);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
 
 		if (copy_from_user(buf, &user_buffer[i], len))
 			return -EFAULT;
@@ -1190,9 +1271,9 @@ static ssize_t pktgen_if_write(struct file *file,
 	}
 	if (!strcmp(name, "dst_max")) {
 		len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_max) - 1);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 
 		if (copy_from_user(buf, &user_buffer[i], len))
 			return -EFAULT;
@@ -1303,9 +1384,9 @@ static ssize_t pktgen_if_write(struct file *file,
 	}
 	if (!strcmp(name, "src_min")) {
 		len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_min) - 1);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		if (copy_from_user(buf, &user_buffer[i], len))
 			return -EFAULT;
 		buf[len] = 0;
@@ -1324,9 +1405,9 @@ static ssize_t pktgen_if_write(struct file *file,
 	}
 	if (!strcmp(name, "src_max")) {
 		len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_max) - 1);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		if (copy_from_user(buf, &user_buffer[i], len))
 			return -EFAULT;
 		buf[len] = 0;
@@ -1350,27 +1431,21 @@ static ssize_t pktgen_if_write(struct file *file,
 		memcpy(old_dmac, pkt_dev->dst_mac, ETH_ALEN);
 
 		len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		memset(valstr, 0, sizeof(valstr));
 		if (copy_from_user(valstr, &user_buffer[i], len))
 			return -EFAULT;
 		i += len;
 
 		for (*m = 0; *v && m < pkt_dev->dst_mac + 6; v++) {
-			if (*v >= '0' && *v <= '9') {
-				*m *= 16;
-				*m += *v - '0';
-			}
-			if (*v >= 'A' && *v <= 'F') {
-				*m *= 16;
-				*m += *v - 'A' + 10;
-			}
-			if (*v >= 'a' && *v <= 'f') {
-				*m *= 16;
-				*m += *v - 'a' + 10;
-			}
+			int value;
+
+			value = hex_to_bin(*v);
+			if (value >= 0)
+				*m = *m * 16 + value;
+
 			if (*v == ':') {
 				m++;
 				*m = 0;
@@ -1392,27 +1467,21 @@ static ssize_t pktgen_if_write(struct file *file,
 		memcpy(old_smac, pkt_dev->src_mac, ETH_ALEN);
 
 		len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		memset(valstr, 0, sizeof(valstr));
 		if (copy_from_user(valstr, &user_buffer[i], len))
 			return -EFAULT;
 		i += len;
 
 		for (*m = 0; *v && m < pkt_dev->src_mac + 6; v++) {
-			if (*v >= '0' && *v <= '9') {
-				*m *= 16;
-				*m += *v - '0';
-			}
-			if (*v >= 'A' && *v <= 'F') {
-				*m *= 16;
-				*m += *v - 'A' + 10;
-			}
-			if (*v >= 'a' && *v <= 'f') {
-				*m *= 16;
-				*m += *v - 'a' + 10;
-			}
+			int value;
+
+			value = hex_to_bin(*v);
+			if (value >= 0)
+				*m = *m * 16 + value;
+
 			if (*v == ':') {
 				m++;
 				*m = 0;
@@ -1435,9 +1504,9 @@ static ssize_t pktgen_if_write(struct file *file,
 
 	if (!strcmp(name, "flows")) {
 		len = num_arg(&user_buffer[i], 10, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if (value > MAX_CFLOWS)
 			value = MAX_CFLOWS;
@@ -1449,9 +1518,9 @@ static ssize_t pktgen_if_write(struct file *file,
 
 	if (!strcmp(name, "flowlen")) {
 		len = num_arg(&user_buffer[i], 10, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		pkt_dev->lflow = value;
 		sprintf(pg_result, "OK: flowlen=%u", pkt_dev->lflow);
@@ -1460,9 +1529,9 @@ static ssize_t pktgen_if_write(struct file *file,
 
 	if (!strcmp(name, "queue_map_min")) {
 		len = num_arg(&user_buffer[i], 5, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		pkt_dev->queue_map_min = value;
 		sprintf(pg_result, "OK: queue_map_min=%u", pkt_dev->queue_map_min);
@@ -1471,9 +1540,9 @@ static ssize_t pktgen_if_write(struct file *file,
 
 	if (!strcmp(name, "queue_map_max")) {
 		len = num_arg(&user_buffer[i], 5, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		pkt_dev->queue_map_max = value;
 		sprintf(pg_result, "OK: queue_map_max=%u", pkt_dev->queue_map_max);
@@ -1505,9 +1574,9 @@ static ssize_t pktgen_if_write(struct file *file,
 
 	if (!strcmp(name, "vlan_id")) {
 		len = num_arg(&user_buffer[i], 4, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if (value <= 4095) {
 			pkt_dev->vlan_id = value;  /* turn on VLAN */
@@ -1532,9 +1601,9 @@ static ssize_t pktgen_if_write(struct file *file,
 
 	if (!strcmp(name, "vlan_p")) {
 		len = num_arg(&user_buffer[i], 1, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if ((value <= 7) && (pkt_dev->vlan_id != 0xffff)) {
 			pkt_dev->vlan_p = value;
@@ -1547,9 +1616,9 @@ static ssize_t pktgen_if_write(struct file *file,
 
 	if (!strcmp(name, "vlan_cfi")) {
 		len = num_arg(&user_buffer[i], 1, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if ((value <= 1) && (pkt_dev->vlan_id != 0xffff)) {
 			pkt_dev->vlan_cfi = value;
@@ -1562,9 +1631,9 @@ static ssize_t pktgen_if_write(struct file *file,
 
 	if (!strcmp(name, "svlan_id")) {
 		len = num_arg(&user_buffer[i], 4, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if ((value <= 4095) && ((pkt_dev->vlan_id != 0xffff))) {
 			pkt_dev->svlan_id = value;  /* turn on SVLAN */
@@ -1589,9 +1658,9 @@ static ssize_t pktgen_if_write(struct file *file,
 
 	if (!strcmp(name, "svlan_p")) {
 		len = num_arg(&user_buffer[i], 1, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if ((value <= 7) && (pkt_dev->svlan_id != 0xffff)) {
 			pkt_dev->svlan_p = value;
@@ -1604,9 +1673,9 @@ static ssize_t pktgen_if_write(struct file *file,
 
 	if (!strcmp(name, "svlan_cfi")) {
 		len = num_arg(&user_buffer[i], 1, &value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if ((value <= 1) && (pkt_dev->svlan_id != 0xffff)) {
 			pkt_dev->svlan_cfi = value;
@@ -1620,9 +1689,9 @@ static ssize_t pktgen_if_write(struct file *file,
 	if (!strcmp(name, "tos")) {
 		__u32 tmp_value = 0;
 		len = hex32_arg(&user_buffer[i], 2, &tmp_value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if (len == 2) {
 			pkt_dev->tos = tmp_value;
@@ -1636,9 +1705,9 @@ static ssize_t pktgen_if_write(struct file *file,
 	if (!strcmp(name, "traffic_class")) {
 		__u32 tmp_value = 0;
 		len = hex32_arg(&user_buffer[i], 2, &tmp_value);
-		if (len < 0) {
+		if (len < 0)
 			return len;
-		}
+
 		i += len;
 		if (len == 2) {
 			pkt_dev->traffic_class = tmp_value;
@@ -1649,6 +1718,18 @@ static ssize_t pktgen_if_write(struct file *file,
 		return count;
 	}
 
+	if (!strcmp(name, "skb_priority")) {
+		len = num_arg(&user_buffer[i], 9, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		pkt_dev->skb_priority = value;
+		sprintf(pg_result, "OK: skb_priority=%i",
+			pkt_dev->skb_priority);
+		return count;
+	}
+
 	sprintf(pkt_dev->result, "No such parameter \"%s\"", name);
 	return -EINVAL;
 }
@@ -1670,7 +1751,7 @@ static const struct file_operations pktgen_if_fops = {
 static int pktgen_thread_show(struct seq_file *seq, void *v)
 {
 	struct pktgen_thread *t = seq->private;
-	struct pktgen_dev *pkt_dev;
+	const struct pktgen_dev *pkt_dev;
 
 	BUG_ON(!t);
 
@@ -1679,13 +1760,13 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
 	if_lock(t);
 	list_for_each_entry(pkt_dev, &t->if_list, list)
 		if (pkt_dev->running)
-			seq_printf(seq, "%s ", pkt_dev->odev->name);
+			seq_printf(seq, "%s ", pkt_dev->odevname);
 
 	seq_printf(seq, "\nStopped: ");
 
 	list_for_each_entry(pkt_dev, &t->if_list, list)
 		if (!pkt_dev->running)
-			seq_printf(seq, "%s ", pkt_dev->odev->name);
+			seq_printf(seq, "%s ", pkt_dev->odevname);
 
 	if (t->result[0])
 		seq_printf(seq, "\nResult: %s\n", t->result);
@@ -1701,9 +1782,9 @@ static ssize_t pktgen_thread_write(struct file *file,
 				   const char __user * user_buffer,
 				   size_t count, loff_t * offset)
 {
-	struct seq_file *seq = (struct seq_file *)file->private_data;
+	struct seq_file *seq = file->private_data;
 	struct pktgen_thread *t = seq->private;
-	int i = 0, max, len, ret;
+	int i, max, len, ret;
 	char name[40];
 	char *pg_result;
 
@@ -1712,12 +1793,12 @@ static ssize_t pktgen_thread_write(struct file *file,
 		return -EINVAL;
 	}
 
-	max = count - i;
-	len = count_trail_chars(&user_buffer[i], max);
+	max = count;
+	len = count_trail_chars(user_buffer, max);
 	if (len < 0)
 		return len;
 
-	i += len;
+	i = len;
 
 	/* Read variable name */
 
@@ -1742,7 +1823,7 @@ static ssize_t pktgen_thread_write(struct file *file,
 		       name, (unsigned long)count);
 
 	if (!t) {
-		printk(KERN_ERR "pktgen: ERROR: No thread\n");
+		pr_err("ERROR: No thread\n");
 		ret = -EINVAL;
 		goto out;
 	}
@@ -1808,9 +1889,10 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove)
 {
 	struct pktgen_thread *t;
 	struct pktgen_dev *pkt_dev = NULL;
+	bool exact = (remove == FIND);
 
 	list_for_each_entry(t, &pktgen_threads, th_list) {
-		pkt_dev = pktgen_find_dev(t, ifname);
+		pkt_dev = pktgen_find_dev(t, ifname, exact);
 		if (pkt_dev) {
 			if (remove) {
 				if_lock(t);
@@ -1834,7 +1916,7 @@ static void pktgen_mark_device(const char *ifname)
 	int i = 0;
 
 	mutex_lock(&pktgen_thread_lock);
-	pr_debug("pktgen: pktgen_mark_device marking %s for removal\n", ifname);
+	pr_debug("%s: marking %s for removal\n", __func__, ifname);
 
 	while (1) {
 
@@ -1843,15 +1925,14 @@ static void pktgen_mark_device(const char *ifname)
 			break;	/* success */
 
 		mutex_unlock(&pktgen_thread_lock);
-		pr_debug("pktgen: pktgen_mark_device waiting for %s "
-				"to disappear....\n", ifname);
+		pr_debug("%s: waiting for %s to disappear....\n",
+			 __func__, ifname);
 		schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try));
 		mutex_lock(&pktgen_thread_lock);
 
 		if (++i >= max_tries) {
-			printk(KERN_ERR "pktgen_mark_device: timed out after "
-			       "waiting %d msec for device %s to be removed\n",
-			       msec_per_try * i, ifname);
+			pr_err("%s: timed out after waiting %d msec for device %s to be removed\n",
+			       __func__, msec_per_try * i, ifname);
 			break;
 		}
 
@@ -1873,11 +1954,13 @@ static void pktgen_change_name(struct net_device *dev)
 
 			remove_proc_entry(pkt_dev->entry->name, pg_proc_dir);
 
-			pkt_dev->entry = create_proc_entry(dev->name, 0600,
-							   pg_proc_dir);
+			pkt_dev->entry = proc_create_data(dev->name, 0600,
+							  pg_proc_dir,
+							  &pktgen_if_fops,
+							  pkt_dev);
 			if (!pkt_dev->entry)
-				printk(KERN_ERR "pktgen: can't move proc "
-				       " entry for '%s'\n", dev->name);
+				pr_err("can't move proc entry for '%s'\n",
+				       dev->name);
 			break;
 		}
 	}
@@ -1908,13 +1991,14 @@ static int pktgen_device_event(struct notifier_block *unused,
 	return NOTIFY_DONE;
 }
 
-static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev, const char *ifname)
+static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev,
+						 const char *ifname)
 {
 	char b[IFNAMSIZ+5];
-	int i = 0;
+	int i;
 
-	for(i=0; ifname[i] != '@'; i++) {
-		if(i == IFNAMSIZ)
+	for (i = 0; ifname[i] != '@'; i++) {
+		if (i == IFNAMSIZ)
 			break;
 
 		b[i] = ifname[i];
@@ -1940,15 +2024,15 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
 
 	odev = pktgen_dev_get_by_name(pkt_dev, ifname);
 	if (!odev) {
-		printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname);
+		pr_err("no such netdevice: \"%s\"\n", ifname);
 		return -ENODEV;
 	}
 
 	if (odev->type != ARPHRD_ETHER) {
-		printk(KERN_ERR "pktgen: not an ethernet device: \"%s\"\n", ifname);
+		pr_err("not an ethernet device: \"%s\"\n", ifname);
 		err = -EINVAL;
 	} else if (!netif_running(odev)) {
-		printk(KERN_ERR "pktgen: device is down: \"%s\"\n", ifname);
+		pr_err("device is down: \"%s\"\n", ifname);
 		err = -ENETDOWN;
 	} else {
 		pkt_dev->odev = odev;
@@ -1967,8 +2051,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 	int ntxq;
 
 	if (!pkt_dev->odev) {
-		printk(KERN_ERR "pktgen: ERROR: pkt_dev->odev == NULL in "
-		       "setup_inject.\n");
+		pr_err("ERROR: pkt_dev->odev == NULL in setup_inject\n");
 		sprintf(pkt_dev->result,
 			"ERROR: pkt_dev->odev == NULL in setup_inject.\n");
 		return;
@@ -1978,19 +2061,15 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 	ntxq = pkt_dev->odev->real_num_tx_queues;
 
 	if (ntxq <= pkt_dev->queue_map_min) {
-		printk(KERN_WARNING "pktgen: WARNING: Requested "
-		       "queue_map_min (zero-based) (%d) exceeds valid range "
-		       "[0 - %d] for (%d) queues on %s, resetting\n",
-		       pkt_dev->queue_map_min, (ntxq ?: 1)- 1, ntxq,
-		       pkt_dev->odev->name);
+		pr_warning("WARNING: Requested queue_map_min (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
+			   pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq,
+			   pkt_dev->odevname);
 		pkt_dev->queue_map_min = ntxq - 1;
 	}
 	if (pkt_dev->queue_map_max >= ntxq) {
-		printk(KERN_WARNING "pktgen: WARNING: Requested "
-		       "queue_map_max (zero-based) (%d) exceeds valid range "
-		       "[0 - %d] for (%d) queues on %s, resetting\n",
-		       pkt_dev->queue_map_max, (ntxq ?: 1)- 1, ntxq,
-		       pkt_dev->odev->name);
+		pr_warning("WARNING: Requested queue_map_max (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
+			   pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq,
+			   pkt_dev->odevname);
 		pkt_dev->queue_map_max = ntxq - 1;
 	}
 
@@ -2030,15 +2109,15 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 			 */
 
 			rcu_read_lock();
-			if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) {
+			idev = __in6_dev_get(pkt_dev->odev);
+			if (idev) {
 				struct inet6_ifaddr *ifp;
 
 				read_lock_bh(&idev->lock);
 				for (ifp = idev->addr_list; ifp;
 				     ifp = ifp->if_next) {
-					if (ifp->scope == IFA_LINK
-					    && !(ifp->
-						 flags & IFA_F_TENTATIVE)) {
+					if (ifp->scope == IFA_LINK &&
+					    !(ifp->flags & IFA_F_TENTATIVE)) {
 						ipv6_addr_copy(&pkt_dev->
 							       cur_in6_saddr,
 							       &ifp->addr);
@@ -2050,8 +2129,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 			}
 			rcu_read_unlock();
 			if (err)
-				printk(KERN_ERR "pktgen: ERROR: IPv6 link "
-				       "address not availble.\n");
+				pr_err("ERROR: IPv6 link address not available\n");
 		}
 #endif
 	} else {
@@ -2089,27 +2167,45 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 	pkt_dev->nflows = 0;
 }
 
-static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us)
+/* Wait until spin_until, add the wait to idle_acc, then set next_tx */
+static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 {
-	__u64 start;
-	__u64 now;
+	ktime_t start_time, end_time;
+	s64 remaining;
+	struct hrtimer_sleeper t;
 
-	start = now = getCurUs();
-	while (now < spin_until_us) {
-		/* TODO: optimize sleeping behavior */
-		if (spin_until_us - now > jiffies_to_usecs(1) + 1)
-			schedule_timeout_interruptible(1);
-		else if (spin_until_us - now > 100) {
-			if (!pkt_dev->running)
-				return;
-			if (need_resched())
+	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_set_expires(&t.timer, spin_until);
+
+	remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
+	if (remaining <= 0)
+		goto out;	/* deadline already passed, nothing to wait for */
+
+	start_time = ktime_now();
+	if (remaining < 100000)
+		ndelay(remaining);	/* really small just spin */
+	else {
+		/* see do_nanosleep */
+		hrtimer_init_sleeper(&t, current);
+		do {
+			set_current_state(TASK_INTERRUPTIBLE);
+			hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
+			if (!hrtimer_active(&t.timer))
+				t.task = NULL;
+
+			if (likely(t.task))
 				schedule();
-		}
 
-		now = getCurUs();
+			hrtimer_cancel(&t.timer);
+		} while (t.task && pkt_dev->running && !signal_pending(current));
+		__set_current_state(TASK_RUNNING);
 	}
+	end_time = ktime_now();
 
-	pkt_dev->idle_acc += now - start;
+	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
+out:
+	pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
+	destroy_hrtimer_on_stack(&t.timer);	/* pairs with the init above */
 }
 
 static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
@@ -2120,13 +2216,9 @@ static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
 	pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev);
 }
 
-static inline int f_seen(struct pktgen_dev *pkt_dev, int flow)
+static inline int f_seen(const struct pktgen_dev *pkt_dev, int flow)
 {
-
-	if (pkt_dev->flows[flow].flags & F_INIT)
-		return 1;
-	else
-		return 0;
+	return !!(pkt_dev->flows[flow].flags & F_INIT);
 }
 
 static inline int f_pick(struct pktgen_dev *pkt_dev)
@@ -2160,12 +2252,13 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
 /* If there was already an IPSEC SA, we keep it as is, else
  * we go look for it ...
 */
+#define DUMMY_MARK 0
 static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
 {
 	struct xfrm_state *x = pkt_dev->flows[flow].x;
 	if (!x) {
 		/*slow path: we dont already have xfrm_state*/
-		x = xfrm_stateonly_find(&init_net,
+		x = xfrm_stateonly_find(&init_net, DUMMY_MARK,
 					(xfrm_address_t *)&pkt_dev->cur_daddr,
 					(xfrm_address_t *)&pkt_dev->cur_saddr,
 					AF_INET,
@@ -2174,7 +2267,7 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
 		if (x) {
 			pkt_dev->flows[flow].x = x;
 			set_pkt_overhead(pkt_dev);
-			pkt_dev->pkt_overhead+=x->props.header_len;
+			pkt_dev->pkt_overhead += x->props.header_len;
 		}
 
 	}
@@ -2186,7 +2279,7 @@ static void set_cur_queue_map(struct pktgen_dev *pkt_dev)
 	if (pkt_dev->flags & F_QUEUE_MAP_CPU)
 		pkt_dev->cur_queue_map = smp_processor_id();
 
-	else if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) {
+	else if (pkt_dev->queue_map_min <= pkt_dev->queue_map_max) {
 		__u16 t;
 		if (pkt_dev->flags & F_QUEUE_MAP_RND) {
 			t = random32() %
@@ -2313,18 +2406,18 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 
 	if (!(pkt_dev->flags & F_IPV6)) {
 
-		if ((imn = ntohl(pkt_dev->saddr_min)) < (imx =
-							 ntohl(pkt_dev->
-							       saddr_max))) {
+		imn = ntohl(pkt_dev->saddr_min);
+		imx = ntohl(pkt_dev->saddr_max);
+		if (imn < imx) {
 			__u32 t;
 			if (pkt_dev->flags & F_IPSRC_RND)
 				t = random32() % (imx - imn) + imn;
 			else {
 				t = ntohl(pkt_dev->cur_saddr);
 				t++;
-				if (t > imx) {
+				if (t > imx)
 					t = imn;
-				}
+
 			}
 			pkt_dev->cur_saddr = htonl(t);
 		}
@@ -2435,19 +2528,19 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
 	if (err)
 		goto error;
 
-	x->curlft.bytes +=skb->len;
+	x->curlft.bytes += skb->len;
 	x->curlft.packets++;
 error:
 	spin_unlock(&x->lock);
 	return err;
 }
 
-static inline void free_SAs(struct pktgen_dev *pkt_dev)
+static void free_SAs(struct pktgen_dev *pkt_dev)
 {
 	if (pkt_dev->cflows) {
 		/* let go of the SAs if we have them */
-		int i = 0;
-		for (;  i < pkt_dev->cflows; i++) {
+		int i;
+		for (i = 0; i < pkt_dev->cflows; i++) {
 			struct xfrm_state *x = pkt_dev->flows[i].x;
 			if (x) {
 				xfrm_state_put(x);
@@ -2457,7 +2550,7 @@ static inline void free_SAs(struct pktgen_dev *pkt_dev)
 	}
 }
 
-static inline int process_ipsec(struct pktgen_dev *pkt_dev,
+static int process_ipsec(struct pktgen_dev *pkt_dev,
 			      struct sk_buff *skb, __be16 protocol)
 {
 	if (pkt_dev->flags & F_IPSEC_ON) {
@@ -2467,11 +2560,11 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev,
 			int ret;
 			__u8 *eth;
 			nhead = x->props.header_len - skb_headroom(skb);
-			if (nhead >0) {
+			if (nhead > 0) {
 				ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
 				if (ret < 0) {
-					printk(KERN_ERR "Error expanding "
-					       "ipsec packet %d\n",ret);
+					pr_err("Error expanding ipsec packet %d\n",
+					       ret);
 					goto err;
 				}
 			}
@@ -2480,14 +2573,13 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev,
 			skb_pull(skb, ETH_HLEN);
 			ret = pktgen_output_ipsec(skb, pkt_dev);
 			if (ret) {
-				printk(KERN_ERR "Error creating ipsec "
-				       "packet %d\n",ret);
+				pr_err("Error creating ipsec packet %d\n", ret);
 				goto err;
 			}
 			/* restore ll */
 			eth = (__u8 *) skb_push(skb, ETH_HLEN);
 			memcpy(eth, pkt_dev->hh, 12);
-			*(u16 *) & eth[12] = protocol;
+			*(u16 *) &eth[12] = protocol;
 		}
 	}
 	return 1;
@@ -2500,9 +2592,9 @@ err:
 static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev)
 {
 	unsigned i;
-	for (i = 0; i < pkt_dev->nr_labels; i++) {
+	for (i = 0; i < pkt_dev->nr_labels; i++)
 		*mpls++ = pkt_dev->labels[i] & ~MPLS_STACK_BOTTOM;
-	}
+
 	mpls--;
 	*mpls |= MPLS_STACK_BOTTOM;
 }
@@ -2539,16 +2631,36 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	/* Update any of the values, used when we're incrementing various
 	 * fields.
 	 */
-	queue_map = pkt_dev->cur_queue_map;
 	mod_cur_headers(pkt_dev);
+	queue_map = pkt_dev->cur_queue_map;
 
 	datalen = (odev->hard_header_len + 16) & ~0xf;
-	skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + datalen +
-			pkt_dev->pkt_overhead, GFP_ATOMIC);
+
+	if (pkt_dev->flags & F_NODE) {
+		/* use the configured node when set, else numa_node_id() */
+		int node = pkt_dev->node >= 0 ? pkt_dev->node : numa_node_id();
+
+		skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64 +
+				  datalen + pkt_dev->pkt_overhead,
+				  GFP_NOWAIT, 0, node);
+		if (likely(skb)) {
+			/* reserve the pad and attach the output device by hand */
+			skb_reserve(skb, NET_SKB_PAD);
+			skb->dev = odev;
+		}
+	} else {
+		/* no node requested: allocate through the netdev helper */
+		skb = __netdev_alloc_skb(odev,
+					 pkt_dev->cur_pkt_size + 64 +
+					 datalen + pkt_dev->pkt_overhead,
+					 GFP_NOWAIT);
+	}
+
 	if (!skb) {
 		sprintf(pkt_dev->result, "No memory");
 		return NULL;
 	}
+	prefetchw(skb->data);
 
 	skb_reserve(skb, datalen);
 
@@ -2579,6 +2691,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	skb->transport_header = skb->network_header + sizeof(struct iphdr);
 	skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
 	skb_set_queue_mapping(skb, queue_map);
+	skb->priority = pkt_dev->skb_priority;
+
 	iph = ip_hdr(skb);
 	udph = udp_hdr(skb);
 
@@ -2603,6 +2717,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	iph->protocol = IPPROTO_UDP;	/* UDP */
 	iph->saddr = pkt_dev->cur_saddr;
 	iph->daddr = pkt_dev->cur_daddr;
+	iph->id = htons(pkt_dev->ip_id);
+	pkt_dev->ip_id++;
 	iph->frag_off = 0;
 	iplen = 20 + 8 + datalen;
 	iph->tot_len = htons(iplen);
@@ -2614,24 +2730,26 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
 
-	if (pkt_dev->nfrags <= 0)
+	if (pkt_dev->nfrags <= 0) {
 		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
-	else {
+		memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr));
+	} else {
 		int frags = pkt_dev->nfrags;
-		int i;
+		int i, len;
 
 		pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8);
 
 		if (frags > MAX_SKB_FRAGS)
 			frags = MAX_SKB_FRAGS;
 		if (datalen > frags * PAGE_SIZE) {
-			skb_put(skb, datalen - frags * PAGE_SIZE);
+			len = datalen - frags * PAGE_SIZE;
+			memset(skb_put(skb, len), 0, len);
 			datalen = frags * PAGE_SIZE;
 		}
 
 		i = 0;
 		while (datalen > 0) {
-			struct page *page = alloc_pages(GFP_KERNEL, 0);
+			struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
 			skb_shinfo(skb)->frags[i].page = page;
 			skb_shinfo(skb)->frags[i].page_offset = 0;
 			skb_shinfo(skb)->frags[i].size =
@@ -2668,8 +2786,9 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 		}
 	}
 
-	/* Stamp the time, and sequence number, convert them to network byte order */
-
+	/* Stamp the time, and sequence number,
+	 * convert them to network byte order
+	 */
 	if (pgh) {
 		struct timeval timestamp;
 
@@ -2879,15 +2998,17 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	/* Update any of the values, used when we're incrementing various
 	 * fields.
 	 */
-	queue_map = pkt_dev->cur_queue_map;
 	mod_cur_headers(pkt_dev);
+	queue_map = pkt_dev->cur_queue_map;
 
-	skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 +
-			pkt_dev->pkt_overhead, GFP_ATOMIC);
+	skb = __netdev_alloc_skb(odev,
+				 pkt_dev->cur_pkt_size + 64
+				 + 16 + pkt_dev->pkt_overhead, GFP_NOWAIT);
 	if (!skb) {
 		sprintf(pkt_dev->result, "No memory");
 		return NULL;
 	}
+	prefetchw(skb->data);
 
 	skb_reserve(skb, 16);
 
@@ -2918,11 +3039,12 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
 	skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
 	skb_set_queue_mapping(skb, queue_map);
+	skb->priority = pkt_dev->skb_priority;
 	iph = ipv6_hdr(skb);
 	udph = udp_hdr(skb);
 
 	memcpy(eth, pkt_dev->hh, 12);
-	*(__be16 *) & eth[12] = protocol;
+	*(__be16 *) &eth[12] = protocol;
 
 	/* Eth + IPh + UDPh + mpls */
 	datalen = pkt_dev->cur_pkt_size - 14 -
@@ -2932,8 +3054,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	if (datalen < sizeof(struct pktgen_hdr)) {
 		datalen = sizeof(struct pktgen_hdr);
 		if (net_ratelimit())
-			printk(KERN_INFO "pktgen: increased datalen to %d\n",
-			       datalen);
+			pr_info("increased datalen to %d\n", datalen);
 	}
 
 	udph->source = htons(pkt_dev->cur_udp_src);
@@ -3016,8 +3137,10 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 		}
 	}
 
-	/* Stamp the time, and sequence number, convert them to network byte order */
-	/* should we update cloned packets too ? */
+	/* Stamp the time, and sequence number,
+	 * convert them to network byte order
+	 * should we update cloned packets too ?
+	 */
 	if (pgh) {
 		struct timeval timestamp;
 
@@ -3033,8 +3156,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	return skb;
 }
 
-static inline struct sk_buff *fill_packet(struct net_device *odev,
-					  struct pktgen_dev *pkt_dev)
+static struct sk_buff *fill_packet(struct net_device *odev,
+				   struct pktgen_dev *pkt_dev)
 {
 	if (pkt_dev->flags & F_IPV6)
 		return fill_packet_ipv6(odev, pkt_dev);
@@ -3058,7 +3181,7 @@ static void pktgen_run(struct pktgen_thread *t)
 	struct pktgen_dev *pkt_dev;
 	int started = 0;
 
-	pr_debug("pktgen: entering pktgen_run. %p\n", t);
+	func_enter();
 
 	if_lock(t);
 	list_for_each_entry(pkt_dev, &t->if_list, list) {
@@ -3072,9 +3195,9 @@ static void pktgen_run(struct pktgen_thread *t)
 			pktgen_clear_counters(pkt_dev);
 			pkt_dev->running = 1;	/* Cranke yeself! */
 			pkt_dev->skb = NULL;
-			pkt_dev->started_at = getCurUs();
-			pkt_dev->next_tx_us = getCurUs();	/* Transmit immediately */
-			pkt_dev->next_tx_ns = 0;
+			pkt_dev->started_at =
+				pkt_dev->next_tx = ktime_now();
+
 			set_pkt_overhead(pkt_dev);
 
 			strcpy(pkt_dev->result, "Starting");
@@ -3091,7 +3214,7 @@ static void pktgen_stop_all_threads_ifs(void)
 {
 	struct pktgen_thread *t;
 
-	pr_debug("pktgen: entering pktgen_stop_all_threads_ifs.\n");
+	func_enter();
 
 	mutex_lock(&pktgen_thread_lock);
 
@@ -3101,17 +3224,14 @@ static void pktgen_stop_all_threads_ifs(void)
 	mutex_unlock(&pktgen_thread_lock);
 }
 
-static int thread_is_running(struct pktgen_thread *t)
+static int thread_is_running(const struct pktgen_thread *t)
 {
-	struct pktgen_dev *pkt_dev;
-	int res = 0;
+	const struct pktgen_dev *pkt_dev;
 
 	list_for_each_entry(pkt_dev, &t->if_list, list)
-		if (pkt_dev->running) {
-			res = 1;
-			break;
-		}
-	return res;
+		if (pkt_dev->running)
+			return 1;
+	return 0;
 }
 
 static int pktgen_wait_thread_run(struct pktgen_thread *t)
@@ -3159,7 +3279,7 @@ static void pktgen_run_all_threads(void)
 {
 	struct pktgen_thread *t;
 
-	pr_debug("pktgen: entering pktgen_run_all_threads.\n");
+	func_enter();
 
 	mutex_lock(&pktgen_thread_lock);
 
@@ -3168,7 +3288,8 @@ static void pktgen_run_all_threads(void)
 
 	mutex_unlock(&pktgen_thread_lock);
 
-	schedule_timeout_interruptible(msecs_to_jiffies(125));	/* Propagate thread->control  */
+	/* Propagate thread->control  */
+	schedule_timeout_interruptible(msecs_to_jiffies(125));
 
 	pktgen_wait_all_threads_run();
 }
@@ -3177,7 +3298,7 @@ static void pktgen_reset_all_threads(void)
 {
 	struct pktgen_thread *t;
 
-	pr_debug("pktgen: entering pktgen_reset_all_threads.\n");
+	func_enter();
 
 	mutex_lock(&pktgen_thread_lock);
 
@@ -3186,35 +3307,29 @@ static void pktgen_reset_all_threads(void)
 
 	mutex_unlock(&pktgen_thread_lock);
 
-	schedule_timeout_interruptible(msecs_to_jiffies(125));	/* Propagate thread->control  */
+	/* Propagate thread->control  */
+	schedule_timeout_interruptible(msecs_to_jiffies(125));
 
 	pktgen_wait_all_threads_run();
 }
 
 static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
 {
-	__u64 total_us, bps, mbps, pps, idle;
+	__u64 bps, mbps, pps;
 	char *p = pkt_dev->result;
-
-	total_us = pkt_dev->stopped_at - pkt_dev->started_at;
-
-	idle = pkt_dev->idle_acc;
-
-	p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n",
-		     (unsigned long long)total_us,
-		     (unsigned long long)(total_us - idle),
-		     (unsigned long long)idle,
+	ktime_t elapsed = ktime_sub(pkt_dev->stopped_at, pkt_dev->started_at);
+	ktime_t idle = ns_to_ktime(pkt_dev->idle_acc);
+
+	/* The three durations below are converted to microseconds */
+	p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n",
+		     (unsigned long long)ktime_to_us(elapsed),
+		     (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)),
+		     (unsigned long long)ktime_to_us(idle),
 		     (unsigned long long)pkt_dev->sofar,
 		     pkt_dev->cur_pkt_size, nr_frags);
 
-	pps = pkt_dev->sofar * USEC_PER_SEC;
-
-	while ((total_us >> 32) != 0) {
-		pps >>= 1;
-		total_us >>= 1;
-	}
-
-	do_div(pps, total_us);
+	pps = div64_u64(pkt_dev->sofar * NSEC_PER_SEC,
+			ktime_to_ns(elapsed));
 
 	bps = pps * 8 * pkt_dev->cur_pkt_size;
 
@@ -3228,18 +3343,19 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
 }
 
 /* Set stopped-at timer, remove from running list, do counters & statistics */
-
 static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
 {
 	int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1;
 
 	if (!pkt_dev->running) {
-		printk(KERN_WARNING "pktgen: interface: %s is already "
-		       "stopped\n", pkt_dev->odev->name);
+		pr_warning("interface: %s is already stopped\n",
+			   pkt_dev->odevname);
 		return -EINVAL;
 	}
 
-	pkt_dev->stopped_at = getCurUs();
+	kfree_skb(pkt_dev->skb);
+	pkt_dev->skb = NULL;
+	pkt_dev->stopped_at = ktime_now();
 	pkt_dev->running = 0;
 
 	show_results(pkt_dev, nr_frags);
@@ -3258,7 +3374,7 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
 			continue;
 		if (best == NULL)
 			best = pkt_dev;
-		else if (pkt_dev->next_tx_us < best->next_tx_us)
+		else if (ktime_lt(pkt_dev->next_tx, best->next_tx))
 			best = pkt_dev;
 	}
 	if_unlock(t);
@@ -3269,15 +3385,12 @@ static void pktgen_stop(struct pktgen_thread *t)
 {
 	struct pktgen_dev *pkt_dev;
 
-	pr_debug("pktgen: entering pktgen_stop\n");
+	func_enter();
 
 	if_lock(t);
 
 	list_for_each_entry(pkt_dev, &t->if_list, list) {
 		pktgen_stop_device(pkt_dev);
-		kfree_skb(pkt_dev->skb);
-
-		pkt_dev->skb = NULL;
 	}
 
 	if_unlock(t);
@@ -3292,7 +3405,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
 	struct list_head *q, *n;
 	struct pktgen_dev *cur;
 
-	pr_debug("pktgen: entering pktgen_rem_one_if\n");
+	func_enter();
 
 	if_lock(t);
 
@@ -3318,9 +3431,10 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
 	struct list_head *q, *n;
 	struct pktgen_dev *cur;
 
+	func_enter();
+
 	/* Remove all devices, free mem */
 
-	pr_debug("pktgen: entering pktgen_rem_all_ifs\n");
 	if_lock(t);
 
 	list_for_each_safe(q, n, &t->if_list) {
@@ -3341,163 +3455,123 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
 
 	remove_proc_entry(t->tsk->comm, pg_proc_dir);
 
-	mutex_lock(&pktgen_thread_lock);
+}
 
-	list_del(&t->th_list);
+static void pktgen_resched(struct pktgen_dev *pkt_dev)
+{
+	ktime_t idle_start = ktime_now();
+	schedule();
+	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
+}
 
-	mutex_unlock(&pktgen_thread_lock);
+static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
+{
+	ktime_t idle_start = ktime_now();
+
+	while (atomic_read(&(pkt_dev->skb->users)) != 1) {
+		if (signal_pending(current))
+			break;
+		/* plain schedule(): the whole wait is added to idle_acc below */
+		if (need_resched())
+			schedule();
+		else
+			cpu_relax();
+	}
+	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
 }
 
-static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
+static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 {
 	struct net_device *odev = pkt_dev->odev;
-	int (*xmit)(struct sk_buff *, struct net_device *)
+	netdev_tx_t (*xmit)(struct sk_buff *, struct net_device *)
 		= odev->netdev_ops->ndo_start_xmit;
 	struct netdev_queue *txq;
-	__u64 idle_start = 0;
 	u16 queue_map;
 	int ret;
 
-	if (pkt_dev->delay_us || pkt_dev->delay_ns) {
-		u64 now;
-
-		now = getCurUs();
-		if (now < pkt_dev->next_tx_us)
-			spin(pkt_dev, pkt_dev->next_tx_us);
-
-		/* This is max DELAY, this has special meaning of
-		 * "never transmit"
-		 */
-		if (pkt_dev->delay_us == 0x7FFFFFFF) {
-			pkt_dev->next_tx_us = getCurUs() + pkt_dev->delay_us;
-			pkt_dev->next_tx_ns = pkt_dev->delay_ns;
-			goto out;
-		}
+	/* If device is offline, then don't send */
+	if (unlikely(!netif_running(odev) || !netif_carrier_ok(odev))) {
+		pktgen_stop_device(pkt_dev);
+		return;
 	}
 
-	if (!pkt_dev->skb) {
-		set_cur_queue_map(pkt_dev);
-		queue_map = pkt_dev->cur_queue_map;
-	} else {
-		queue_map = skb_get_queue_mapping(pkt_dev->skb);
+	/* This is max DELAY, this has special meaning of
+	 * "never transmit"
+	 */
+	if (unlikely(pkt_dev->delay == ULLONG_MAX)) {
+		pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX);
+		return;
 	}
 
-	txq = netdev_get_tx_queue(odev, queue_map);
-	if (netif_tx_queue_stopped(txq) ||
-	    netif_tx_queue_frozen(txq) ||
-	    need_resched()) {
-		idle_start = getCurUs();
-
-		if (!netif_running(odev)) {
-			pktgen_stop_device(pkt_dev);
-			kfree_skb(pkt_dev->skb);
-			pkt_dev->skb = NULL;
-			goto out;
-		}
-		if (need_resched())
-			schedule();
-
-		pkt_dev->idle_acc += getCurUs() - idle_start;
+	/* If no skb or clone count exhausted then get new one */
+	if (!pkt_dev->skb || (pkt_dev->last_ok &&
+			      ++pkt_dev->clone_count >= pkt_dev->clone_skb)) {
+		/* build a new pkt */
+		kfree_skb(pkt_dev->skb);
 
-		if (netif_tx_queue_stopped(txq) ||
-		    netif_tx_queue_frozen(txq)) {
-			pkt_dev->next_tx_us = getCurUs();	/* TODO */
-			pkt_dev->next_tx_ns = 0;
-			goto out;	/* Try the next interface */
+		pkt_dev->skb = fill_packet(odev, pkt_dev);
+		if (pkt_dev->skb == NULL) {
+			pr_err("ERROR: couldn't allocate skb in fill_packet\n");
+			schedule();
+			pkt_dev->clone_count--;	/* back out increment, OOM */
+			return;
 		}
+		pkt_dev->last_pkt_size = pkt_dev->skb->len;
+		pkt_dev->allocated_skbs++;
+		pkt_dev->clone_count = 0;	/* reset counter */
 	}
 
-	if (pkt_dev->last_ok || !pkt_dev->skb) {
-		if ((++pkt_dev->clone_count >= pkt_dev->clone_skb)
-		    || (!pkt_dev->skb)) {
-			/* build a new pkt */
-			kfree_skb(pkt_dev->skb);
-
-			pkt_dev->skb = fill_packet(odev, pkt_dev);
-			if (pkt_dev->skb == NULL) {
-				printk(KERN_ERR "pktgen: ERROR: couldn't "
-				       "allocate skb in fill_packet.\n");
-				schedule();
-				pkt_dev->clone_count--;	/* back out increment, OOM */
-				goto out;
-			}
-			pkt_dev->allocated_skbs++;
-			pkt_dev->clone_count = 0;	/* reset counter */
-		}
-	}
+	if (pkt_dev->delay && pkt_dev->last_ok)
+		spin(pkt_dev, pkt_dev->next_tx);
 
-	/* fill_packet() might have changed the queue */
 	queue_map = skb_get_queue_mapping(pkt_dev->skb);
 	txq = netdev_get_tx_queue(odev, queue_map);
 
 	__netif_tx_lock_bh(txq);
-	if (!netif_tx_queue_stopped(txq) &&
-	    !netif_tx_queue_frozen(txq)) {
-
-		atomic_inc(&(pkt_dev->skb->users));
-	      retry_now:
-		ret = (*xmit)(pkt_dev->skb, odev);
-		if (likely(ret == NETDEV_TX_OK)) {
-			txq_trans_update(txq);
-			pkt_dev->last_ok = 1;
-			pkt_dev->sofar++;
-			pkt_dev->seq_num++;
-			pkt_dev->tx_bytes += pkt_dev->cur_pkt_size;
-
-		} else if (ret == NETDEV_TX_LOCKED
-			   && (odev->features & NETIF_F_LLTX)) {
-			cpu_relax();
-			goto retry_now;
-		} else {	/* Retry it next time */
-
-			atomic_dec(&(pkt_dev->skb->users));
-
-			if (debug && net_ratelimit())
-				printk(KERN_INFO "pktgen: Hard xmit error\n");
-
-			pkt_dev->errors++;
-			pkt_dev->last_ok = 0;
-		}
-
-		pkt_dev->next_tx_us = getCurUs();
-		pkt_dev->next_tx_ns = 0;
 
-		pkt_dev->next_tx_us += pkt_dev->delay_us;
-		pkt_dev->next_tx_ns += pkt_dev->delay_ns;
-
-		if (pkt_dev->next_tx_ns > 1000) {
-			pkt_dev->next_tx_us++;
-			pkt_dev->next_tx_ns -= 1000;
-		}
-	}
-
-	else {			/* Retry it next time */
+	if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) {
+		ret = NETDEV_TX_BUSY;
+		pkt_dev->last_ok = 0;
+		goto unlock;
+	}
+	atomic_inc(&(pkt_dev->skb->users));
+	ret = (*xmit)(pkt_dev->skb, odev);
+
+	switch (ret) {
+	case NETDEV_TX_OK:
+		txq_trans_update(txq);
+		pkt_dev->last_ok = 1;
+		pkt_dev->sofar++;
+		pkt_dev->seq_num++;
+		pkt_dev->tx_bytes += pkt_dev->last_pkt_size;
+		break;
+	case NET_XMIT_DROP:
+	case NET_XMIT_CN:
+	case NET_XMIT_POLICED:
+		/* skb has been consumed */
+		pkt_dev->errors++;
+		break;
+	default: /* Drivers are not supposed to return other values! */
+		if (net_ratelimit())
+			pr_info("%s xmit error: %d\n", pkt_dev->odevname, ret);
+		pkt_dev->errors++;
+		/* fallthru */
+	case NETDEV_TX_LOCKED:
+	case NETDEV_TX_BUSY:
+		/* Retry it next time */
+		atomic_dec(&(pkt_dev->skb->users));
 		pkt_dev->last_ok = 0;
-		pkt_dev->next_tx_us = getCurUs();	/* TODO */
-		pkt_dev->next_tx_ns = 0;
 	}
-
+unlock:
 	__netif_tx_unlock_bh(txq);
 
 	/* If pkt_dev->count is zero, then run forever */
 	if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
-		if (atomic_read(&(pkt_dev->skb->users)) != 1) {
-			idle_start = getCurUs();
-			while (atomic_read(&(pkt_dev->skb->users)) != 1) {
-				if (signal_pending(current)) {
-					break;
-				}
-				schedule();
-			}
-			pkt_dev->idle_acc += getCurUs() - idle_start;
-		}
+		pktgen_wait_for_skb(pkt_dev);
 
 		/* Done with this */
 		pktgen_stop_device(pkt_dev);
-		kfree_skb(pkt_dev->skb);
-		pkt_dev->skb = NULL;
 	}
-out:;
 }
 
 /*
@@ -3516,7 +3590,7 @@ static int pktgen_thread_worker(void *arg)
 	init_waitqueue_head(&t->queue);
 	complete(&t->start_done);
 
-	pr_debug("pktgen: starting pktgen/%d:  pid=%d\n", cpu, task_pid_nr(current));
+	pr_debug("starting pktgen/%d:  pid=%d\n", cpu, task_pid_nr(current));
 
 	set_current_state(TASK_INTERRUPTIBLE);
 
@@ -3525,20 +3599,27 @@ static int pktgen_thread_worker(void *arg)
 	while (!kthread_should_stop()) {
 		pkt_dev = next_to_run(t);
 
-		if (!pkt_dev &&
-		    (t->control & (T_STOP | T_RUN | T_REMDEVALL | T_REMDEV))
-		    == 0) {
-			prepare_to_wait(&(t->queue), &wait,
-					TASK_INTERRUPTIBLE);
-			schedule_timeout(HZ / 10);
-			finish_wait(&(t->queue), &wait);
+		if (unlikely(!pkt_dev && t->control == 0)) {
+			if (pktgen_exiting)
+				break;
+			wait_event_interruptible_timeout(t->queue,
+							 t->control != 0,
+							 HZ/10);
+			try_to_freeze();
+			continue;
 		}
 
 		__set_current_state(TASK_RUNNING);
 
-		if (pkt_dev)
+		if (likely(pkt_dev)) {
 			pktgen_xmit(pkt_dev);
 
+			if (need_resched())
+				pktgen_resched(pkt_dev);
+			else
+				cpu_relax();
+		}
+
 		if (t->control & T_STOP) {
 			pktgen_stop(t);
 			t->control &= ~(T_STOP);
@@ -3564,32 +3645,44 @@ static int pktgen_thread_worker(void *arg)
 		set_current_state(TASK_INTERRUPTIBLE);
 	}
 
-	pr_debug("pktgen: %s stopping all device\n", t->tsk->comm);
+	pr_debug("%s stopping all device\n", t->tsk->comm);
 	pktgen_stop(t);
 
-	pr_debug("pktgen: %s removing all device\n", t->tsk->comm);
+	pr_debug("%s removing all device\n", t->tsk->comm);
 	pktgen_rem_all_ifs(t);
 
-	pr_debug("pktgen: %s removing thread.\n", t->tsk->comm);
+	pr_debug("%s removing thread\n", t->tsk->comm);
 	pktgen_rem_thread(t);
 
+	/* Wait for kthread_stop */
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
 	return 0;
 }
 
 static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
-					  const char *ifname)
+					  const char *ifname, bool exact)
 {
 	struct pktgen_dev *p, *pkt_dev = NULL;
-	if_lock(t);
+	size_t len = strlen(ifname);
 
+	if_lock(t);
 	list_for_each_entry(p, &t->if_list, list)
-		if (strncmp(p->odev->name, ifname, IFNAMSIZ) == 0) {
+		if (strncmp(p->odevname, ifname, len) == 0) {
+			if (p->odevname[len]) {
+				if (exact || p->odevname[len] != '@')
+					continue;
+			}
 			pkt_dev = p;
 			break;
 		}
 
 	if_unlock(t);
-	pr_debug("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev);
+	pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev);
 	return pkt_dev;
 }
 
@@ -3605,8 +3698,7 @@ static int add_dev_to_thread(struct pktgen_thread *t,
 	if_lock(t);
 
 	if (pkt_dev->pg_thread) {
-		printk(KERN_ERR "pktgen: ERROR: already assigned "
-		       "to a thread.\n");
+		pr_err("ERROR: already assigned to a thread\n");
 		rv = -EBUSY;
 		goto out;
 	}
@@ -3626,20 +3718,23 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 {
 	struct pktgen_dev *pkt_dev;
 	int err;
+	int node = cpu_to_node(t->cpu);
 
 	/* We don't allow a device to be on several threads */
 
 	pkt_dev = __pktgen_NN_threads(ifname, FIND);
 	if (pkt_dev) {
-		printk(KERN_ERR "pktgen: ERROR: interface already used.\n");
+		pr_err("ERROR: interface already used\n");
 		return -EBUSY;
 	}
 
-	pkt_dev = kzalloc(sizeof(struct pktgen_dev), GFP_KERNEL);
+	pkt_dev = kzalloc_node(sizeof(struct pktgen_dev), GFP_KERNEL, node);
 	if (!pkt_dev)
 		return -ENOMEM;
 
-	pkt_dev->flows = vmalloc(MAX_CFLOWS * sizeof(struct flow_state));
+	strcpy(pkt_dev->odevname, ifname);
+	pkt_dev->flows = vmalloc_node(MAX_CFLOWS * sizeof(struct flow_state),
+				      node);
 	if (pkt_dev->flows == NULL) {
 		kfree(pkt_dev);
 		return -ENOMEM;
@@ -3651,8 +3746,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 	pkt_dev->max_pkt_size = ETH_ZLEN;
 	pkt_dev->nfrags = 0;
 	pkt_dev->clone_skb = pg_clone_skb_d;
-	pkt_dev->delay_us = pg_delay_d / 1000;
-	pkt_dev->delay_ns = pg_delay_d % 1000;
+	pkt_dev->delay = pg_delay_d;
 	pkt_dev->count = pg_count_d;
 	pkt_dev->sofar = 0;
 	pkt_dev->udp_src_min = 9;	/* sink port */
@@ -3666,6 +3760,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 	pkt_dev->svlan_p = 0;
 	pkt_dev->svlan_cfi = 0;
 	pkt_dev->svlan_id = 0xffff;
+	pkt_dev->node = -1;
 
 	err = pktgen_setup_dev(pkt_dev, ifname);
 	if (err)
@@ -3674,7 +3769,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 	pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir,
 					  &pktgen_if_fops, pkt_dev);
 	if (!pkt_dev->entry) {
-		printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n",
+		pr_err("cannot create %s/%s procfs entry\n",
 		       PG_PROC_DIR, ifname);
 		err = -EINVAL;
 		goto out2;
@@ -3702,10 +3797,10 @@ static int __init pktgen_create_thread(int cpu)
 	struct proc_dir_entry *pe;
 	struct task_struct *p;
 
-	t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL);
+	t = kzalloc_node(sizeof(struct pktgen_thread), GFP_KERNEL,
+			 cpu_to_node(cpu));
 	if (!t) {
-		printk(KERN_ERR "pktgen: ERROR: out of memory, can't "
-		       "create new thread.\n");
+		pr_err("ERROR: out of memory, can't create new thread\n");
 		return -ENOMEM;
 	}
 
@@ -3719,8 +3814,7 @@ static int __init pktgen_create_thread(int cpu)
 
 	p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
 	if (IS_ERR(p)) {
-		printk(KERN_ERR "pktgen: kernel_thread() failed "
-		       "for cpu %d\n", t->cpu);
+		pr_err("kernel_thread() failed for cpu %d\n", t->cpu);
 		list_del(&t->th_list);
 		kfree(t);
 		return PTR_ERR(p);
@@ -3731,7 +3825,7 @@ static int __init pktgen_create_thread(int cpu)
 	pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir,
 			      &pktgen_thread_fops, t);
 	if (!pe) {
-		printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n",
+		pr_err("cannot create %s/%s procfs entry\n",
 		       PG_PROC_DIR, t->tsk->comm);
 		kthread_stop(p);
 		list_del(&t->th_list);
@@ -3765,11 +3859,10 @@ static int pktgen_remove_device(struct pktgen_thread *t,
 				struct pktgen_dev *pkt_dev)
 {
 
-	pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev);
+	pr_debug("remove_device pkt_dev=%p\n", pkt_dev);
 
 	if (pkt_dev->running) {
-		printk(KERN_WARNING "pktgen: WARNING: trying to remove a "
-		       "running interface, stopping it now.\n");
+		pr_warning("WARNING: trying to remove a running interface, stopping it now\n");
 		pktgen_stop_device(pkt_dev);
 	}
 
@@ -3800,7 +3893,7 @@ static int __init pg_init(void)
 	int cpu;
 	struct proc_dir_entry *pe;
 
-	printk(KERN_INFO "%s", version);
+	pr_info("%s", version);
 
 	pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net);
 	if (!pg_proc_dir)
@@ -3808,8 +3901,7 @@ static int __init pg_init(void)
 
 	pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops);
 	if (pe == NULL) {
-		printk(KERN_ERR "pktgen: ERROR: cannot create %s "
-		       "procfs entry.\n", PGCTRL);
+		pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL);
 		proc_net_remove(&init_net, PG_PROC_DIR);
 		return -EINVAL;
 	}
@@ -3822,13 +3914,12 @@ static int __init pg_init(void)
 
 		err = pktgen_create_thread(cpu);
 		if (err)
-			printk(KERN_WARNING "pktgen: WARNING: Cannot create "
-			       "thread for cpu %d (%d)\n", cpu, err);
+			pr_warning("WARNING: Cannot create thread for cpu %d (%d)\n",
+				   cpu, err);
 	}
 
 	if (list_empty(&pktgen_threads)) {
-		printk(KERN_ERR "pktgen: ERROR: Initialization failed for "
-		       "all threads\n");
+		pr_err("ERROR: Initialization failed for all threads\n");
 		unregister_netdevice_notifier(&pktgen_notifier_block);
 		remove_proc_entry(PGCTRL, pg_proc_dir);
 		proc_net_remove(&init_net, PG_PROC_DIR);
@@ -3842,10 +3933,9 @@ static void __exit pg_cleanup(void)
 {
 	struct pktgen_thread *t;
 	struct list_head *q, *n;
-	wait_queue_head_t queue;
-	init_waitqueue_head(&queue);
 
 	/* Stop all interfaces & threads */
+	pktgen_exiting = true;
 
 	list_for_each_safe(q, n, &pktgen_threads) {
 		t = list_entry(q, struct pktgen_thread, th_list);
@@ -3864,10 +3954,15 @@ static void __exit pg_cleanup(void)
 module_init(pg_init);
 module_exit(pg_cleanup);
 
-MODULE_AUTHOR("Robert Olsson <robert.olsson@its.uu.se");
+MODULE_AUTHOR("Robert Olsson <robert.olsson@its.uu.se>");
 MODULE_DESCRIPTION("Packet Generator tool");
 MODULE_LICENSE("GPL");
+MODULE_VERSION(VERSION);
 module_param(pg_count_d, int, 0);
+MODULE_PARM_DESC(pg_count_d, "Default number of packets to inject");
 module_param(pg_delay_d, int, 0);
+MODULE_PARM_DESC(pg_delay_d, "Default delay between packets (nanoseconds)");
 module_param(pg_clone_skb_d, int, 0);
+MODULE_PARM_DESC(pg_clone_skb_d, "Default number of copies of the same packet");
 module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Enable debugging of pktgen module");
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 7552495aff7..182236b2510 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -33,6 +33,7 @@
  * Note : Dont forget somaxconn that may limit backlog too.
  */
 int sysctl_max_syn_backlog = 256;
+EXPORT_SYMBOL(sysctl_max_syn_backlog);
 
 int reqsk_queue_alloc(struct request_sock_queue *queue,
 		      unsigned int nr_table_entries)
@@ -45,9 +46,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
 	nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
 	lopt_size += nr_table_entries * sizeof(struct request_sock *);
 	if (lopt_size > PAGE_SIZE)
-		lopt = __vmalloc(lopt_size,
-			GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
-			PAGE_KERNEL);
+		lopt = vzalloc(lopt_size);
 	else
 		lopt = kzalloc(lopt_size, GFP_KERNEL);
 	if (lopt == NULL)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d78030f88bd..a5f7535aab5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -35,11 +35,10 @@
 #include <linux/security.h>
 #include <linux/mutex.h>
 #include <linux/if_addr.h>
-#include <linux/nsproxy.h>
+#include <linux/pci.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-#include <asm/string.h>
 
 #include <linux/inet.h>
 #include <linux/netdevice.h>
@@ -52,9 +51,9 @@
 #include <net/pkt_sched.h>
 #include <net/fib_rules.h>
 #include <net/rtnetlink.h>
+#include <net/net_namespace.h>
 
-struct rtnl_link
-{
+struct rtnl_link {
 	rtnl_doit_func		doit;
 	rtnl_dumpit_func	dumpit;
 };
@@ -65,6 +64,7 @@ void rtnl_lock(void)
 {
 	mutex_lock(&rtnl_mutex);
 }
+EXPORT_SYMBOL(rtnl_lock);
 
 void __rtnl_unlock(void)
 {
@@ -76,18 +76,29 @@ void rtnl_unlock(void)
 	/* This fellow will unlock it for us. */
 	netdev_run_todo();
 }
+EXPORT_SYMBOL(rtnl_unlock);
 
 int rtnl_trylock(void)
 {
 	return mutex_trylock(&rtnl_mutex);
 }
+EXPORT_SYMBOL(rtnl_trylock);
 
 int rtnl_is_locked(void)
 {
 	return mutex_is_locked(&rtnl_mutex);
 }
+EXPORT_SYMBOL(rtnl_is_locked);
 
-static struct rtnl_link *rtnl_msg_handlers[NPROTO];
+#ifdef CONFIG_PROVE_LOCKING
+int lockdep_rtnl_is_held(void)
+{
+	return lockdep_is_held(&rtnl_mutex);
+}
+EXPORT_SYMBOL(lockdep_rtnl_is_held);
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
+
+static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
 
 static inline int rtm_msgindex(int msgtype)
 {
@@ -107,7 +118,11 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
 {
 	struct rtnl_link *tab;
 
-	tab = rtnl_msg_handlers[protocol];
+	if (protocol <= RTNL_FAMILY_MAX)
+		tab = rtnl_msg_handlers[protocol];
+	else
+		tab = NULL;
+
 	if (tab == NULL || tab[msgindex].doit == NULL)
 		tab = rtnl_msg_handlers[PF_UNSPEC];
 
@@ -118,7 +133,11 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
 {
 	struct rtnl_link *tab;
 
-	tab = rtnl_msg_handlers[protocol];
+	if (protocol <= RTNL_FAMILY_MAX)
+		tab = rtnl_msg_handlers[protocol];
+	else
+		tab = NULL;
+
 	if (tab == NULL || tab[msgindex].dumpit == NULL)
 		tab = rtnl_msg_handlers[PF_UNSPEC];
 
@@ -148,7 +167,7 @@ int __rtnl_register(int protocol, int msgtype,
 	struct rtnl_link *tab;
 	int msgindex;
 
-	BUG_ON(protocol < 0 || protocol >= NPROTO);
+	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
 	msgindex = rtm_msgindex(msgtype);
 
 	tab = rtnl_msg_handlers[protocol];
@@ -168,7 +187,6 @@ int __rtnl_register(int protocol, int msgtype,
 
 	return 0;
 }
-
 EXPORT_SYMBOL_GPL(__rtnl_register);
 
 /**
@@ -188,7 +206,6 @@ void rtnl_register(int protocol, int msgtype,
 		      "protocol = %d, message type = %d\n",
 		      protocol, msgtype);
 }
-
 EXPORT_SYMBOL_GPL(rtnl_register);
 
 /**
@@ -202,7 +219,7 @@ int rtnl_unregister(int protocol, int msgtype)
 {
 	int msgindex;
 
-	BUG_ON(protocol < 0 || protocol >= NPROTO);
+	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
 	msgindex = rtm_msgindex(msgtype);
 
 	if (rtnl_msg_handlers[protocol] == NULL)
@@ -213,7 +230,6 @@ int rtnl_unregister(int protocol, int msgtype)
 
 	return 0;
 }
-
 EXPORT_SYMBOL_GPL(rtnl_unregister);
 
 /**
@@ -225,12 +241,11 @@ EXPORT_SYMBOL_GPL(rtnl_unregister);
  */
 void rtnl_unregister_all(int protocol)
 {
-	BUG_ON(protocol < 0 || protocol >= NPROTO);
+	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
 
 	kfree(rtnl_msg_handlers[protocol]);
 	rtnl_msg_handlers[protocol] = NULL;
 }
-
 EXPORT_SYMBOL_GPL(rtnl_unregister_all);
 
 static LIST_HEAD(link_ops);
@@ -248,12 +263,11 @@ static LIST_HEAD(link_ops);
 int __rtnl_link_register(struct rtnl_link_ops *ops)
 {
 	if (!ops->dellink)
-		ops->dellink = unregister_netdevice;
+		ops->dellink = unregister_netdevice_queue;
 
 	list_add_tail(&ops->list, &link_ops);
 	return 0;
 }
-
 EXPORT_SYMBOL_GPL(__rtnl_link_register);
 
 /**
@@ -271,29 +285,20 @@ int rtnl_link_register(struct rtnl_link_ops *ops)
 	rtnl_unlock();
 	return err;
 }
-
 EXPORT_SYMBOL_GPL(rtnl_link_register);
 
 static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
 {
 	struct net_device *dev;
-restart:
+	LIST_HEAD(list_kill);
+
 	for_each_netdev(net, dev) {
-		if (dev->rtnl_link_ops == ops) {
-			ops->dellink(dev);
-			goto restart;
-		}
+		if (dev->rtnl_link_ops == ops)
+			ops->dellink(dev, &list_kill);
 	}
+	unregister_netdevice_many(&list_kill);
 }
 
-void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
-{
-	rtnl_lock();
-	__rtnl_kill_links(net, ops);
-	rtnl_unlock();
-}
-EXPORT_SYMBOL_GPL(rtnl_kill_links);
-
 /**
  * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
  * @ops: struct rtnl_link_ops * to unregister
@@ -309,7 +314,6 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops)
 	}
 	list_del(&ops->list);
 }
-
 EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
 
 /**
@@ -322,7 +326,6 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops)
 	__rtnl_link_unregister(ops);
 	rtnl_unlock();
 }
-
 EXPORT_SYMBOL_GPL(rtnl_link_unregister);
 
 static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
@@ -344,16 +347,106 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
 	if (!ops)
 		return 0;
 
-	size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
-	       nlmsg_total_size(strlen(ops->kind) + 1);	 /* IFLA_INFO_KIND */
+	size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
+	       nla_total_size(strlen(ops->kind) + 1);  /* IFLA_INFO_KIND */
 
 	if (ops->get_size)
 		/* IFLA_INFO_DATA + nested data */
-		size += nlmsg_total_size(sizeof(struct nlattr)) +
+		size += nla_total_size(sizeof(struct nlattr)) +
 			ops->get_size(dev);
 
 	if (ops->get_xstats_size)
-		size += ops->get_xstats_size(dev);	/* IFLA_INFO_XSTATS */
+		/* IFLA_INFO_XSTATS */
+		size += nla_total_size(ops->get_xstats_size(dev));
+
+	return size;
+}
+
+static LIST_HEAD(rtnl_af_ops);
+
+static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
+{
+	const struct rtnl_af_ops *ops;
+
+	list_for_each_entry(ops, &rtnl_af_ops, list) {
+		if (ops->family == family)
+			return ops;
+	}
+
+	return NULL;
+}
+
+/**
+ * __rtnl_af_register - Register rtnl_af_ops with rtnetlink.
+ * @ops: struct rtnl_af_ops * to register
+ *
+ * The caller must hold the rtnl_mutex.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int __rtnl_af_register(struct rtnl_af_ops *ops)
+{
+	list_add_tail(&ops->list, &rtnl_af_ops);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__rtnl_af_register);
+
+/**
+ * rtnl_af_register - Register rtnl_af_ops with rtnetlink.
+ * @ops: struct rtnl_af_ops * to register
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int rtnl_af_register(struct rtnl_af_ops *ops)
+{
+	int err;
+
+	rtnl_lock();
+	err = __rtnl_af_register(ops);
+	rtnl_unlock();
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtnl_af_register);
+
+/**
+ * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
+ * @ops: struct rtnl_af_ops * to unregister
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+void __rtnl_af_unregister(struct rtnl_af_ops *ops)
+{
+	list_del(&ops->list);
+}
+EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
+
+/**
+ * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
+ * @ops: struct rtnl_af_ops * to unregister
+ */
+void rtnl_af_unregister(struct rtnl_af_ops *ops)
+{
+	rtnl_lock();
+	__rtnl_af_unregister(ops);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(rtnl_af_unregister);
+
+static size_t rtnl_link_get_af_size(const struct net_device *dev)
+{
+	struct rtnl_af_ops *af_ops;
+	size_t size;
+
+	/* IFLA_AF_SPEC */
+	size = nla_total_size(sizeof(struct nlattr));
+
+	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+		if (af_ops->get_link_af_size) {
+			/* AF_* + nested data */
+			size += nla_total_size(sizeof(struct nlattr)) +
+				af_ops->get_link_af_size(dev);
+		}
+	}
 
 	return size;
 }
@@ -427,12 +520,13 @@ void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data
 	struct rtattr *rta;
 	int size = RTA_LENGTH(attrlen);
 
-	rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size));
+	rta = (struct rtattr *)skb_put(skb, RTA_ALIGN(size));
 	rta->rta_type = attrtype;
 	rta->rta_len = size;
 	memcpy(RTA_DATA(rta), data, attrlen);
 	memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size);
 }
+EXPORT_SYMBOL(__rta_fill);
 
 int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo)
 {
@@ -454,6 +548,7 @@ int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
 
 	return nlmsg_unicast(rtnl, skb, pid);
 }
+EXPORT_SYMBOL(rtnl_unicast);
 
 void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
 		 struct nlmsghdr *nlh, gfp_t flags)
@@ -466,6 +561,7 @@ void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
 
 	nlmsg_notify(rtnl, skb, pid, group, report, flags);
 }
+EXPORT_SYMBOL(rtnl_notify);
 
 void rtnl_set_sk_err(struct net *net, u32 group, int error)
 {
@@ -473,6 +569,7 @@ void rtnl_set_sk_err(struct net *net, u32 group, int error)
 
 	netlink_set_err(rtnl, 0, group, error);
 }
+EXPORT_SYMBOL(rtnl_set_sk_err);
 
 int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
 {
@@ -501,6 +598,7 @@ nla_put_failure:
 	nla_nest_cancel(skb, mx);
 	return -EMSGSIZE;
 }
+EXPORT_SYMBOL(rtnetlink_put_metrics);
 
 int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
 		       u32 ts, u32 tsage, long expires, u32 error)
@@ -520,14 +618,13 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
 
 	return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci);
 }
-
 EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
 
 static void set_operstate(struct net_device *dev, unsigned char transition)
 {
 	unsigned char operstate = dev->operstate;
 
-	switch(transition) {
+	switch (transition) {
 	case IF_OPER_UP:
 		if ((operstate == IF_OPER_DORMANT ||
 		     operstate == IF_OPER_UNKNOWN) &&
@@ -550,8 +647,21 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
 	}
 }
 
+static unsigned int rtnl_dev_combine_flags(const struct net_device *dev,
+					   const struct ifinfomsg *ifm)
+{
+	unsigned int flags = ifm->ifi_flags;
+
+	/* bugwards compatibility: ifi_change == 0 is treated as ~0 */
+	if (ifm->ifi_change)
+		flags = (flags & ifm->ifi_change) |
+			(dev->flags & ~ifm->ifi_change);
+
+	return flags;
+}
+
 static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
-				 const struct net_device_stats *b)
+				 const struct rtnl_link_stats64 *b)
 {
 	a->rx_packets = b->rx_packets;
 	a->tx_packets = b->tx_packets;
@@ -580,9 +690,56 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
 
 	a->rx_compressed = b->rx_compressed;
 	a->tx_compressed = b->tx_compressed;
-};
+}
 
-static inline size_t if_nlmsg_size(const struct net_device *dev)
+static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
+{
+	memcpy(v, b, sizeof(*b));
+}
+
+/* All VF info */
+static inline int rtnl_vfinfo_size(const struct net_device *dev)
+{
+	if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
+
+		int num_vfs = dev_num_vf(dev->dev.parent);
+		size_t size = nla_total_size(sizeof(struct nlattr));
+		size += nla_total_size(num_vfs * sizeof(struct nlattr));
+		size += num_vfs *
+			(nla_total_size(sizeof(struct ifla_vf_mac)) +
+			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
+			 nla_total_size(sizeof(struct ifla_vf_tx_rate)));
+		return size;
+	} else
+		return 0;
+}
+
+static size_t rtnl_port_size(const struct net_device *dev)
+{
+	size_t port_size = nla_total_size(4)		/* PORT_VF */
+		+ nla_total_size(PORT_PROFILE_MAX)	/* PORT_PROFILE */
+		+ nla_total_size(sizeof(struct ifla_port_vsi))
+							/* PORT_VSI_TYPE */
+		+ nla_total_size(PORT_UUID_MAX)		/* PORT_INSTANCE_UUID */
+		+ nla_total_size(PORT_UUID_MAX)		/* PORT_HOST_UUID */
+		+ nla_total_size(1)			/* PORT_VDP_REQUEST */
+		+ nla_total_size(2);			/* PORT_VDP_RESPONSE */
+	size_t vf_ports_size = nla_total_size(sizeof(struct nlattr));
+	size_t vf_port_size = nla_total_size(sizeof(struct nlattr))
+		+ port_size;
+	size_t port_self_size = nla_total_size(sizeof(struct nlattr))
+		+ port_size;
+
+	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+		return 0;
+	if (dev_num_vf(dev->dev.parent))
+		return port_self_size + vf_ports_size +
+			vf_port_size * dev_num_vf(dev->dev.parent);
+	else
+		return port_self_size;
+}
+
+static noinline size_t if_nlmsg_size(const struct net_device *dev)
 {
 	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
 	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
@@ -590,6 +747,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
 	       + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
 	       + nla_total_size(sizeof(struct rtnl_link_ifmap))
 	       + nla_total_size(sizeof(struct rtnl_link_stats))
+	       + nla_total_size(sizeof(struct rtnl_link_stats64))
 	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
 	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */
 	       + nla_total_size(4) /* IFLA_TXQLEN */
@@ -599,18 +757,98 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
 	       + nla_total_size(4) /* IFLA_MASTER */
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
 	       + nla_total_size(1) /* IFLA_LINKMODE */
-	       + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
+	       + nla_total_size(4) /* IFLA_NUM_VF */
+	       + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
+	       + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+	       + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+	       + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
+}
+
+static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	struct nlattr *vf_ports;
+	struct nlattr *vf_port;
+	int vf;
+	int err;
+
+	vf_ports = nla_nest_start(skb, IFLA_VF_PORTS);
+	if (!vf_ports)
+		return -EMSGSIZE;
+
+	for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
+		vf_port = nla_nest_start(skb, IFLA_VF_PORT);
+		if (!vf_port)
+			goto nla_put_failure;
+		NLA_PUT_U32(skb, IFLA_PORT_VF, vf);
+		err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb);
+		if (err == -EMSGSIZE)
+			goto nla_put_failure;
+		if (err) {
+			nla_nest_cancel(skb, vf_port);
+			continue;
+		}
+		nla_nest_end(skb, vf_port);
+	}
+
+	nla_nest_end(skb, vf_ports);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, vf_ports);
+	return -EMSGSIZE;
+}
+
+static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	struct nlattr *port_self;
+	int err;
+
+	port_self = nla_nest_start(skb, IFLA_PORT_SELF);
+	if (!port_self)
+		return -EMSGSIZE;
+
+	err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb);
+	if (err) {
+		nla_nest_cancel(skb, port_self);
+		return (err == -EMSGSIZE) ? err : 0;
+	}
+
+	nla_nest_end(skb, port_self);
+
+	return 0;
+}
+
+static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	int err;
+
+	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+		return 0;
+
+	err = rtnl_port_self_fill(skb, dev);
+	if (err)
+		return err;
+
+	if (dev_num_vf(dev->dev.parent)) {
+		err = rtnl_vf_ports_fill(skb, dev);
+		if (err)
+			return err;
+	}
+
+	return 0;
 }
 
 static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			    int type, u32 pid, u32 seq, u32 change,
 			    unsigned int flags)
 {
-	struct netdev_queue *txq;
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
-	const struct net_device_stats *stats;
-	struct nlattr *attr;
+	struct rtnl_link_stats64 temp;
+	const struct rtnl_link_stats64 *stats;
+	struct nlattr *attr, *af_spec;
+	struct rtnl_af_ops *af_ops;
 
 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
 	if (nlh == NULL)
@@ -637,9 +875,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (dev->master)
 		NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex);
 
-	txq = netdev_get_tx_queue(dev, 0);
-	if (txq->qdisc_sleeping)
-		NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id);
+	if (dev->qdisc)
+		NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc->ops->id);
 
 	if (dev->ifalias)
 		NLA_PUT_STRING(skb, IFLA_IFALIAS, dev->ifalias);
@@ -666,14 +903,90 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (attr == NULL)
 		goto nla_put_failure;
 
-	stats = dev_get_stats(dev);
+	stats = dev_get_stats(dev, &temp);
 	copy_rtnl_link_stats(nla_data(attr), stats);
 
+	attr = nla_reserve(skb, IFLA_STATS64,
+			sizeof(struct rtnl_link_stats64));
+	if (attr == NULL)
+		goto nla_put_failure;
+	copy_rtnl_link_stats64(nla_data(attr), stats);
+
+	if (dev->dev.parent)
+		NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent));
+
+	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
+		int i;
+
+		struct nlattr *vfinfo, *vf;
+		int num_vfs = dev_num_vf(dev->dev.parent);
+
+		vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
+		if (!vfinfo)
+			goto nla_put_failure;
+		for (i = 0; i < num_vfs; i++) {
+			struct ifla_vf_info ivi;
+			struct ifla_vf_mac vf_mac;
+			struct ifla_vf_vlan vf_vlan;
+			struct ifla_vf_tx_rate vf_tx_rate;
+			if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
+				break;
+			vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf;
+			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
+			vf_vlan.vlan = ivi.vlan;
+			vf_vlan.qos = ivi.qos;
+			vf_tx_rate.rate = ivi.tx_rate;
+			vf = nla_nest_start(skb, IFLA_VF_INFO);
+			if (!vf) {
+				nla_nest_cancel(skb, vfinfo);
+				goto nla_put_failure;
+			}
+			NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac);
+			NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan);
+			NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate);
+			nla_nest_end(skb, vf);
+		}
+		nla_nest_end(skb, vfinfo);
+	}
+
+	if (rtnl_port_fill(skb, dev))
+		goto nla_put_failure;
+
 	if (dev->rtnl_link_ops) {
 		if (rtnl_link_fill(skb, dev) < 0)
 			goto nla_put_failure;
 	}
 
+	if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
+		goto nla_put_failure;
+
+	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+		if (af_ops->fill_link_af) {
+			struct nlattr *af;
+			int err;
+
+			if (!(af = nla_nest_start(skb, af_ops->family)))
+				goto nla_put_failure;
+
+			err = af_ops->fill_link_af(skb, dev);
+
+			/*
+			 * The callee may return -ENODATA to indicate that
+			 * there was no data to be dumped. This is not an
+			 * error; the attribute header is trimmed and must
+			 * not be closed with nla_nest_end() afterwards.
+			 */
+			if (err == -ENODATA)
+				nla_nest_cancel(skb, af);
+			else if (err < 0)
+				goto nla_put_failure;
+			else
+				nla_nest_end(skb, af);
+		}
+	}
+
+	nla_nest_end(skb, af_spec);
+
 	return nlmsg_end(skb, nlh);
 
 nla_put_failure:
@@ -684,22 +997,33 @@ nla_put_failure:
 static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
-	int idx;
-	int s_idx = cb->args[0];
+	int h, s_h;
+	int idx = 0, s_idx;
 	struct net_device *dev;
-
-	idx = 0;
-	for_each_netdev(net, dev) {
-		if (idx < s_idx)
-			goto cont;
-		if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
-				     NETLINK_CB(cb->skb).pid,
-				     cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0)
-			break;
+	struct hlist_head *head;
+	struct hlist_node *node;
+
+	s_h = cb->args[0];
+	s_idx = cb->args[1];
+
+	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+		idx = 0;
+		head = &net->dev_index_head[h];
+		hlist_for_each_entry(dev, node, head, index_hlist) {
+			if (idx < s_idx)
+				goto cont;
+			if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
+					     NETLINK_CB(cb->skb).pid,
+					     cb->nlh->nlmsg_seq, 0,
+					     NLM_F_MULTI) <= 0)
+				goto out;
 cont:
-		idx++;
+			idx++;
+		}
 	}
-	cb->args[0] = idx;
+out:
+	cb->args[1] = idx;
+	cb->args[0] = h;
 
 	return skb->len;
 }
@@ -718,31 +1042,58 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINKINFO]		= { .type = NLA_NESTED },
 	[IFLA_NET_NS_PID]	= { .type = NLA_U32 },
 	[IFLA_IFALIAS]	        = { .type = NLA_STRING, .len = IFALIASZ-1 },
+	[IFLA_VFINFO_LIST]	= {. type = NLA_NESTED },
+	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
+	[IFLA_PORT_SELF]	= { .type = NLA_NESTED },
+	[IFLA_AF_SPEC]		= { .type = NLA_NESTED },
 };
+EXPORT_SYMBOL(ifla_policy);
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
 	[IFLA_INFO_KIND]	= { .type = NLA_STRING },
 	[IFLA_INFO_DATA]	= { .type = NLA_NESTED },
 };
 
-static struct net *get_net_ns_by_pid(pid_t pid)
+static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
+	[IFLA_VF_INFO]		= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
+	[IFLA_VF_MAC]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_mac) },
+	[IFLA_VF_VLAN]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_vlan) },
+	[IFLA_VF_TX_RATE]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_tx_rate) },
+};
+
+static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
+	[IFLA_PORT_VF]		= { .type = NLA_U32 },
+	[IFLA_PORT_PROFILE]	= { .type = NLA_STRING,
+				    .len = PORT_PROFILE_MAX },
+	[IFLA_PORT_VSI_TYPE]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_port_vsi)},
+	[IFLA_PORT_INSTANCE_UUID] = { .type = NLA_BINARY,
+				      .len = PORT_UUID_MAX },
+	[IFLA_PORT_HOST_UUID]	= { .type = NLA_STRING,
+				    .len = PORT_UUID_MAX },
+	[IFLA_PORT_REQUEST]	= { .type = NLA_U8, },
+	[IFLA_PORT_RESPONSE]	= { .type = NLA_U16, },
+};
+
+struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
 {
-	struct task_struct *tsk;
 	struct net *net;
-
-	/* Lookup the network namespace */
-	net = ERR_PTR(-ESRCH);
-	rcu_read_lock();
-	tsk = find_task_by_vpid(pid);
-	if (tsk) {
-		struct nsproxy *nsproxy;
-		nsproxy = task_nsproxy(tsk);
-		if (nsproxy)
-			net = get_net(nsproxy->net_ns);
-	}
-	rcu_read_unlock();
+	/* Examine the link attributes and figure out which
+	 * network namespace we are talking about.
+	 */
+	if (tb[IFLA_NET_NS_PID])
+		net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
+	else
+		net = get_net(src_net);
 	return net;
 }
+EXPORT_SYMBOL(rtnl_link_get_net);
 
 static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 {
@@ -756,9 +1107,77 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 			return -EINVAL;
 	}
 
+	if (tb[IFLA_AF_SPEC]) {
+		struct nlattr *af;
+		int rem, err;
+
+		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
+			const struct rtnl_af_ops *af_ops;
+
+			if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+				return -EAFNOSUPPORT;
+
+			if (!af_ops->set_link_af)
+				return -EOPNOTSUPP;
+
+			if (af_ops->validate_link_af) {
+				/* check this family's own attribute */
+				err = af_ops->validate_link_af(dev, af);
+				if (err < 0)
+					return err;
+			}
+		}
+	}
+
 	return 0;
 }
 
+static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
+{
+	int rem, err = -EINVAL;
+	struct nlattr *vf;
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	nla_for_each_nested(vf, attr, rem) {
+		switch (nla_type(vf)) {
+		case IFLA_VF_MAC: {
+			struct ifla_vf_mac *ivm;
+			ivm = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_mac)
+				err = ops->ndo_set_vf_mac(dev, ivm->vf,
+							  ivm->mac);
+			break;
+		}
+		case IFLA_VF_VLAN: {
+			struct ifla_vf_vlan *ivv;
+			ivv = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_vlan)
+				err = ops->ndo_set_vf_vlan(dev, ivv->vf,
+							   ivv->vlan,
+							   ivv->qos);
+			break;
+		}
+		case IFLA_VF_TX_RATE: {
+			struct ifla_vf_tx_rate *ivt;
+			ivt = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_tx_rate)
+				err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
+							      ivt->rate);
+			break;
+		}
+		default:
+			err = -EINVAL;
+			break;
+		}
+		if (err)
+			break;
+	}
+	return err;
+}
+
 static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		      struct nlattr **tb, char *ifname, int modified)
 {
@@ -767,8 +1186,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 	int err;
 
 	if (tb[IFLA_NET_NS_PID]) {
-		struct net *net;
-		net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
+		struct net *net = rtnl_link_get_net(dev_net(dev), tb);
 		if (IS_ERR(net)) {
 			err = PTR_ERR(net);
 			goto errout;
@@ -873,13 +1291,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 	}
 
 	if (ifm->ifi_flags || ifm->ifi_change) {
-		unsigned int flags = ifm->ifi_flags;
-
-		/* bugwards compatibility: ifi_change == 0 is treated as ~0 */
-		if (ifm->ifi_change)
-			flags = (flags & ifm->ifi_change) |
-				(dev->flags & ~ifm->ifi_change);
-		err = dev_change_flags(dev, flags);
+		err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm));
 		if (err < 0)
 			goto errout;
 	}
@@ -896,6 +1308,85 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		write_unlock_bh(&dev_base_lock);
 	}
 
+	if (tb[IFLA_VFINFO_LIST]) {
+		struct nlattr *attr;
+		int rem;
+		nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
+			if (nla_type(attr) != IFLA_VF_INFO) {
+				err = -EINVAL;
+				goto errout;
+			}
+			err = do_setvfinfo(dev, attr);
+			if (err < 0)
+				goto errout;
+			modified = 1;
+		}
+	}
+	err = 0;
+
+	if (tb[IFLA_VF_PORTS]) {
+		struct nlattr *port[IFLA_PORT_MAX+1];
+		struct nlattr *attr;
+		int vf;
+		int rem;
+
+		err = -EOPNOTSUPP;
+		if (!ops->ndo_set_vf_port)
+			goto errout;
+
+		nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) {
+			if (nla_type(attr) != IFLA_VF_PORT)
+				continue;
+			err = nla_parse_nested(port, IFLA_PORT_MAX,
+				attr, ifla_port_policy);
+			if (err < 0)
+				goto errout;
+			if (!port[IFLA_PORT_VF]) {
+				err = -EOPNOTSUPP;
+				goto errout;
+			}
+			vf = nla_get_u32(port[IFLA_PORT_VF]);
+			err = ops->ndo_set_vf_port(dev, vf, port);
+			if (err < 0)
+				goto errout;
+			modified = 1;
+		}
+	}
+	err = 0;
+
+	if (tb[IFLA_PORT_SELF]) {
+		struct nlattr *port[IFLA_PORT_MAX+1];
+
+		err = nla_parse_nested(port, IFLA_PORT_MAX,
+			tb[IFLA_PORT_SELF], ifla_port_policy);
+		if (err < 0)
+			goto errout;
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_port)
+			err = ops->ndo_set_vf_port(dev, PORT_SELF_VF, port);
+		if (err < 0)
+			goto errout;
+		modified = 1;
+	}
+
+	if (tb[IFLA_AF_SPEC]) {
+		struct nlattr *af;
+		int rem;
+
+		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
+			const struct rtnl_af_ops *af_ops;
+
+			if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+				BUG();
+
+			err = af_ops->set_link_af(dev, af);
+			if (err < 0)
+				goto errout;
+
+			modified = 1;
+		}
+	}
 	err = 0;
 
 errout:
@@ -931,9 +1422,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	err = -EINVAL;
 	ifm = nlmsg_data(nlh);
 	if (ifm->ifi_index > 0)
-		dev = dev_get_by_index(net, ifm->ifi_index);
+		dev = __dev_get_by_index(net, ifm->ifi_index);
 	else if (tb[IFLA_IFNAME])
-		dev = dev_get_by_name(net, ifname);
+		dev = __dev_get_by_name(net, ifname);
 	else
 		goto errout;
 
@@ -942,12 +1433,11 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		goto errout;
 	}
 
-	if ((err = validate_linkmsg(dev, tb)) < 0)
-		goto errout_dev;
+	err = validate_linkmsg(dev, tb);
+	if (err < 0)
+		goto errout;
 
 	err = do_setlink(dev, ifm, tb, ifname, 0);
-errout_dev:
-	dev_put(dev);
 errout:
 	return err;
 }
@@ -984,30 +1474,60 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	if (!ops)
 		return -EOPNOTSUPP;
 
-	ops->dellink(dev);
+	ops->dellink(dev, NULL);
 	return 0;
 }
 
-struct net_device *rtnl_create_link(struct net *net, char *ifname,
-		const struct rtnl_link_ops *ops, struct nlattr *tb[])
+int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
+{
+	unsigned int old_flags;
+	int err;
+
+	old_flags = dev->flags;
+	if (ifm && (ifm->ifi_flags || ifm->ifi_change)) {
+		err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm));
+		if (err < 0)
+			return err;
+	}
+
+	dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
+	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+
+	__dev_notify_flags(dev, old_flags);
+	return 0;
+}
+EXPORT_SYMBOL(rtnl_configure_link);
+
+struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
+	char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[])
 {
 	int err;
 	struct net_device *dev;
+	unsigned int num_queues = 1;
+	unsigned int real_num_queues = 1;
 
+	if (ops->get_tx_queues) {
+		err = ops->get_tx_queues(src_net, tb, &num_queues,
+					 &real_num_queues);
+		if (err)
+			goto err;
+	}
 	err = -ENOMEM;
-	dev = alloc_netdev(ops->priv_size, ifname, ops->setup);
+	dev = alloc_netdev_mq(ops->priv_size, ifname, ops->setup, num_queues);
 	if (!dev)
 		goto err;
 
+	dev_net_set(dev, net);
+	dev->rtnl_link_ops = ops;
+	dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
+	dev->real_num_tx_queues = real_num_queues;
+
 	if (strchr(dev->name, '%')) {
 		err = dev_alloc_name(dev, dev->name);
 		if (err < 0)
 			goto err_free;
 	}
 
-	dev_net_set(dev, net);
-	dev->rtnl_link_ops = ops;
-
 	if (tb[IFLA_MTU])
 		dev->mtu = nla_get_u32(tb[IFLA_MTU]);
 	if (tb[IFLA_ADDRESS])
@@ -1030,6 +1550,7 @@ err_free:
 err:
 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL(rtnl_create_link);
 
 static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
@@ -1063,7 +1584,8 @@ replay:
 	else
 		dev = NULL;
 
-	if ((err = validate_linkmsg(dev, tb)) < 0)
+	err = validate_linkmsg(dev, tb);
+	if (err < 0)
 		return err;
 
 	if (tb[IFLA_LINKINFO]) {
@@ -1084,6 +1606,7 @@ replay:
 
 	if (1) {
 		struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL;
+		struct net *dest_net;
 
 		if (ops) {
 			if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
@@ -1126,7 +1649,7 @@ replay:
 		if (!(nlh->nlmsg_flags & NLM_F_CREATE))
 			return -ENODEV;
 
-		if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change)
+		if (ifm->ifi_index)
 			return -EOPNOTSUPP;
 		if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO])
 			return -EOPNOTSUPP;
@@ -1148,17 +1671,31 @@ replay:
 		if (!ifname[0])
 			snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
 
-		dev = rtnl_create_link(net, ifname, ops, tb);
+		dest_net = rtnl_link_get_net(net, tb);
+		/* A failed namespace lookup comes back as an ERR_PTR; stop
+		 * before it reaches dev_net_set() or the put_net() below.
+		 */
+		if (IS_ERR(dest_net))
+			return PTR_ERR(dest_net);
+		dev = rtnl_create_link(net, dest_net, ifname, ops, tb);
 
 		if (IS_ERR(dev))
 			err = PTR_ERR(dev);
 		else if (ops->newlink)
-			err = ops->newlink(dev, tb, data);
+			err = ops->newlink(net, dev, tb, data);
 		else
 			err = register_netdevice(dev);
 
 		if (err < 0 && !IS_ERR(dev))
 			free_netdev(dev);
+		if (err < 0)
+			goto out;
+
+		err = rtnl_configure_link(dev, ifm);
+		if (err < 0)
+			unregister_netdevice(dev);
+out:
+		put_net(dest_net);
 		return err;
 	}
 }
@@ -1167,6 +1699,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
 	struct ifinfomsg *ifm;
+	char ifname[IFNAMSIZ];
 	struct nlattr *tb[IFLA_MAX+1];
 	struct net_device *dev = NULL;
 	struct sk_buff *nskb;
@@ -1176,19 +1709,23 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (err < 0)
 		return err;
 
+	if (tb[IFLA_IFNAME])
+		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+
 	ifm = nlmsg_data(nlh);
-	if (ifm->ifi_index > 0) {
-		dev = dev_get_by_index(net, ifm->ifi_index);
-		if (dev == NULL)
-			return -ENODEV;
-	} else
+	if (ifm->ifi_index > 0)
+		dev = __dev_get_by_index(net, ifm->ifi_index);
+	else if (tb[IFLA_IFNAME])
+		dev = __dev_get_by_name(net, ifname);
+	else
 		return -EINVAL;
 
+	if (dev == NULL)
+		return -ENODEV;
+
 	nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
-	if (nskb == NULL) {
-		err = -ENOBUFS;
-		goto errout;
-	}
+	if (nskb == NULL)
+		return -ENOBUFS;
 
 	err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid,
 			       nlh->nlmsg_seq, 0, 0);
@@ -1196,11 +1733,8 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		/* -EMSGSIZE implies BUG in if_nlmsg_size */
 		WARN_ON(err == -EMSGSIZE);
 		kfree_skb(nskb);
-		goto errout;
-	}
-	err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
-errout:
-	dev_put(dev);
+	} else
+		err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
 
 	return err;
 }
@@ -1212,7 +1746,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 
 	if (s_idx == 0)
 		s_idx = 1;
-	for (idx=1; idx<NPROTO; idx++) {
+	for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
 		int type = cb->nlh->nlmsg_type-RTM_BASE;
 		if (idx < s_idx || idx == PF_PACKET)
 			continue;
@@ -1279,17 +1813,14 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
 		return 0;
 
-	family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
-	if (family >= NPROTO)
-		return -EAFNOSUPPORT;
-
+	family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family;
 	sz_idx = type>>2;
 	kind = type&3;
 
 	if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
+	if (kind == 2 && (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
 		struct sock *rtnl;
 		rtnl_dumpit_func dumpit;
 
@@ -1312,7 +1843,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 	if (nlh->nlmsg_len > min_len) {
 		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
-		struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len);
+		struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len);
 
 		while (RTA_OK(attr, attrlen)) {
 			unsigned flavor = attr->rta_type;
@@ -1344,18 +1875,16 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 	struct net_device *dev = ptr;
 
 	switch (event) {
-	case NETDEV_UNREGISTER:
-		rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
-		break;
-	case NETDEV_REGISTER:
-		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
-		break;
 	case NETDEV_UP:
 	case NETDEV_DOWN:
-		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
-		break;
+	case NETDEV_PRE_UP:
+	case NETDEV_POST_INIT:
+	case NETDEV_REGISTER:
 	case NETDEV_CHANGE:
+	case NETDEV_PRE_TYPE_CHANGE:
 	case NETDEV_GOING_DOWN:
+	case NETDEV_UNREGISTER:
+	case NETDEV_UNREGISTER_BATCH:
 		break;
 	default:
 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
@@ -1369,7 +1898,7 @@ static struct notifier_block rtnetlink_dev_notifier = {
 };
 
 
-static int rtnetlink_net_init(struct net *net)
+static int __net_init rtnetlink_net_init(struct net *net)
 {
 	struct sock *sk;
 	sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
@@ -1380,7 +1909,7 @@ static int rtnetlink_net_init(struct net *net)
 	return 0;
 }
 
-static void rtnetlink_net_exit(struct net *net)
+static void __net_exit rtnetlink_net_exit(struct net *net)
 {
 	netlink_kernel_release(net->rtnl);
 	net->rtnl = NULL;
@@ -1418,14 +1947,3 @@ void __init rtnetlink_init(void)
 	rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
 }
 
-EXPORT_SYMBOL(__rta_fill);
-EXPORT_SYMBOL(rtnetlink_put_metrics);
-EXPORT_SYMBOL(rtnl_lock);
-EXPORT_SYMBOL(rtnl_trylock);
-EXPORT_SYMBOL(rtnl_unlock);
-EXPORT_SYMBOL(rtnl_is_locked);
-EXPORT_SYMBOL(rtnl_unicast);
-EXPORT_SYMBOL(rtnl_notify);
-EXPORT_SYMBOL(rtnl_set_sk_err);
-EXPORT_SYMBOL(rtnl_create_link);
-EXPORT_SYMBOL(ifla_policy);
diff --git a/net/core/scm.c b/net/core/scm.c
index b7ba91b074b..bbe45445080 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -26,6 +26,7 @@
 #include <linux/security.h>
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
+#include <linux/slab.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -78,10 +79,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
 			return -ENOMEM;
 		*fplp = fpl;
 		fpl->count = 0;
+		fpl->max = SCM_MAX_FD;
 	}
 	fpp = &fpl->fp[fpl->count];
 
-	if (fpl->count + num > SCM_MAX_FD)
+	if (fpl->count + num > fpl->max)
 		return -EINVAL;
 
 	/*
@@ -129,6 +131,7 @@ void __scm_destroy(struct scm_cookie *scm)
 		}
 	}
 }
+EXPORT_SYMBOL(__scm_destroy);
 
 int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 {
@@ -156,6 +159,8 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 		switch (cmsg->cmsg_type)
 		{
 		case SCM_RIGHTS:
+			if (!sock->ops || sock->ops->family != PF_UNIX)
+				goto error;
 			err=scm_fp_copy(cmsg, &p->fp);
 			if (err<0)
 				goto error;
@@ -167,6 +172,30 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 			err = scm_check_creds(&p->creds);
 			if (err)
 				goto error;
+
+			if (pid_vnr(p->pid) != p->creds.pid) {
+				struct pid *pid;
+				err = -ESRCH;
+				pid = find_get_pid(p->creds.pid);
+				if (!pid)
+					goto error;
+				put_pid(p->pid);
+				p->pid = pid;
+			}
+
+			if ((p->cred->euid != p->creds.uid) ||
+				(p->cred->egid != p->creds.gid)) {
+				struct cred *cred;
+				err = -ENOMEM;
+				cred = prepare_creds();
+				if (!cred)
+					goto error;
+				/* take uid and gid each from its own field */
+				cred->uid = cred->euid = p->creds.uid;
+				cred->gid = cred->egid = p->creds.gid;
+				put_cred(p->cred);
+				p->cred = cred;
+			}
 			break;
 		default:
 			goto error;
@@ -184,6 +213,7 @@ error:
 	scm_destroy(p);
 	return err;
 }
+EXPORT_SYMBOL(__scm_send);
 
 int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
 {
@@ -222,6 +252,7 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
 out:
 	return err;
 }
+EXPORT_SYMBOL(put_cmsg);
 
 void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 {
@@ -291,6 +322,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 	 */
 	__scm_destroy(scm);
 }
+EXPORT_SYMBOL(scm_detach_fds);
 
 struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
 {
@@ -300,17 +332,13 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
 	if (!fpl)
 		return NULL;
 
-	new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
+	new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]),
+			  GFP_KERNEL);
 	if (new_fpl) {
-		for (i=fpl->count-1; i>=0; i--)
+		for (i = 0; i < fpl->count; i++)
 			get_file(fpl->fp[i]);
-		memcpy(new_fpl, fpl, sizeof(*fpl));
+		new_fpl->max = new_fpl->count;
 	}
 	return new_fpl;
 }
-
-EXPORT_SYMBOL(__scm_destroy);
-EXPORT_SYMBOL(__scm_send);
-EXPORT_SYMBOL(put_cmsg);
-EXPORT_SYMBOL(scm_detach_fds);
 EXPORT_SYMBOL(scm_fp_dup);
diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c
deleted file mode 100644
index 79687dfd695..00000000000
--- a/net/core/skb_dma_map.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/* skb_dma_map.c: DMA mapping helpers for socket buffers.
- *
- * Copyright (C) David S. Miller <davem@davemloft.net>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/dma-mapping.h>
-#include <linux/skbuff.h>
-
-int skb_dma_map(struct device *dev, struct sk_buff *skb,
-		enum dma_data_direction dir)
-{
-	struct skb_shared_info *sp = skb_shinfo(skb);
-	dma_addr_t map;
-	int i;
-
-	map = dma_map_single(dev, skb->data,
-			     skb_headlen(skb), dir);
-	if (dma_mapping_error(dev, map))
-		goto out_err;
-
-	sp->dma_head = map;
-	for (i = 0; i < sp->nr_frags; i++) {
-		skb_frag_t *fp = &sp->frags[i];
-
-		map = dma_map_page(dev, fp->page, fp->page_offset,
-				   fp->size, dir);
-		if (dma_mapping_error(dev, map))
-			goto unwind;
-		sp->dma_maps[i] = map;
-	}
-
-	return 0;
-
-unwind:
-	while (--i >= 0) {
-		skb_frag_t *fp = &sp->frags[i];
-
-		dma_unmap_page(dev, sp->dma_maps[i],
-			       fp->size, dir);
-	}
-	dma_unmap_single(dev, sp->dma_head,
-			 skb_headlen(skb), dir);
-out_err:
-	return -ENOMEM;
-}
-EXPORT_SYMBOL(skb_dma_map);
-
-void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
-		   enum dma_data_direction dir)
-{
-	struct skb_shared_info *sp = skb_shinfo(skb);
-	int i;
-
-	dma_unmap_single(dev, sp->dma_head,
-			 skb_headlen(skb), dir);
-	for (i = 0; i < sp->nr_frags; i++) {
-		skb_frag_t *fp = &sp->frags[i];
-
-		dma_unmap_page(dev, sp->dma_maps[i],
-			       fp->size, dir);
-	}
-}
-EXPORT_SYMBOL(skb_dma_unmap);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 9e0597d189b..19d6c21220f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -93,7 +93,7 @@ static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
 
 
 /* Pipe buffer operations for a socket. */
-static struct pipe_buf_operations sock_pipe_buf_ops = {
+static const struct pipe_buf_operations sock_pipe_buf_ops = {
 	.can_merge = 0,
 	.map = generic_pipe_buf_map,
 	.unmap = generic_pipe_buf_unmap,
@@ -117,7 +117,7 @@ static struct pipe_buf_operations sock_pipe_buf_ops = {
  *
  *	Out of line support code for skb_put(). Not user callable.
  */
-void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+static void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 {
 	printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
 			  "data:%p tail:%#lx end:%#lx dev:%s\n",
@@ -126,7 +126,6 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 	       skb->dev ? skb->dev->name : "<NULL>");
 	BUG();
 }
-EXPORT_SYMBOL(skb_over_panic);
 
 /**
  *	skb_under_panic	- 	private function
@@ -137,7 +136,7 @@ EXPORT_SYMBOL(skb_over_panic);
  *	Out of line support code for skb_push(). Not user callable.
  */
 
-void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 {
 	printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
 			  "data:%p tail:%#lx end:%#lx dev:%s\n",
@@ -146,7 +145,6 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 	       skb->dev ? skb->dev->name : "<NULL>");
 	BUG();
 }
-EXPORT_SYMBOL(skb_under_panic);
 
 /* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
  *	'private' fields and also do memory statistics to find all the
@@ -183,12 +181,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
 	if (!skb)
 		goto out;
+	prefetchw(skb);
 
 	size = SKB_DATA_ALIGN(size);
 	data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
 			gfp_mask, node);
 	if (!data)
 		goto nodata;
+	prefetchw(data + size);
 
 	/*
 	 * Only clear those fields we need to clear, not those that we will
@@ -202,23 +202,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb->data = data;
 	skb_reset_tail_pointer(skb);
 	skb->end = skb->tail + size;
-	kmemcheck_annotate_bitfield(skb, flags1);
-	kmemcheck_annotate_bitfield(skb, flags2);
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 	skb->mac_header = ~0U;
 #endif
 
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	shinfo->nr_frags  = 0;
-	shinfo->gso_size = 0;
-	shinfo->gso_segs = 0;
-	shinfo->gso_type = 0;
-	shinfo->ip6_frag_id = 0;
-	shinfo->tx_flags.flags = 0;
-	skb_frag_list_init(skb);
-	memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
 
 	if (fclone) {
 		struct sk_buff *child = skb + 1;
@@ -256,10 +247,9 @@ EXPORT_SYMBOL(__alloc_skb);
 struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 		unsigned int length, gfp_t gfp_mask)
 {
-	int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
 	struct sk_buff *skb;
 
-	skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
+	skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
 	if (likely(skb)) {
 		skb_reserve(skb, NET_SKB_PAD);
 		skb->dev = dev;
@@ -268,16 +258,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 }
 EXPORT_SYMBOL(__netdev_alloc_skb);
 
-struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
-{
-	int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
-	struct page *page;
-
-	page = alloc_pages_node(node, gfp_mask, 0);
-	return page;
-}
-EXPORT_SYMBOL(__netdev_alloc_page);
-
 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 		int size)
 {
@@ -347,7 +327,7 @@ static void skb_release_data(struct sk_buff *skb)
 				put_page(skb_shinfo(skb)->frags[i].page);
 		}
 
-		if (skb_has_frags(skb))
+		if (skb_has_frag_list(skb))
 			skb_drop_fraglist(skb);
 
 		kfree(skb->head);
@@ -473,6 +453,7 @@ void consume_skb(struct sk_buff *skb)
 		smp_rmb();
 	else if (likely(!atomic_dec_and_test(&skb->users)))
 		return;
+	trace_consume_skb(skb);
 	__kfree_skb(skb);
 }
 EXPORT_SYMBOL(consume_skb);
@@ -489,37 +470,34 @@ EXPORT_SYMBOL(consume_skb);
  *	reference count dropping and cleans up the skbuff as if it
  *	just came from __alloc_skb().
  */
-int skb_recycle_check(struct sk_buff *skb, int skb_size)
+bool skb_recycle_check(struct sk_buff *skb, int skb_size)
 {
 	struct skb_shared_info *shinfo;
 
+	if (irqs_disabled())
+		return false;
+
 	if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
-		return 0;
+		return false;
 
 	skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD);
 	if (skb_end_pointer(skb) - skb->head < skb_size)
-		return 0;
+		return false;
 
 	if (skb_shared(skb) || skb_cloned(skb))
-		return 0;
+		return false;
 
 	skb_release_head_state(skb);
+
 	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	shinfo->nr_frags = 0;
-	shinfo->gso_size = 0;
-	shinfo->gso_segs = 0;
-	shinfo->gso_type = 0;
-	shinfo->ip6_frag_id = 0;
-	shinfo->tx_flags.flags = 0;
-	skb_frag_list_init(skb);
-	memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
 
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->data = skb->head + NET_SKB_PAD;
 	skb_reset_tail_pointer(skb);
 
-	return 1;
+	return true;
 }
 EXPORT_SYMBOL(skb_recycle_check);
 
@@ -530,7 +508,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->transport_header	= old->transport_header;
 	new->network_header	= old->network_header;
 	new->mac_header		= old->mac_header;
-	skb_dst_set(new, dst_clone(skb_dst(old)));
+	skb_dst_copy(new, old);
+	new->rxhash		= old->rxhash;
 #ifdef CONFIG_XFRM
 	new->sp			= secpath_get(old->sp);
 #endif
@@ -541,12 +520,13 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->ip_summed		= old->ip_summed;
 	skb_copy_queue_mapping(new, old);
 	new->priority		= old->priority;
+	new->deliver_no_wcard	= old->deliver_no_wcard;
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 	new->ipvs_property	= old->ipvs_property;
 #endif
 	new->protocol		= old->protocol;
 	new->mark		= old->mark;
-	new->iif		= old->iif;
+	new->skb_iif		= old->skb_iif;
 	__nf_copy(new, old);
 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
@@ -559,9 +539,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #endif
 #endif
 	new->vlan_tci		= old->vlan_tci;
-#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
-	new->do_not_encrypt	= old->do_not_encrypt;
-#endif
 
 	skb_copy_secmark(new, old);
 }
@@ -696,16 +673,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 
 struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 {
-	int headerlen = skb->data - skb->head;
-	/*
-	 *	Allocate the copy buffer
-	 */
-	struct sk_buff *n;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
-	n = alloc_skb(skb->end + skb->data_len, gfp_mask);
-#else
-	n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
-#endif
+	int headerlen = skb_headroom(skb);
+	unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len;
+	struct sk_buff *n = alloc_skb(size, gfp_mask);
+
 	if (!n)
 		return NULL;
 
@@ -737,20 +708,14 @@ EXPORT_SYMBOL(skb_copy);
 
 struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
 {
-	/*
-	 *	Allocate the copy buffer
-	 */
-	struct sk_buff *n;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
-	n = alloc_skb(skb->end, gfp_mask);
-#else
-	n = alloc_skb(skb->end - skb->head, gfp_mask);
-#endif
+	unsigned int size = skb_end_pointer(skb) - skb->head;
+	struct sk_buff *n = alloc_skb(size, gfp_mask);
+
 	if (!n)
 		goto out;
 
 	/* Set the data pointer */
-	skb_reserve(n, skb->data - skb->head);
+	skb_reserve(n, skb_headroom(skb));
 	/* Set the tail pointer and length */
 	skb_put(n, skb_headlen(skb));
 	/* Copy the bytes */
@@ -770,7 +735,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
 		skb_shinfo(n)->nr_frags = i;
 	}
 
-	if (skb_has_frags(skb)) {
+	if (skb_has_frag_list(skb)) {
 		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
 		skb_clone_fraglist(n);
 	}
@@ -802,12 +767,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 {
 	int i;
 	u8 *data;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
-	int size = nhead + skb->end + ntail;
-#else
-	int size = nhead + (skb->end - skb->head) + ntail;
-#endif
+	int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail;
 	long off;
+	bool fastpath;
 
 	BUG_ON(nhead < 0);
 
@@ -816,31 +778,56 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	size = SKB_DATA_ALIGN(size);
 
+	/* Check if we can avoid taking references on fragments if we own
+	 * the last reference on skb->head. (see skb_release_data())
+	 */
+	if (!skb->cloned)
+		fastpath = true;
+	else {
+		int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
+
+		fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
+	}
+
+	if (fastpath &&
+	    size + sizeof(struct skb_shared_info) <= ksize(skb->head)) {
+		memmove(skb->head + size, skb_shinfo(skb),
+			offsetof(struct skb_shared_info,
+				 frags[skb_shinfo(skb)->nr_frags]));
+		memmove(skb->head + nhead, skb->head,
+			skb_tail_pointer(skb) - skb->head);
+		off = nhead;
+		goto adjust_others;
+	}
+
 	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
 	if (!data)
 		goto nodata;
 
 	/* Copy only real data... and, alas, header. This should be
-	 * optimized for the cases when header is void. */
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
-	memcpy(data + nhead, skb->head, skb->tail);
-#else
-	memcpy(data + nhead, skb->head, skb->tail - skb->head);
-#endif
-	memcpy(data + size, skb_end_pointer(skb),
-	       sizeof(struct skb_shared_info));
+	 * optimized for the cases when header is void.
+	 */
+	memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
 
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-		get_page(skb_shinfo(skb)->frags[i].page);
+	memcpy((struct skb_shared_info *)(data + size),
+	       skb_shinfo(skb),
+	       offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
 
-	if (skb_has_frags(skb))
-		skb_clone_fraglist(skb);
+	if (fastpath) {
+		kfree(skb->head);
+	} else {
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+			get_page(skb_shinfo(skb)->frags[i].page);
 
-	skb_release_data(skb);
+		if (skb_has_frag_list(skb))
+			skb_clone_fraglist(skb);
 
+		skb_release_data(skb);
+	}
 	off = (data + nhead) - skb->head;
 
 	skb->head     = data;
+adjust_others:
 	skb->data    += off;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 	skb->end      = size;
@@ -854,7 +841,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	skb->network_header   += off;
 	if (skb_mac_header_was_set(skb))
 		skb->mac_header += off;
-	skb->csum_start       += nhead;
+	/* Only adjust this if it actually is csum_start rather than csum */
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		skb->csum_start += nhead;
 	skb->cloned   = 0;
 	skb->hdr_len  = 0;
 	skb->nohdr    = 0;
@@ -941,7 +930,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	copy_skb_header(n, skb);
 
 	off                  = newheadroom - oldheadroom;
-	n->csum_start       += off;
+	if (n->ip_summed == CHECKSUM_PARTIAL)
+		n->csum_start += off;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 	n->transport_header += off;
 	n->network_header   += off;
@@ -1051,7 +1041,7 @@ EXPORT_SYMBOL(skb_push);
  */
 unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
 {
-	return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
+	return skb_pull_inline(skb, len);
 }
 EXPORT_SYMBOL(skb_pull);
 
@@ -1107,7 +1097,7 @@ drop_pages:
 		for (; i < nfrags; i++)
 			put_page(skb_shinfo(skb)->frags[i].page);
 
-		if (skb_has_frags(skb))
+		if (skb_has_frag_list(skb))
 			skb_drop_fraglist(skb);
 		goto done;
 	}
@@ -1202,7 +1192,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
 	/* Optimization: no fragments, no reasons to preestimate
 	 * size of pulled pages. Superb.
 	 */
-	if (!skb_has_frags(skb))
+	if (!skb_has_frag_list(skb))
 		goto pull_pages;
 
 	/* Estimate size of pulled pages. */
@@ -1417,12 +1407,13 @@ new_page:
 /*
  * Fill page/offset/length into spd, if it can hold more pages.
  */
-static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
+static inline int spd_fill_page(struct splice_pipe_desc *spd,
+				struct pipe_inode_info *pipe, struct page *page,
 				unsigned int *len, unsigned int offset,
 				struct sk_buff *skb, int linear,
 				struct sock *sk)
 {
-	if (unlikely(spd->nr_pages == PIPE_BUFFERS))
+	if (unlikely(spd->nr_pages == pipe->buffers))
 		return 1;
 
 	if (linear) {
@@ -1458,7 +1449,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
 				   unsigned int plen, unsigned int *off,
 				   unsigned int *len, struct sk_buff *skb,
 				   struct splice_pipe_desc *spd, int linear,
-				   struct sock *sk)
+				   struct sock *sk,
+				   struct pipe_inode_info *pipe)
 {
 	if (!*len)
 		return 1;
@@ -1481,7 +1473,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
 		/* the linear region may spread across several pages  */
 		flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
 
-		if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk))
+		if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk))
 			return 1;
 
 		__segment_seek(&page, &poff, &plen, flen);
@@ -1496,9 +1488,9 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
  * Map linear and fragment data from the skb to spd. It reports failure if the
  * pipe is full or if we already spliced the requested length.
  */
-static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
-			     unsigned int *len, struct splice_pipe_desc *spd,
-			     struct sock *sk)
+static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
+			     unsigned int *offset, unsigned int *len,
+			     struct splice_pipe_desc *spd, struct sock *sk)
 {
 	int seg;
 
@@ -1508,7 +1500,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 	if (__splice_segment(virt_to_page(skb->data),
 			     (unsigned long) skb->data & (PAGE_SIZE - 1),
 			     skb_headlen(skb),
-			     offset, len, skb, spd, 1, sk))
+			     offset, len, skb, spd, 1, sk, pipe))
 		return 1;
 
 	/*
@@ -1518,7 +1510,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
 
 		if (__splice_segment(f->page, f->page_offset, f->size,
-				     offset, len, skb, spd, 0, sk))
+				     offset, len, skb, spd, 0, sk, pipe))
 			return 1;
 	}
 
@@ -1535,8 +1527,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 		    struct pipe_inode_info *pipe, unsigned int tlen,
 		    unsigned int flags)
 {
-	struct partial_page partial[PIPE_BUFFERS];
-	struct page *pages[PIPE_BUFFERS];
+	struct partial_page partial[PIPE_DEF_BUFFERS];
+	struct page *pages[PIPE_DEF_BUFFERS];
 	struct splice_pipe_desc spd = {
 		.pages = pages,
 		.partial = partial,
@@ -1546,12 +1538,16 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 	};
 	struct sk_buff *frag_iter;
 	struct sock *sk = skb->sk;
+	int ret = 0;
+
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
 
 	/*
 	 * __skb_splice_bits() only fails if the output has no room left,
 	 * so no point in going over the frag_list for the error case.
 	 */
-	if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk))
+	if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk))
 		goto done;
 	else if (!tlen)
 		goto done;
@@ -1562,14 +1558,12 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 	skb_walk_frags(skb, frag_iter) {
 		if (!tlen)
 			break;
-		if (__skb_splice_bits(frag_iter, &offset, &tlen, &spd, sk))
+		if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk))
 			break;
 	}
 
 done:
 	if (spd.nr_pages) {
-		int ret;
-
 		/*
 		 * Drop the socket lock, otherwise we have reverse
 		 * locking dependencies between sk_lock and i_mutex
@@ -1582,10 +1576,10 @@ done:
 		release_sock(sk);
 		ret = splice_to_pipe(pipe, &spd);
 		lock_sock(sk);
-		return ret;
 	}
 
-	return 0;
+	splice_shrink_spd(pipe, &spd);
+	return ret;
 }
 
 /**
@@ -1830,7 +1824,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 	long csstart;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		csstart = skb->csum_start - skb_headroom(skb);
+		csstart = skb_checksum_start_offset(skb);
 	else
 		csstart = skb_headlen(skb);
 
@@ -2327,7 +2321,7 @@ next_skb:
 		st->frag_data = NULL;
 	}
 
-	if (st->root_skb == st->cur_skb && skb_has_frags(st->root_skb)) {
+	if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) {
 		st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
 		st->frag_idx = 0;
 		goto next_skb;
@@ -2490,7 +2484,6 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
 	skb_postpull_rcsum(skb, skb->data, len);
 	return skb->data += len;
 }
-
 EXPORT_SYMBOL_GPL(skb_pull_rcsum);
 
 /**
@@ -2578,6 +2571,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		__copy_skb_header(nskb, skb);
 		nskb->mac_len = skb->mac_len;
 
+		/* nskb and skb might have different headroom */
+		if (nskb->ip_summed == CHECKSUM_PARTIAL)
+			nskb->csum_start += skb_headroom(nskb) - headroom;
+
 		skb_reset_mac_header(nskb);
 		skb_set_network_header(nskb, skb->mac_len);
 		nskb->transport_header = (nskb->network_header +
@@ -2704,10 +2701,11 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 
 		NAPI_GRO_CB(skb)->free = 1;
 		goto done;
-	}
+	} else if (skb_gro_len(p) != pinfo->gso_size)
+		return -E2BIG;
 
 	headroom = skb_headroom(p);
-	nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p));
+	nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC);
 	if (unlikely(!nskb))
 		return -ENOMEM;
 
@@ -2728,6 +2726,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	*NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
 	skb_shinfo(nskb)->frag_list = p;
 	skb_shinfo(nskb)->gso_size = pinfo->gso_size;
+	pinfo->gso_size = 0;
 	skb_header_release(p);
 	nskb->prev = p;
 
@@ -2892,7 +2891,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
 		return -ENOMEM;
 
 	/* Easy case. Most of packets will go this way. */
-	if (!skb_has_frags(skb)) {
+	if (!skb_has_frag_list(skb)) {
 		/* A little of trouble, not enough of space for trailer.
 		 * This should not happen, when stack is tuned to generate
 		 * good frames. OK, on miss we reallocate and reserve even more
@@ -2927,7 +2926,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
 
 		if (skb1->next == NULL && tailbits) {
 			if (skb_shinfo(skb1)->nr_frags ||
-			    skb_has_frags(skb1) ||
+			    skb_has_frag_list(skb1) ||
 			    skb_tailroom(skb1) < tailbits)
 				ntail = tailbits + 128;
 		}
@@ -2936,7 +2935,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
 		    skb_cloned(skb1) ||
 		    ntail ||
 		    skb_shinfo(skb1)->nr_frags ||
-		    skb_has_frags(skb1)) {
+		    skb_has_frag_list(skb1)) {
 			struct sk_buff *skb2;
 
 			/* Fuck, we are miserable poor guys... */
@@ -2970,6 +2969,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
 }
 EXPORT_SYMBOL_GPL(skb_cow_data);
 
+static void sock_rmem_free(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+
+	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+}
+
+/*
+ * Note: We don't mem charge error packets (no sk_forward_alloc changes)
+ */
+int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
+{
+	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
+	    (unsigned)sk->sk_rcvbuf)
+		return -ENOMEM;
+
+	skb_orphan(skb);
+	skb->sk = sk;
+	skb->destructor = sock_rmem_free;
+	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+
+	skb_queue_tail(&sk->sk_error_queue, skb);
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_data_ready(sk, skb->len);
+	return 0;
+}
+EXPORT_SYMBOL(sock_queue_err_skb);
+
 void skb_tstamp_tx(struct sk_buff *orig_skb,
 		struct skb_shared_hwtstamps *hwtstamps)
 {
@@ -2991,7 +3018,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
 	} else {
 		/*
 		 * no hardware time stamps available,
-		 * so keep the skb_shared_tx and only
+		 * so keep the shared tx_flags and only
 		 * store software time stamp
 		 */
 		skb->tstamp = ktime_get_real();
@@ -3001,7 +3028,9 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
 	memset(serr, 0, sizeof(*serr));
 	serr->ee.ee_errno = ENOMSG;
 	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+
 	err = sock_queue_err_skb(sk, skb);
+
 	if (err)
 		kfree_skb(skb);
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index b0ba569bc97..a658aeb6d55 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -110,6 +110,7 @@
 #include <linux/tcp.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
+#include <linux/user_namespace.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -123,6 +124,7 @@
 #include <linux/net_tstamp.h>
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
+#include <net/cls_cgroup.h>
 
 #include <linux/filter.h>
 
@@ -142,7 +144,7 @@ static struct lock_class_key af_family_slock_keys[AF_MAX];
  * strings build-time, so that runtime initialization of socket
  * locks is fast):
  */
-static const char *af_family_key_strings[AF_MAX+1] = {
+static const char *const af_family_key_strings[AF_MAX+1] = {
   "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
   "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
   "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
@@ -155,10 +157,10 @@ static const char *af_family_key_strings[AF_MAX+1] = {
   "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
   "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
-  "sk_lock-AF_IEEE802154",
+  "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" ,
   "sk_lock-AF_MAX"
 };
-static const char *af_family_slock_key_strings[AF_MAX+1] = {
+static const char *const af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
   "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
   "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
@@ -171,10 +173,10 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
   "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
   "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
-  "slock-AF_IEEE802154",
+  "slock-AF_IEEE802154", "slock-AF_CAIF" ,
   "slock-AF_MAX"
 };
-static const char *af_family_clock_key_strings[AF_MAX+1] = {
+static const char *const af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
   "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
   "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
@@ -187,7 +189,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = {
   "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
   "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
-  "clock-AF_IEEE802154",
+  "clock-AF_IEEE802154", "clock-AF_CAIF" ,
   "clock-AF_MAX"
 };
 
@@ -217,6 +219,11 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 EXPORT_SYMBOL(sysctl_optmem_max);
 
+#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
+int net_cls_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_cls_subsys_id);
+#endif
+
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
 	struct timeval tv;
@@ -274,25 +281,27 @@ static void sock_disable_timestamp(struct sock *sk, int flag)
 
 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
-	int err = 0;
+	int err;
 	int skb_len;
+	unsigned long flags;
+	struct sk_buff_head *list = &sk->sk_receive_queue;
 
 	/* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
 	   number of warnings when compiling with -W --ANK
 	 */
 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
 	    (unsigned)sk->sk_rcvbuf) {
-		err = -ENOMEM;
-		goto out;
+		atomic_inc(&sk->sk_drops);
+		return -ENOMEM;
 	}
 
 	err = sk_filter(sk, skb);
 	if (err)
-		goto out;
+		return err;
 
 	if (!sk_rmem_schedule(sk, skb->truesize)) {
-		err = -ENOBUFS;
-		goto out;
+		atomic_inc(&sk->sk_drops);
+		return -ENOBUFS;
 	}
 
 	skb->dev = NULL;
@@ -305,12 +314,19 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	skb_len = skb->len;
 
-	skb_queue_tail(&sk->sk_receive_queue, skb);
+	/* we escape from rcu protected region, make sure we don't leak
+	 * a norefcounted dst
+	 */
+	skb_dst_force(skb);
+
+	spin_lock_irqsave(&list->lock, flags);
+	skb->dropcount = atomic_read(&sk->sk_drops);
+	__skb_queue_tail(list, skb);
+	spin_unlock_irqrestore(&list->lock, flags);
 
 	if (!sock_flag(sk, SOCK_DEAD))
 		sk->sk_data_ready(sk, skb_len);
-out:
-	return err;
+	return 0;
 }
 EXPORT_SYMBOL(sock_queue_rcv_skb);
 
@@ -323,6 +339,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 
 	skb->dev = NULL;
 
+	if (sk_rcvqueues_full(sk, skb)) {
+		atomic_inc(&sk->sk_drops);
+		goto discard_and_relse;
+	}
 	if (nested)
 		bh_lock_sock_nested(sk);
 	else
@@ -336,8 +356,12 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 		rc = sk_backlog_rcv(sk, skb);
 
 		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-	} else
-		sk_add_backlog(sk, skb);
+	} else if (sk_add_backlog(sk, skb)) {
+		bh_unlock_sock(sk);
+		atomic_inc(&sk->sk_drops);
+		goto discard_and_relse;
+	}
+
 	bh_unlock_sock(sk);
 out:
 	sock_put(sk);
@@ -348,12 +372,19 @@ discard_and_relse:
 }
 EXPORT_SYMBOL(sk_receive_skb);
 
+void sk_reset_txq(struct sock *sk)
+{
+	sk_tx_queue_clear(sk);
+}
+EXPORT_SYMBOL(sk_reset_txq);
+
 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
 {
-	struct dst_entry *dst = sk->sk_dst_cache;
+	struct dst_entry *dst = __sk_dst_get(sk);
 
 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
-		sk->sk_dst_cache = NULL;
+		sk_tx_queue_clear(sk);
+		rcu_assign_pointer(sk->sk_dst_cache, NULL);
 		dst_release(dst);
 		return NULL;
 	}
@@ -406,17 +437,18 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
 	if (copy_from_user(devname, optval, optlen))
 		goto out;
 
-	if (devname[0] == '\0') {
-		index = 0;
-	} else {
-		struct net_device *dev = dev_get_by_name(net, devname);
+	index = 0;
+	if (devname[0] != '\0') {
+		struct net_device *dev;
 
+		rcu_read_lock();
+		dev = dev_get_by_name_rcu(net, devname);
+		if (dev)
+			index = dev->ifindex;
+		rcu_read_unlock();
 		ret = -ENODEV;
 		if (!dev)
 			goto out;
-
-		index = dev->ifindex;
-		dev_put(dev);
 	}
 
 	lock_sock(sk);
@@ -446,7 +478,7 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
  */
 
 int sock_setsockopt(struct socket *sock, int level, int optname,
-		    char __user *optval, int optlen)
+		    char __user *optval, unsigned int optlen)
 {
 	struct sock *sk = sock->sk;
 	int val;
@@ -482,6 +514,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 		sk->sk_reuse = valbool;
 		break;
 	case SO_TYPE:
+	case SO_PROTOCOL:
+	case SO_DOMAIN:
 	case SO_ERROR:
 		ret = -ENOPROTOOPT;
 		break;
@@ -631,7 +665,7 @@ set_rcvbuf:
 
 	case SO_TIMESTAMPING:
 		if (val & ~SOF_TIMESTAMPING_MASK) {
-			ret = EINVAL;
+			ret = -EINVAL;
 			break;
 		}
 		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
@@ -700,6 +734,12 @@ set_rcvbuf:
 
 		/* We implement the SO_SNDLOWAT etc to
 		   not be settable (1003.1g 5.3) */
+	case SO_RXQ_OVFL:
+		if (valbool)
+			sock_set_flag(sk, SOCK_RXQ_OVFL);
+		else
+			sock_reset_flag(sk, SOCK_RXQ_OVFL);
+		break;
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -710,6 +750,20 @@ set_rcvbuf:
 EXPORT_SYMBOL(sock_setsockopt);
 
 
+void cred_to_ucred(struct pid *pid, const struct cred *cred,
+		   struct ucred *ucred)
+{
+	ucred->pid = pid_vnr(pid);
+	ucred->uid = ucred->gid = -1;
+	if (cred) {
+		struct user_namespace *current_ns = current_user_ns();
+
+		ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid);
+		ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid);
+	}
+}
+EXPORT_SYMBOL_GPL(cred_to_ucred);
+
 int sock_getsockopt(struct socket *sock, int level, int optname,
 		    char __user *optval, int __user *optlen)
 {
@@ -721,7 +775,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		struct timeval tm;
 	} v;
 
-	unsigned int lv = sizeof(int);
+	int lv = sizeof(int);
 	int len;
 
 	if (get_user(len, optlen))
@@ -764,6 +818,14 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_type;
 		break;
 
+	case SO_PROTOCOL:
+		v.val = sk->sk_protocol;
+		break;
+
+	case SO_DOMAIN:
+		v.val = sk->sk_family;
+		break;
+
 	case SO_ERROR:
 		v.val = -sock_error(sk);
 		if (v.val == 0)
@@ -854,11 +916,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case SO_PEERCRED:
-		if (len > sizeof(sk->sk_peercred))
-			len = sizeof(sk->sk_peercred);
-		if (copy_to_user(optval, &sk->sk_peercred, len))
+	{
+		struct ucred peercred;
+		if (len > sizeof(peercred))
+			len = sizeof(peercred);
+		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+		if (copy_to_user(optval, &peercred, len))
 			return -EFAULT;
 		goto lenout;
+	}
 
 	case SO_PEERNAME:
 	{
@@ -891,6 +957,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_mark;
 		break;
 
+	case SO_RXQ_OVFL:
+		v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -919,19 +989,57 @@ static inline void sock_lock_init(struct sock *sk)
 			af_family_keys + sk->sk_family);
 }
 
+/*
+ * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
+ * even temporarily, because of RCU lookups. sk_node should also be left as is.
+ * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
+ */
 static void sock_copy(struct sock *nsk, const struct sock *osk)
 {
 #ifdef CONFIG_SECURITY_NETWORK
 	void *sptr = nsk->sk_security;
 #endif
+	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
+
+	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
+	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
 
-	memcpy(nsk, osk, osk->sk_prot->obj_size);
 #ifdef CONFIG_SECURITY_NETWORK
 	nsk->sk_security = sptr;
 	security_sk_clone(osk, nsk);
 #endif
 }
 
+/*
+ * Caches using SLAB_DESTROY_BY_RCU should leave the .next pointer of nulls
+ * nodes unmodified. Special care is taken when initializing an object to zero.
+ */
+static inline void sk_prot_clear_nulls(struct sock *sk, int size)
+{
+	if (offsetof(struct sock, sk_node.next) != 0)
+		memset(sk, 0, offsetof(struct sock, sk_node.next));
+	memset(&sk->sk_node.pprev, 0,
+	       size - offsetof(struct sock, sk_node.pprev));
+}
+
+void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
+{
+	unsigned long nulls1, nulls2;
+
+	nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
+	nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
+	if (nulls1 > nulls2)
+		swap(nulls1, nulls2);
+
+	if (nulls1 != 0)
+		memset((char *)sk, 0, nulls1);
+	memset((char *)sk + nulls1 + sizeof(void *), 0,
+	       nulls2 - nulls1 - sizeof(void *));
+	memset((char *)sk + nulls2 + sizeof(void *), 0,
+	       size - nulls2 - sizeof(void *));
+}
+EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
+
 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
 		int family)
 {
@@ -939,9 +1047,17 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
 	struct kmem_cache *slab;
 
 	slab = prot->slab;
-	if (slab != NULL)
-		sk = kmem_cache_alloc(slab, priority);
-	else
+	if (slab != NULL) {
+		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
+		if (!sk)
+			return sk;
+		if (priority & __GFP_ZERO) {
+			if (prot->clear_sk)
+				prot->clear_sk(sk, prot->obj_size);
+			else
+				sk_prot_clear_nulls(sk, prot->obj_size);
+		}
+	} else
 		sk = kmalloc(prot->obj_size, priority);
 
 	if (sk != NULL) {
@@ -952,6 +1068,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
 
 		if (!try_module_get(prot->owner))
 			goto out_free_sec;
+		sk_tx_queue_clear(sk);
 	}
 
 	return sk;
@@ -982,6 +1099,20 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
 	module_put(owner);
 }
 
+#ifdef CONFIG_CGROUPS
+void sock_update_classid(struct sock *sk)
+{
+	u32 classid;
+
+	rcu_read_lock();  /* doing current task, which cannot vanish. */
+	classid = task_cls_classid(current);
+	rcu_read_unlock();
+	if (classid && classid != sk->sk_classid)
+		sk->sk_classid = classid;
+}
+EXPORT_SYMBOL(sock_update_classid);
+#endif
+
 /**
  *	sk_alloc - All socket objects are allocated here
  *	@net: the applicable net namespace
@@ -1004,6 +1135,9 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		sk->sk_prot = sk->sk_prot_creator = prot;
 		sock_lock_init(sk);
 		sock_net_set(sk, get_net(net));
+		atomic_set(&sk->sk_wmem_alloc, 1);
+
+		sock_update_classid(sk);
 	}
 
 	return sk;
@@ -1017,7 +1151,8 @@ static void __sk_free(struct sock *sk)
 	if (sk->sk_destruct)
 		sk->sk_destruct(sk);
 
-	filter = rcu_dereference(sk->sk_filter);
+	filter = rcu_dereference_check(sk->sk_filter,
+				       atomic_read(&sk->sk_wmem_alloc) == 0);
 	if (filter) {
 		sk_filter_uncharge(sk, filter);
 		rcu_assign_pointer(sk->sk_filter, NULL);
@@ -1030,6 +1165,9 @@ static void __sk_free(struct sock *sk)
 		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
 		       __func__, atomic_read(&sk->sk_omem_alloc));
 
+	if (sk->sk_peer_cred)
+		put_cred(sk->sk_peer_cred);
+	put_pid(sk->sk_peer_pid);
 	put_net(sock_net(sk));
 	sk_prot_free(sk->sk_prot_creator, sk);
 }
@@ -1082,6 +1220,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		sock_lock_init(newsk);
 		bh_lock_sock(newsk);
 		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
+		newsk->sk_backlog.len = 0;
 
 		atomic_set(&newsk->sk_rmem_alloc, 0);
 		/*
@@ -1095,7 +1234,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		skb_queue_head_init(&newsk->sk_async_wait_queue);
 #endif
 
-		rwlock_init(&newsk->sk_dst_lock);
+		spin_lock_init(&newsk->sk_dst_lock);
 		rwlock_init(&newsk->sk_callback_lock);
 		lockdep_set_class_and_name(&newsk->sk_callback_lock,
 				af_callback_keys + newsk->sk_family,
@@ -1110,7 +1249,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		sock_reset_flag(newsk, SOCK_DONE);
 		skb_queue_head_init(&newsk->sk_error_queue);
 
-		filter = newsk->sk_filter;
+		filter = rcu_dereference_protected(newsk->sk_filter, 1);
 		if (filter != NULL)
 			sk_filter_charge(newsk, filter);
 
@@ -1125,6 +1264,11 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 
 		newsk->sk_err	   = 0;
 		newsk->sk_priority = 0;
+		/*
+		 * Before updating sk_refcnt, we must commit prior changes to memory
+		 * (Documentation/RCU/rculist_nulls.txt for details)
+		 */
+		smp_wmb();
 		atomic_set(&newsk->sk_refcnt, 2);
 
 		/*
@@ -1140,10 +1284,14 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		 */
 		sk_refcnt_debug_inc(newsk);
 		sk_set_socket(newsk, NULL);
-		newsk->sk_sleep	 = NULL;
+		newsk->sk_wq = NULL;
 
 		if (newsk->sk_prot->sockets_allocated)
 			percpu_counter_inc(newsk->sk_prot->sockets_allocated);
+
+		if (sock_flag(newsk, SOCK_TIMESTAMP) ||
+		    sock_flag(newsk, SOCK_TIMESTAMPING_RX_SOFTWARE))
+			net_enable_timestamp();
 	}
 out:
 	return newsk;
@@ -1156,6 +1304,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 	sk->sk_route_caps = dst->dev->features;
 	if (sk->sk_route_caps & NETIF_F_GSO)
 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
+	sk->sk_route_caps &= ~sk->sk_route_nocaps;
 	if (sk_can_gso(sk)) {
 		if (dst->header_len) {
 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
@@ -1169,12 +1318,12 @@ EXPORT_SYMBOL_GPL(sk_setup_caps);
 
 void __init sk_init(void)
 {
-	if (num_physpages <= 4096) {
+	if (totalram_pages <= 4096) {
 		sysctl_wmem_max = 32767;
 		sysctl_rmem_max = 32767;
 		sysctl_wmem_default = 32767;
 		sysctl_rmem_default = 32767;
-	} else if (num_physpages >= 131072) {
+	} else if (totalram_pages >= 131072) {
 		sysctl_wmem_max = 131071;
 		sysctl_rmem_max = 131071;
 	}
@@ -1191,17 +1340,22 @@ void __init sk_init(void)
 void sock_wfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
-	int res;
+	unsigned int len = skb->truesize;
 
-	/* In case it might be waiting for more memory. */
-	res = atomic_sub_return(skb->truesize, &sk->sk_wmem_alloc);
-	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
+	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
+		/*
+		 * Keep a reference on sk_wmem_alloc, this will be released
+		 * after sk_write_space() call
+		 */
+		atomic_sub(len - 1, &sk->sk_wmem_alloc);
 		sk->sk_write_space(sk);
+		len = 1;
+	}
 	/*
-	 * if sk_wmem_alloc reached 0, we are last user and should
-	 * free this sock, as sk_free() call could not do it.
+	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
+	 * could not do because of in-flight packets
 	 */
-	if (res == 0)
+	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
 		__sk_free(sk);
 }
 EXPORT_SYMBOL(sock_wfree);
@@ -1212,9 +1366,10 @@ EXPORT_SYMBOL(sock_wfree);
 void sock_rfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
+	unsigned int len = skb->truesize;
 
-	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
-	sk_mem_uncharge(skb->sk, skb->truesize);
+	atomic_sub(len, &sk->sk_rmem_alloc);
+	sk_mem_uncharge(sk, len);
 }
 EXPORT_SYMBOL(sock_rfree);
 
@@ -1223,9 +1378,9 @@ int sock_i_uid(struct sock *sk)
 {
 	int uid;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 	return uid;
 }
 EXPORT_SYMBOL(sock_i_uid);
@@ -1234,9 +1389,9 @@ unsigned long sock_i_ino(struct sock *sk)
 {
 	unsigned long ino;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 	return ino;
 }
 EXPORT_SYMBOL(sock_i_ino);
@@ -1319,7 +1474,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
 		if (signal_pending(current))
 			break;
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
 			break;
 		if (sk->sk_shutdown & SEND_SHUTDOWN)
@@ -1328,7 +1483,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
 			break;
 		timeo = schedule_timeout(timeo);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return timeo;
 }
 
@@ -1429,6 +1584,8 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
 EXPORT_SYMBOL(sock_alloc_send_skb);
 
 static void __lock_sock(struct sock *sk)
+	__releases(&sk->sk_lock.slock)
+	__acquires(&sk->sk_lock.slock)
 {
 	DEFINE_WAIT(wait);
 
@@ -1445,6 +1602,8 @@ static void __lock_sock(struct sock *sk)
 }
 
 static void __release_sock(struct sock *sk)
+	__releases(&sk->sk_lock.slock)
+	__acquires(&sk->sk_lock.slock)
 {
 	struct sk_buff *skb = sk->sk_backlog.head;
 
@@ -1455,6 +1614,7 @@ static void __release_sock(struct sock *sk)
 		do {
 			struct sk_buff *next = skb->next;
 
+			WARN_ON_ONCE(skb_dst_is_noref(skb));
 			skb->next = NULL;
 			sk_backlog_rcv(sk, skb);
 
@@ -1471,6 +1631,12 @@ static void __release_sock(struct sock *sk)
 
 		bh_lock_sock(sk);
 	} while ((skb = sk->sk_backlog.head) != NULL);
+
+	/*
+	 * Doing the zeroing here guarantees we cannot loop forever
+	 * while a wild producer attempts to flood us.
+	 */
+	sk->sk_backlog.len = 0;
 }
 
 /**
@@ -1488,11 +1654,11 @@ int sk_wait_data(struct sock *sk, long *timeo)
 	int rc;
 	DEFINE_WAIT(wait);
 
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
 	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
 	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return rc;
 }
 EXPORT_SYMBOL(sk_wait_data);
@@ -1511,10 +1677,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 {
 	struct proto *prot = sk->sk_prot;
 	int amt = sk_mem_pages(size);
-	int allocated;
+	long allocated;
 
 	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
-	allocated = atomic_add_return(amt, prot->memory_allocated);
+	allocated = atomic_long_add_return(amt, prot->memory_allocated);
 
 	/* Under limit. */
 	if (allocated <= prot->sysctl_mem[0]) {
@@ -1572,7 +1738,7 @@ suppress_allocation:
 
 	/* Alas. Undo changes. */
 	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
-	atomic_sub(amt, prot->memory_allocated);
+	atomic_long_sub(amt, prot->memory_allocated);
 	return 0;
 }
 EXPORT_SYMBOL(__sk_mem_schedule);
@@ -1585,12 +1751,12 @@ void __sk_mem_reclaim(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
 
-	atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
+	atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
 		   prot->memory_allocated);
 	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
 
 	if (prot->memory_pressure && *prot->memory_pressure &&
-	    (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
+	    (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0]))
 		*prot->memory_pressure = 0;
 }
 EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -1660,7 +1826,7 @@ int sock_no_shutdown(struct socket *sock, int how)
 EXPORT_SYMBOL(sock_no_shutdown);
 
 int sock_no_setsockopt(struct socket *sock, int level, int optname,
-		    char __user *optval, int optlen)
+		    char __user *optval, unsigned int optlen)
 {
 	return -EOPNOTSUPP;
 }
@@ -1714,41 +1880,53 @@ EXPORT_SYMBOL(sock_no_sendpage);
 
 static void sock_def_wakeup(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible_all(sk->sk_sleep);
-	read_unlock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_all(&wq->wait);
+	rcu_read_unlock();
 }
 
 static void sock_def_error_report(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible_poll(sk->sk_sleep, POLLERR);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_poll(&wq->wait, POLLERR);
 	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static void sock_def_readable(struct sock *sk, int len)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
 						POLLRDNORM | POLLRDBAND);
 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static void sock_def_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
 
 	/* Do not wake up a writer until he can make "significant"
 	 * progress.  --DaveM
 	 */
 	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
-		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-			wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
 						POLLWRNORM | POLLWRBAND);
 
 		/* Should agree with poll, otherwise some programs break */
@@ -1756,7 +1934,7 @@ static void sock_def_write_space(struct sock *sk)
 			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static void sock_def_destruct(struct sock *sk)
@@ -1810,12 +1988,12 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	if (sock) {
 		sk->sk_type	=	sock->type;
-		sk->sk_sleep	=	&sock->wait;
+		sk->sk_wq	=	sock->wq;
 		sock->sk	=	sk;
 	} else
-		sk->sk_sleep	=	NULL;
+		sk->sk_wq	=	NULL;
 
-	rwlock_init(&sk->sk_dst_lock);
+	spin_lock_init(&sk->sk_dst_lock);
 	rwlock_init(&sk->sk_callback_lock);
 	lockdep_set_class_and_name(&sk->sk_callback_lock,
 			af_callback_keys + sk->sk_family,
@@ -1830,9 +2008,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_sndmsg_page	=	NULL;
 	sk->sk_sndmsg_off	=	0;
 
-	sk->sk_peercred.pid 	=	0;
-	sk->sk_peercred.uid	=	-1;
-	sk->sk_peercred.gid	=	-1;
+	sk->sk_peer_pid 	=	NULL;
+	sk->sk_peer_cred	=	NULL;
 	sk->sk_write_pending	=	0;
 	sk->sk_rcvlowat		=	1;
 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
@@ -1840,8 +2017,12 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	sk->sk_stamp = ktime_set(-1L, 0);
 
+	/*
+	 * Before updating sk_refcnt, we must commit prior changes to memory
+	 * (Documentation/RCU/rculist_nulls.txt for details)
+	 */
+	smp_wmb();
 	atomic_set(&sk->sk_refcnt, 1);
-	atomic_set(&sk->sk_wmem_alloc, 1);
 	atomic_set(&sk->sk_drops, 0);
 }
 EXPORT_SYMBOL(sock_init_data);
@@ -1879,6 +2060,39 @@ void release_sock(struct sock *sk)
 }
 EXPORT_SYMBOL(release_sock);
 
+/**
+ * lock_sock_fast - fast version of lock_sock
+ * @sk: socket
+ *
+ * Use this only for very short sections in which the process won't block.
+ * Returns false if the fast path was taken:
+ *   sk_lock.slock locked, owned = 0, BH disabled
+ * Returns true if the slow path was taken:
+ *   sk_lock.slock unlocked, owned = 1, BH enabled
+ */
+bool lock_sock_fast(struct sock *sk)
+{
+	might_sleep();
+	spin_lock_bh(&sk->sk_lock.slock);
+
+	if (!sk->sk_lock.owned)
+		/*
+		 * Fast path: return with slock held and BH still disabled
+		 */
+		return false;
+
+	__lock_sock(sk);
+	sk->sk_lock.owned = 1;
+	spin_unlock(&sk->sk_lock.slock);
+	/*
+	 * The sk_lock has mutex_lock() semantics here:
+	 */
+	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+	local_bh_enable();
+	return true;
+}
+EXPORT_SYMBOL(lock_sock_fast);
+
 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 {
 	struct timeval tv;
@@ -1977,7 +2191,7 @@ EXPORT_SYMBOL(sock_common_recvmsg);
  *	Set socket options on an inet socket.
  */
 int sock_common_setsockopt(struct socket *sock, int level, int optname,
-			   char __user *optval, int optlen)
+			   char __user *optval, unsigned int optlen)
 {
 	struct sock *sk = sock->sk;
 
@@ -1987,7 +2201,7 @@ EXPORT_SYMBOL(sock_common_setsockopt);
 
 #ifdef CONFIG_COMPAT
 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
-				  char __user *optval, int optlen)
+				  char __user *optval, unsigned int optlen)
 {
 	struct sock *sk = sock->sk;
 
@@ -2049,8 +2263,7 @@ static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
 #ifdef CONFIG_NET_NS
 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
 {
-	int cpu = smp_processor_id();
-	per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val;
+	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
 }
 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
 
@@ -2066,13 +2279,13 @@ int sock_prot_inuse_get(struct net *net, struct proto *prot)
 }
 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
 
-static int sock_inuse_init_net(struct net *net)
+static int __net_init sock_inuse_init_net(struct net *net)
 {
 	net->core.inuse = alloc_percpu(struct prot_inuse);
 	return net->core.inuse ? 0 : -ENOMEM;
 }
 
-static void sock_inuse_exit_net(struct net *net)
+static void __net_exit sock_inuse_exit_net(struct net *net)
 {
 	free_percpu(net->core.inuse);
 }
@@ -2096,7 +2309,7 @@ static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
 
 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
 {
-	__get_cpu_var(prot_inuse).val[prot->inuse_idx] += val;
+	__this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
 }
 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
 
@@ -2154,13 +2367,10 @@ int proto_register(struct proto *prot, int alloc_slab)
 		}
 
 		if (prot->rsk_prot != NULL) {
-			static const char mask[] = "request_sock_%s";
-
-			prot->rsk_prot->slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+			prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
 			if (prot->rsk_prot->slab_name == NULL)
 				goto out_free_sock_slab;
 
-			sprintf(prot->rsk_prot->slab_name, mask, prot->name);
 			prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
 								 prot->rsk_prot->obj_size, 0,
 								 SLAB_HWCACHE_ALIGN, NULL);
@@ -2173,14 +2383,11 @@ int proto_register(struct proto *prot, int alloc_slab)
 		}
 
 		if (prot->twsk_prot != NULL) {
-			static const char mask[] = "tw_sock_%s";
-
-			prot->twsk_prot->twsk_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
 
 			if (prot->twsk_prot->twsk_slab_name == NULL)
 				goto out_free_request_sock_slab;
 
-			sprintf(prot->twsk_prot->twsk_slab_name, mask, prot->name);
 			prot->twsk_prot->twsk_slab =
 				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
 						  prot->twsk_prot->twsk_obj_size,
@@ -2207,7 +2414,8 @@ out_free_request_sock_slab:
 		prot->rsk_prot->slab = NULL;
 	}
 out_free_request_sock_slab_name:
-	kfree(prot->rsk_prot->slab_name);
+	if (prot->rsk_prot)
+		kfree(prot->rsk_prot->slab_name);
 out_free_sock_slab:
 	kmem_cache_destroy(prot->slab);
 	prot->slab = NULL;
@@ -2268,12 +2476,12 @@ static char proto_method_implemented(const void *method)
 
 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
 {
-	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
+	seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
 			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
 		   proto->name,
 		   proto->obj_size,
 		   sock_prot_inuse_get(seq_file_net(seq), proto),
-		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
+		   proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L,
 		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
 		   proto->max_header,
 		   proto->slab == NULL ? "no" : "yes",
diff --git a/net/core/stream.c b/net/core/stream.c
index a37debfeb1b..f5df85dcd20 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -28,18 +28,21 @@
 void sk_stream_write_space(struct sock *sk)
 {
 	struct socket *sock = sk->sk_socket;
+	struct socket_wq *wq;
 
 	if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
 		clear_bit(SOCK_NOSPACE, &sock->flags);
 
-		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-			wake_up_interruptible_poll(sk->sk_sleep, POLLOUT |
+		rcu_read_lock();
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible_poll(&wq->wait, POLLOUT |
 						POLLWRNORM | POLLWRBAND);
-		if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
+		if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
 			sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
+		rcu_read_unlock();
 	}
 }
-
 EXPORT_SYMBOL(sk_stream_write_space);
 
 /**
@@ -66,18 +69,17 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
 		if (signal_pending(tsk))
 			return sock_intr_errno(*timeo_p);
 
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		sk->sk_write_pending++;
 		done = sk_wait_event(sk, timeo_p,
 				     !sk->sk_err &&
 				     !((1 << sk->sk_state) &
 				       ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 		sk->sk_write_pending--;
 	} while (!done);
 	return 0;
 }
-
 EXPORT_SYMBOL(sk_stream_wait_connect);
 
 /**
@@ -96,16 +98,15 @@ void sk_stream_wait_close(struct sock *sk, long timeout)
 		DEFINE_WAIT(wait);
 
 		do {
-			prepare_to_wait(sk->sk_sleep, &wait,
+			prepare_to_wait(sk_sleep(sk), &wait,
 					TASK_INTERRUPTIBLE);
 			if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
 				break;
 		} while (!signal_pending(current) && timeout);
 
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 	}
 }
-
 EXPORT_SYMBOL(sk_stream_wait_close);
 
 /**
@@ -126,7 +127,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
 	while (1) {
 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
 		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
 			goto do_error;
@@ -140,10 +141,10 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
 
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		sk->sk_write_pending++;
-		sk_wait_event(sk, &current_timeo, !sk->sk_err &&
-						  !(sk->sk_shutdown & SEND_SHUTDOWN) &&
-						  sk_stream_memory_free(sk) &&
-						  vm_wait);
+		sk_wait_event(sk, &current_timeo, sk->sk_err ||
+						  (sk->sk_shutdown & SEND_SHUTDOWN) ||
+						  (sk_stream_memory_free(sk) &&
+						  !vm_wait));
 		sk->sk_write_pending--;
 
 		if (vm_wait) {
@@ -157,7 +158,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
 		*timeo_p = current_timeo;
 	}
 out:
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return err;
 
 do_error:
@@ -170,7 +171,6 @@ do_interrupted:
 	err = sock_intr_errno(*timeo_p);
 	goto out;
 }
-
 EXPORT_SYMBOL(sk_stream_wait_memory);
 
 int sk_stream_error(struct sock *sk, int flags, int err)
@@ -181,7 +181,6 @@ int sk_stream_error(struct sock *sk, int flags, int err)
 		send_sig(SIGPIPE, current, 0);
 	return err;
 }
-
 EXPORT_SYMBOL(sk_stream_error);
 
 void sk_stream_kill_queues(struct sock *sk)
@@ -206,5 +205,4 @@ void sk_stream_kill_queues(struct sock *sk)
 	 * have gone away, only the net layer knows can touch it.
 	 */
 }
-
 EXPORT_SYMBOL(sk_stream_kill_queues);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 7db1de0497c..385b6095fdc 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -10,14 +10,77 @@
 #include <linux/module.h>
 #include <linux/socket.h>
 #include <linux/netdevice.h>
+#include <linux/ratelimit.h>
+#include <linux/vmalloc.h>
 #include <linux/init.h>
+#include <linux/slab.h>
+
 #include <net/ip.h>
 #include <net/sock.h>
 
+#ifdef CONFIG_RPS
+static int rps_sock_flow_sysctl(ctl_table *table, int write,
+				void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	unsigned int orig_size, size;
+	int ret, i;
+	ctl_table tmp = {
+		.data = &size,
+		.maxlen = sizeof(size),
+		.mode = table->mode
+	};
+	struct rps_sock_flow_table *orig_sock_table, *sock_table;
+	static DEFINE_MUTEX(sock_flow_mutex);
+
+	mutex_lock(&sock_flow_mutex);
+	/* rps_sock_flow_entries handler; entry count is mask + 1, or 0 */
+	orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
+					lockdep_is_held(&sock_flow_mutex));
+	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
+	/* proc_dointvec() works on the local 'size', not on the table */
+	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+
+	if (write) {
+		if (size) {
+			if (size > 1<<30) {
+				/* Enforce limit to prevent overflow */
+				mutex_unlock(&sock_flow_mutex);
+				return -EINVAL;
+			}
+			size = roundup_pow_of_two(size);
+			if (size != orig_size) {
+				sock_table =
+				    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
+				if (!sock_table) {
+					mutex_unlock(&sock_flow_mutex);
+					return -ENOMEM;
+				}
+
+				sock_table->mask = size - 1;
+			} else
+				sock_table = orig_sock_table;
+			/* Reset all entries, even when reusing the table */
+			for (i = 0; i < size; i++)
+				sock_table->ents[i] = RPS_NO_CPU;
+		} else
+			sock_table = NULL;
+		/* Publish, wait out an RCU grace period, free the old one */
+		if (sock_table != orig_sock_table) {
+			rcu_assign_pointer(rps_sock_flow_table, sock_table);
+			synchronize_rcu();
+			vfree(orig_sock_table);
+		}
+	}
+
+	mutex_unlock(&sock_flow_mutex);
+
+	return ret;
+}
+#endif /* CONFIG_RPS */
+
 static struct ctl_table net_core_table[] = {
 #ifdef CONFIG_NET
 	{
-		.ctl_name	= NET_CORE_WMEM_MAX,
 		.procname	= "wmem_max",
 		.data		= &sysctl_wmem_max,
 		.maxlen		= sizeof(int),
@@ -25,7 +88,6 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 	{
-		.ctl_name	= NET_CORE_RMEM_MAX,
 		.procname	= "rmem_max",
 		.data		= &sysctl_rmem_max,
 		.maxlen		= sizeof(int),
@@ -33,7 +95,6 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 	{
-		.ctl_name	= NET_CORE_WMEM_DEFAULT,
 		.procname	= "wmem_default",
 		.data		= &sysctl_wmem_default,
 		.maxlen		= sizeof(int),
@@ -41,7 +102,6 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 	{
-		.ctl_name	= NET_CORE_RMEM_DEFAULT,
 		.procname	= "rmem_default",
 		.data		= &sysctl_rmem_default,
 		.maxlen		= sizeof(int),
@@ -49,7 +109,6 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 	{
-		.ctl_name	= NET_CORE_DEV_WEIGHT,
 		.procname	= "dev_weight",
 		.data		= &weight_p,
 		.maxlen		= sizeof(int),
@@ -57,7 +116,6 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 	{
-		.ctl_name	= NET_CORE_MAX_BACKLOG,
 		.procname	= "netdev_max_backlog",
 		.data		= &netdev_max_backlog,
 		.maxlen		= sizeof(int),
@@ -65,16 +123,20 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 	{
-		.ctl_name	= NET_CORE_MSG_COST,
+		.procname	= "netdev_tstamp_prequeue",
+		.data		= &netdev_tstamp_prequeue,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
 		.procname	= "message_cost",
 		.data		= &net_ratelimit_state.interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
-		.strategy	= sysctl_jiffies,
 	},
 	{
-		.ctl_name	= NET_CORE_MSG_BURST,
 		.procname	= "message_burst",
 		.data		= &net_ratelimit_state.burst,
 		.maxlen		= sizeof(int),
@@ -82,16 +144,22 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler	= proc_dointvec,
 	},
 	{
-		.ctl_name	= NET_CORE_OPTMEM_MAX,
 		.procname	= "optmem_max",
 		.data		= &sysctl_optmem_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+#ifdef CONFIG_RPS
+	{
+		.procname	= "rps_sock_flow_entries",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= rps_sock_flow_sysctl
+	},
+#endif
 #endif /* CONFIG_NET */
 	{
-		.ctl_name	= NET_CORE_BUDGET,
 		.procname	= "netdev_budget",
 		.data		= &netdev_budget,
 		.maxlen		= sizeof(int),
@@ -99,31 +167,29 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 	{
-		.ctl_name	= NET_CORE_WARNINGS,
 		.procname	= "warnings",
 		.data		= &net_msg_warn,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
-	{ .ctl_name = 0 }
+	{ }
 };
 
 static struct ctl_table netns_core_table[] = {
 	{
-		.ctl_name	= NET_CORE_SOMAXCONN,
 		.procname	= "somaxconn",
 		.data		= &init_net.core.sysctl_somaxconn,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
-	{ .ctl_name = 0 }
+	{ }
 };
 
 __net_initdata struct ctl_path net_core_path[] = {
-	{ .procname = "net", .ctl_name = CTL_NET, },
-	{ .procname = "core", .ctl_name = NET_CORE, },
+	{ .procname = "net", },
+	{ .procname = "core", },
 	{ },
 };
 
@@ -134,7 +200,7 @@ static __net_init int sysctl_core_net_init(struct net *net)
 	net->core.sysctl_somaxconn = SOMAXCONN;
 
 	tbl = netns_core_table;
-	if (net != &init_net) {
+	if (!net_eq(net, &init_net)) {
 		tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
 		if (tbl == NULL)
 			goto err_dup;
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
new file mode 100644
index 00000000000..7e7ca375d43
--- /dev/null
+++ b/net/core/timestamping.c
@@ -0,0 +1,128 @@
+/*
+ * PTP 1588 clock support - support for timestamping in PHY devices
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/errqueue.h>
+#include <linux/phy.h>
+#include <linux/ptp_classify.h>
+#include <linux/skbuff.h>
+
+static struct sock_filter ptp_filter[] = {
+	PTP_FILTER
+};
+
+/* Classify @skb with the PTP socket filter program in ptp_filter. */
+static unsigned int classify(const struct sk_buff *skb)
+{
+	/* The filter only runs when dev, phydev and drv are all non-NULL. */
+	if (unlikely(!skb->dev || !skb->dev->phydev || !skb->dev->phydev->drv))
+		return PTP_CLASS_NONE;
+
+	return sk_run_filter(skb, ptp_filter);
+}
+
+/* Give the PHY driver's txtstamp() hook a clone of a socket-owned PTP @skb. */
+void skb_clone_tx_timestamp(struct sk_buff *skb)
+{
+	struct sock *owner = skb->sk;
+	struct phy_device *phy;
+	struct sk_buff *copy;
+	unsigned int ptp_class;
+
+	if (!owner)
+		return;
+	ptp_class = classify(skb);
+	switch (ptp_class) {
+	case PTP_CLASS_V1_IPV4:
+	case PTP_CLASS_V1_IPV6:
+	case PTP_CLASS_V2_IPV4:
+	case PTP_CLASS_V2_IPV6:
+	case PTP_CLASS_V2_L2:
+	case PTP_CLASS_V2_VLAN:
+		break;
+	default:
+		return;
+	}
+
+	phy = skb->dev->phydev;
+	if (unlikely(!phy->drv->txtstamp))
+		return;
+	copy = skb_clone(skb, GFP_ATOMIC);
+	if (!copy)
+		return;
+	copy->sk = owner;
+	phy->drv->txtstamp(phy, copy, ptp_class);
+}
+
+/* Consumes @skb: queue it on the socket error queue, or free it. */
+void skb_complete_tx_timestamp(struct sk_buff *skb,
+			       struct skb_shared_hwtstamps *hwtstamps)
+{
+	struct sock *sk = skb->sk;
+	struct sock_exterr_skb *serr;
+
+	if (!hwtstamps) {
+		kfree_skb(skb);
+		return;
+	}
+	*skb_hwtstamps(skb) = *hwtstamps;
+	serr = SKB_EXT_ERR(skb);
+	memset(serr, 0, sizeof(*serr));
+	serr->ee.ee_errno = ENOMSG;
+	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+	skb->sk = NULL;
+	if (sock_queue_err_skb(sk, skb))
+		kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
+
+/* Offer a received PTP @skb to the PHY driver's rxtstamp() hook, if any. */
+bool skb_defer_rx_timestamp(struct sk_buff *skb)
+{
+	struct phy_device *phy;
+	unsigned int ptp_class;
+
+	if (skb_headroom(skb) < ETH_HLEN)
+		return false;
+	/* Classify with ETH_HLEN bytes pushed back, then pull them again. */
+	__skb_push(skb, ETH_HLEN);
+	ptp_class = classify(skb);
+	__skb_pull(skb, ETH_HLEN);
+
+	switch (ptp_class) {
+	case PTP_CLASS_V1_IPV4:
+	case PTP_CLASS_V1_IPV6:
+	case PTP_CLASS_V2_IPV4:
+	case PTP_CLASS_V2_IPV6:
+	case PTP_CLASS_V2_L2:
+	case PTP_CLASS_V2_VLAN:
+		break;
+	default:
+		return false;
+	}
+
+	phy = skb->dev->phydev;
+	if (unlikely(!phy->drv->rxtstamp))
+		return false;
+	return phy->drv->rxtstamp(phy, skb, ptp_class);
+}
+
+void __init skb_timestamping_init(void)	/* init-time check of ptp_filter */
+{
+	BUG_ON(sk_chk_filter(ptp_filter, ARRAY_SIZE(ptp_filter)));
+}
diff --git a/net/core/utils.c b/net/core/utils.c
index 83221aee708..5fea0ab2190 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -24,6 +24,8 @@
 #include <linux/types.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
+#include <linux/ratelimit.h>
+
 #include <net/sock.h>
 
 #include <asm/byteorder.h>
@@ -73,9 +75,8 @@ __be32 in_aton(const char *str)
 				str++;
 		}
 	}
-	return(htonl(l));
+	return htonl(l);
 }
-
 EXPORT_SYMBOL(in_aton);
 
 #define IN6PTON_XDIGIT		0x00010000
@@ -91,18 +92,19 @@ EXPORT_SYMBOL(in_aton);
 
 static inline int xdigit2bin(char c, int delim)
 {
+	int val;
+
 	if (c == delim || c == '\0')
 		return IN6PTON_DELIM;
 	if (c == ':')
 		return IN6PTON_COLON_MASK;
 	if (c == '.')
 		return IN6PTON_DOT;
-	if (c >= '0' && c <= '9')
-		return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0'));
-	if (c >= 'a' && c <= 'f')
-		return (IN6PTON_XDIGIT | (c - 'a' + 10));
-	if (c >= 'A' && c <= 'F')
-		return (IN6PTON_XDIGIT | (c - 'A' + 10));
+
+	val = hex_to_bin(c);
+	if (val >= 0)
+		return val | IN6PTON_XDIGIT | (val < 10 ? IN6PTON_DIGIT : 0);
+
 	if (delim == -1)
 		return IN6PTON_DELIM;
 	return IN6PTON_UNKNOWN;
@@ -160,7 +162,6 @@ out:
 		*end = s;
 	return ret;
 }
-
 EXPORT_SYMBOL(in4_pton);
 
 int in6_pton(const char *src, int srclen,
@@ -278,7 +279,6 @@ out:
 		*end = s;
 	return ret;
 }
-
 EXPORT_SYMBOL(in6_pton);
 
 void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,