108 files changed, 38529 insertions, 18472 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ab7a9124f98..438a73aa777 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -3,17 +3,15 @@
 #
 
 #   IPv6 as module will cause a CRASH if you try to unload it
-config IPV6
+menuconfig IPV6
 	tristate "The IPv6 protocol"
 	default m
-	select CRYPTO if IPV6_PRIVACY
-	select CRYPTO_MD5 if IPV6_PRIVACY
 	---help---
 	  This is complemental support for the IP version 6.
 	  You will still be able to do traditional IPv4 networking as well.
 
 	  For general information about IPv6, see
-	  <http://playground.sun.com/pub/ipng/html/ipng-main.html>.
+	  <https://en.wikipedia.org/wiki/IPv6>.
 	  For Linux IPv6 development information, see <http://www.linux-ipv6.org>.
 	  For specific information about IPv6 under Linux, read the HOWTO at
 	  <http://www.bieringer.de/linux/IPv6/>.
@@ -21,26 +19,38 @@ config IPV6
 	  To compile this protocol support as a module, choose M here: the 
 	  module will be called ipv6.
 
-config IPV6_PRIVACY
-	bool "IPv6: Privacy Extensions (RFC 3041) support"
-	depends on IPV6
+if IPV6
+
+config IPV6_ROUTER_PREF
+	bool "IPv6: Router Preference (RFC 4191) support"
+	---help---
+	  Router Preference is an optional extension to the Router
+	  Advertisement message which improves the ability of hosts
+	  to pick an appropriate router, especially when the hosts
+	  are placed in a multi-homed network.
+
+	  If unsure, say N.
+
+config IPV6_ROUTE_INFO
+	bool "IPv6: Route Information (RFC 4191) support"
+	depends on IPV6_ROUTER_PREF
 	---help---
-	  Privacy Extensions for Stateless Address Autoconfiguration in IPv6
-	  support.  With this option, additional periodically-alter 
-	  pseudo-random global-scope unicast address(es) will assigned to
-	  your interface(s).
-	
-	  By default, kernel do not generate temporary addresses.
-	  To use temporary addresses, do
-	
-	        echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr 
+	  This is experimental support of Route Information.
 
-	  See <file:Documentation/networking/ip-sysctl.txt> for details.
+	  If unsure, say N.
+
+config IPV6_OPTIMISTIC_DAD
+	bool "IPv6: Enable RFC 4429 Optimistic DAD"
+	---help---
+	  This is experimental support for optimistic Duplicate
+	  Address Detection.  It allows for autoconfigured addresses
+	  to be used more quickly.
+
+	  If unsure, say N.
 
 config INET6_AH
 	tristate "IPv6: AH transformation"
-	depends on IPV6
-	select XFRM
+	select XFRM_ALGO
 	select CRYPTO
 	select CRYPTO_HMAC
 	select CRYPTO_MD5
@@ -52,11 +62,12 @@ config INET6_AH
 
 config INET6_ESP
 	tristate "IPv6: ESP transformation"
-	depends on IPV6
-	select XFRM
+	select XFRM_ALGO
 	select CRYPTO
+	select CRYPTO_AUTHENC
 	select CRYPTO_HMAC
 	select CRYPTO_MD5
+	select CRYPTO_CBC
 	select CRYPTO_SHA1
 	select CRYPTO_DES
 	---help---
@@ -66,33 +77,185 @@ config INET6_ESP
 
 config INET6_IPCOMP
 	tristate "IPv6: IPComp transformation"
-	depends on IPV6
-	select XFRM
-	select INET6_TUNNEL
-	select CRYPTO
-	select CRYPTO_DEFLATE
+	select INET6_XFRM_TUNNEL
+	select XFRM_IPCOMP
 	---help---
 	  Support for IP Payload Compression Protocol (IPComp) (RFC3173),
 	  typically needed for IPsec.
 
 	  If unsure, say Y.
 
+config IPV6_MIP6
+	tristate "IPv6: Mobility"
+	select XFRM
+	---help---
+	  Support for IPv6 Mobility described in RFC 3775.
+
+	  If unsure, say N.
+
+config INET6_XFRM_TUNNEL
+	tristate
+	select INET6_TUNNEL
+	default n
+
 config INET6_TUNNEL
-	tristate "IPv6: tunnel transformation"
-	depends on IPV6
+	tristate
+	default n
+
+config INET6_XFRM_MODE_TRANSPORT
+	tristate "IPv6: IPsec transport mode"
+	default IPV6
 	select XFRM
 	---help---
-	  Support for generic IPv6-in-IPv6 tunnel transformation, which is
-	  required by the IPv6-in-IPv6 tunneling module as well as tunnel mode
-	  IPComp.
-	  
+	  Support for IPsec transport mode.
+
 	  If unsure, say Y.
 
+config INET6_XFRM_MODE_TUNNEL
+	tristate "IPv6: IPsec tunnel mode"
+	default IPV6
+	select XFRM
+	---help---
+	  Support for IPsec tunnel mode.
+
+	  If unsure, say Y.
+
+config INET6_XFRM_MODE_BEET
+	tristate "IPv6: IPsec BEET mode"
+	default IPV6
+	select XFRM
+	---help---
+	  Support for IPsec BEET mode.
+
+	  If unsure, say Y.
+
+config INET6_XFRM_MODE_ROUTEOPTIMIZATION
+	tristate "IPv6: MIPv6 route optimization mode"
+	select XFRM
+	---help---
+	  Support for MIPv6 route optimization mode.
+
+config IPV6_VTI
+tristate "Virtual (secure) IPv6: tunneling"
+	select IPV6_TUNNEL
+	select NET_IP_TUNNEL
+	depends on INET6_XFRM_MODE_TUNNEL
+	---help---
+	Tunneling means encapsulating data of one protocol type within
+	another protocol and sending it over a channel that understands the
+	encapsulating protocol. This can be used with xfrm mode tunnel to give
+	the notion of a secure tunnel for IPSEC and then use routing protocol
+	on top.
+
+config IPV6_SIT
+	tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)"
+	select INET_TUNNEL
+	select NET_IP_TUNNEL
+	select IPV6_NDISC_NODETYPE
+	default y
+	---help---
+	  Tunneling means encapsulating data of one protocol type within
+	  another protocol and sending it over a channel that understands the
+	  encapsulating protocol. This driver implements encapsulation of IPv6
+	  into IPv4 packets. This is useful if you want to connect two IPv6
+	  networks over an IPv4-only path.
+
+	  Saying M here will produce a module called sit. If unsure, say Y.
+
+config IPV6_SIT_6RD
+	bool "IPv6: IPv6 Rapid Deployment (6RD)"
+	depends on IPV6_SIT
+	default n
+	---help---
+	  IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon
+	  mechanisms of 6to4 (RFC3056) to enable a service provider to rapidly
+	  deploy IPv6 unicast service to IPv4 sites to which it provides
+	  customer premise equipment.  Like 6to4, it utilizes stateless IPv6 in
+	  IPv4 encapsulation in order to transit IPv4-only network
+	  infrastructure.  Unlike 6to4, a 6rd service provider uses an IPv6
+	  prefix of its own in place of the fixed 6to4 prefix.
+
+	  With this option enabled, the SIT driver offers 6rd functionality by
+	  providing additional ioctl API to configure the IPv6 Prefix for in
+	  stead of static 2002::/16 for 6to4.
+
+	  If unsure, say N.
+
+config IPV6_NDISC_NODETYPE
+	bool
+
 config IPV6_TUNNEL
-	tristate "IPv6: IPv6-in-IPv6 tunnel"
+	tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)"
+	select INET6_TUNNEL
+	---help---
+	  Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in
+	  RFC 2473.
+
+	  If unsure, say N.
+
+config IPV6_GRE
+	tristate "IPv6: GRE tunnel"
+	select IPV6_TUNNEL
+	select NET_IP_TUNNEL
+	---help---
+	  Tunneling means encapsulating data of one protocol type within
+	  another protocol and sending it over a channel that understands the
+	  encapsulating protocol. This particular tunneling driver implements
+	  GRE (Generic Routing Encapsulation) and at this time allows
+	  encapsulating of IPv4 or IPv6 over existing IPv6 infrastructure.
+	  This driver is useful if the other endpoint is a Cisco router: Cisco
+	  likes GRE much better than the other Linux tunneling driver ("IP
+	  tunneling" above). In addition, GRE allows multicast redistribution
+	  through the tunnel.
+
+	  Saying M here will produce a module called ip6_gre. If unsure, say N.
+
+config IPV6_MULTIPLE_TABLES
+	bool "IPv6: Multiple Routing Tables"
+	select FIB_RULES
+	---help---
+	  Support multiple routing tables.
+
+config IPV6_SUBTREES
+	bool "IPv6: source address based routing"
+	depends on IPV6_MULTIPLE_TABLES
+	---help---
+	  Enable routing by source address or prefix.
+
+	  The destination address is still the primary routing key, so mixing
+	  normal and source prefix specific routes in the same routing table
+	  may sometimes lead to unintended routing behavior.  This can be
+	  avoided by defining different routing tables for the normal and
+	  source prefix specific routes.
+
+	  If unsure, say N.
+
+config IPV6_MROUTE
+	bool "IPv6: multicast routing"
 	depends on IPV6
 	---help---
-	  Support for IPv6-in-IPv6 tunnels described in RFC 2473.
+	  Experimental support for IPv6 multicast forwarding.
+	  If unsure, say N.
 
+config IPV6_MROUTE_MULTIPLE_TABLES
+	bool "IPv6: multicast policy routing"
+	depends on IPV6_MROUTE
+	select FIB_RULES
+	help
+	  Normally, a multicast router runs a userspace daemon and decides
+	  what to do with a multicast packet based on the source and
+	  destination addresses. If you say Y here, the multicast router
+	  will also be able to take interfaces and packet marks into
+	  account and run multiple instances of userspace daemons
+	  simultaneously, each one handling a single table.
+
+	  If unsure, say N.
+
+config IPV6_PIMSM_V2
+	bool "IPv6: PIM-SM version 2 support"
+	depends on IPV6_MROUTE
+	---help---
+	  Support for IPv6 PIM multicast routing protocol PIM-SMv2.
 	  If unsure, say N.
 
+endif # IPV6
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 6460eec834b..2fe68364bb2 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -4,24 +4,44 @@
 
 obj-$(CONFIG_IPV6) += ipv6.o
 
-ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
-		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \
-		protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
-		exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
-		ip6_flowlabel.o ipv6_syms.o netfilter.o
+ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
+		addrlabel.o \
+		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
+		raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
+		exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
+
+ipv6-offload :=	ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o
+
+ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
+ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
 
 ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
-	xfrm6_output.o
+	xfrm6_output.o xfrm6_protocol.o
+ipv6-$(CONFIG_NETFILTER) += netfilter.o
+ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
+ipv6-$(CONFIG_PROC_FS) += proc.o
+ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o
+
 ipv6-objs += $(ipv6-y)
 
 obj-$(CONFIG_INET6_AH) += ah6.o
 obj-$(CONFIG_INET6_ESP) += esp6.o
 obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o
-obj-$(CONFIG_INET6_TUNNEL) += xfrm6_tunnel.o 
+obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o
+obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o
+obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o
+obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
+obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
+obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
+obj-$(CONFIG_IPV6_MIP6) += mip6.o
 obj-$(CONFIG_NETFILTER)	+= netfilter/
 
+obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
+obj-$(CONFIG_IPV6_SIT) += sit.o
 obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
+obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
 
-obj-y += exthdrs_core.o
+obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o
+obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)
 
 obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2c5f57299d6..5667b3003af 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3,11 +3,9 @@
  *	Linux INET6 implementation
  *
  *	Authors:
- *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *	Pedro Roque		<roque@di.fc.ul.pt>
  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
  *
- *	$Id: addrconf.c,v 1.69 2001/10/31 21:55:54 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
  *      as published by the Free Software Foundation; either version
@@ -35,33 +33,44 @@
  *	YOSHIFUJI Hideaki @USAGI	:	ARCnet support
  *	YOSHIFUJI Hideaki @USAGI	:	convert /proc/net/if_inet6 to
  *						seq_file.
+ *	YOSHIFUJI Hideaki @USAGI	:	improved source address
+ *						selection; consider scope,
+ *						status etc.
  */
 
-#include <linux/config.h>
+#define pr_fmt(fmt) "IPv6: " fmt
+
 #include <linux/errno.h>
 #include <linux/types.h>
+#include <linux/kernel.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
-#include <linux/sched.h>
 #include <linux/net.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/if_arcnet.h>
 #include <linux/if_infiniband.h>
 #include <linux/route.h>
 #include <linux/inetdevice.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
+#include <linux/capability.h>
 #include <linux/delay.h>
 #include <linux/notifier.h>
 #include <linux/string.h>
+#include <linux/hash.h>
 
+#include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/snmp.h>
 
+#include <net/af_ieee802154.h>
+#include <net/firewire.h>
 #include <net/ipv6.h>
 #include <net/protocol.h>
 #include <net/ndisc.h>
@@ -69,82 +78,94 @@
 #include <net/addrconf.h>
 #include <net/tcp.h>
 #include <net/ip.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
 #include <linux/if_tunnel.h>
 #include <linux/rtnetlink.h>
-
-#ifdef CONFIG_IPV6_PRIVACY
+#include <linux/netconf.h>
 #include <linux/random.h>
-#include <linux/crypto.h>
-#include <linux/scatterlist.h>
-#endif
-
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <asm/unaligned.h>
 
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/export.h>
 
 /* Set to 3 to get tracing... */
 #define ACONF_DEBUG 2
 
 #if ACONF_DEBUG >= 3
-#define ADBG(x) printk x
+#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
 #else
-#define ADBG(x)
+#define ADBG(fmt, ...) do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
 #endif
 
 #define	INFINITY_LIFE_TIME	0xFFFFFFFF
-#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b)))
+
+static inline u32 cstamp_delta(unsigned long cstamp)
+{
+	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
+}
 
 #ifdef CONFIG_SYSCTL
-static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p);
-static void addrconf_sysctl_unregister(struct ipv6_devconf *p);
+static void addrconf_sysctl_register(struct inet6_dev *idev);
+static void addrconf_sysctl_unregister(struct inet6_dev *idev);
+#else
+static inline void addrconf_sysctl_register(struct inet6_dev *idev)
+{
+}
+
+static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
+{
+}
 #endif
 
-#ifdef CONFIG_IPV6_PRIVACY
-static int __ipv6_regen_rndid(struct inet6_dev *idev);
-static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); 
+static void __ipv6_regen_rndid(struct inet6_dev *idev);
+static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
 static void ipv6_regen_rndid(unsigned long data);
 
-static int desync_factor = MAX_DESYNC_FACTOR * HZ;
-static struct crypto_tfm *md5_tfm;
-static DEFINE_SPINLOCK(md5_tfm_lock);
-#endif
-
+static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
 static int ipv6_count_addresses(struct inet6_dev *idev);
 
 /*
  *	Configured unicast address hash table
  */
-static struct inet6_ifaddr		*inet6_addr_lst[IN6_ADDR_HSIZE];
-static DEFINE_RWLOCK(addrconf_hash_lock);
-
-/* Protects inet6 devices */
-DEFINE_RWLOCK(addrconf_lock);
+static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(addrconf_hash_lock);
 
-static void addrconf_verify(unsigned long);
+static void addrconf_verify(void);
+static void addrconf_verify_rtnl(void);
+static void addrconf_verify_work(struct work_struct *);
 
-static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
-static DEFINE_SPINLOCK(addrconf_verify_lock);
+static struct workqueue_struct *addrconf_wq;
+static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work);
 
 static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
 
+static void addrconf_type_change(struct net_device *dev,
+				 unsigned long event);
 static int addrconf_ifdown(struct net_device *dev, int how);
 
-static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
-static void addrconf_dad_timer(unsigned long data);
+static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
+						  int plen,
+						  const struct net_device *dev,
+						  u32 flags, u32 noflags);
+
+static void addrconf_dad_start(struct inet6_ifaddr *ifp);
+static void addrconf_dad_work(struct work_struct *w);
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
+static void addrconf_dad_run(struct inet6_dev *idev);
 static void addrconf_rs_timer(unsigned long data);
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
 static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
 
-static void inet6_prefix_notify(int event, struct inet6_dev *idev, 
+static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 				struct prefix_info *pinfo);
-static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev);
+static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
+			       struct net_device *dev);
 
-static struct notifier_block *inet6addr_chain;
-
-struct ipv6_devconf ipv6_devconf = {
+static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
@@ -152,167 +173,139 @@ struct ipv6_devconf ipv6_devconf = {
 	.accept_redirects	= 1,
 	.autoconf		= 1,
 	.force_mld_version	= 0,
+	.mldv1_unsolicited_report_interval = 10 * HZ,
+	.mldv2_unsolicited_report_interval = HZ,
 	.dad_transmits		= 1,
 	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
 	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
 	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY,
-#ifdef CONFIG_IPV6_PRIVACY
 	.use_tempaddr 		= 0,
 	.temp_valid_lft		= TEMP_VALID_LIFETIME,
 	.temp_prefered_lft	= TEMP_PREFERRED_LIFETIME,
 	.regen_max_retry	= REGEN_MAX_RETRY,
 	.max_desync_factor	= MAX_DESYNC_FACTOR,
-#endif
 	.max_addresses		= IPV6_MAX_ADDRESSES,
+	.accept_ra_defrtr	= 1,
+	.accept_ra_pinfo	= 1,
+#ifdef CONFIG_IPV6_ROUTER_PREF
+	.accept_ra_rtr_pref	= 1,
+	.rtr_probe_interval	= 60 * HZ,
+#ifdef CONFIG_IPV6_ROUTE_INFO
+	.accept_ra_rt_info_max_plen = 0,
+#endif
+#endif
+	.proxy_ndp		= 0,
+	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
+	.disable_ipv6		= 0,
+	.accept_dad		= 1,
+	.suppress_frag_ndisc	= 1,
 };
 
-static struct ipv6_devconf ipv6_devconf_dflt = {
+static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
 	.accept_ra		= 1,
 	.accept_redirects	= 1,
 	.autoconf		= 1,
+	.force_mld_version	= 0,
+	.mldv1_unsolicited_report_interval = 10 * HZ,
+	.mldv2_unsolicited_report_interval = HZ,
 	.dad_transmits		= 1,
 	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
 	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
 	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY,
-#ifdef CONFIG_IPV6_PRIVACY
 	.use_tempaddr		= 0,
 	.temp_valid_lft		= TEMP_VALID_LIFETIME,
 	.temp_prefered_lft	= TEMP_PREFERRED_LIFETIME,
 	.regen_max_retry	= REGEN_MAX_RETRY,
 	.max_desync_factor	= MAX_DESYNC_FACTOR,
-#endif
 	.max_addresses		= IPV6_MAX_ADDRESSES,
-};
-
-/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
-#if 0
-const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
+	.accept_ra_defrtr	= 1,
+	.accept_ra_pinfo	= 1,
+#ifdef CONFIG_IPV6_ROUTER_PREF
+	.accept_ra_rtr_pref	= 1,
+	.rtr_probe_interval	= 60 * HZ,
+#ifdef CONFIG_IPV6_ROUTE_INFO
+	.accept_ra_rt_info_max_plen = 0,
+#endif
 #endif
-const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+	.proxy_ndp		= 0,
+	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
+	.disable_ipv6		= 0,
+	.accept_dad		= 1,
+	.suppress_frag_ndisc	= 1,
+};
 
-int ipv6_addr_type(const struct in6_addr *addr)
+/* Check if a valid qdisc is available */
+static inline bool addrconf_qdisc_ok(const struct net_device *dev)
 {
-	int type;
-	u32 st;
-
-	st = addr->s6_addr32[0];
-
-	if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) {
-		type = IPV6_ADDR_MULTICAST;
-
-		switch((st & htonl(0x00FF0000))) {
-			case __constant_htonl(0x00010000):
-				type |= IPV6_ADDR_LOOPBACK;
-				break;
-
-			case __constant_htonl(0x00020000):
-				type |= IPV6_ADDR_LINKLOCAL;
-				break;
-
-			case __constant_htonl(0x00050000):
-				type |= IPV6_ADDR_SITELOCAL;
-				break;
-		};
-		return type;
-	}
-
-	type = IPV6_ADDR_UNICAST;
-
-	/* Consider all addresses with the first three bits different of
-	   000 and 111 as finished.
-	 */
-	if ((st & htonl(0xE0000000)) != htonl(0x00000000) &&
-	    (st & htonl(0xE0000000)) != htonl(0xE0000000))
-		return type;
-	
-	if ((st & htonl(0xFFC00000)) == htonl(0xFE800000))
-		return (IPV6_ADDR_LINKLOCAL | type);
-
-	if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000))
-		return (IPV6_ADDR_SITELOCAL | type);
-
-	if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) {
-		if (addr->s6_addr32[2] == 0) {
-			if (addr->s6_addr32[3] == 0)
-				return IPV6_ADDR_ANY;
-
-			if (addr->s6_addr32[3] == htonl(0x00000001))
-				return (IPV6_ADDR_LOOPBACK | type);
-
-			return (IPV6_ADDR_COMPATv4 | type);
-		}
-
-		if (addr->s6_addr32[2] == htonl(0x0000ffff))
-			return IPV6_ADDR_MAPPED;
-	}
+	return !qdisc_tx_is_noop(dev);
+}
 
-	st &= htonl(0xFF000000);
-	if (st == 0)
-		return IPV6_ADDR_RESERVED;
-	st &= htonl(0xFE000000);
-	if (st == htonl(0x02000000))
-		return IPV6_ADDR_RESERVED;	/* for NSAP */
-	if (st == htonl(0x04000000))
-		return IPV6_ADDR_RESERVED;	/* for IPX */
-	return type;
+static void addrconf_del_rs_timer(struct inet6_dev *idev)
+{
+	if (del_timer(&idev->rs_timer))
+		__in6_dev_put(idev);
 }
 
-static void addrconf_del_timer(struct inet6_ifaddr *ifp)
+static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)
 {
-	if (del_timer(&ifp->timer))
+	if (cancel_delayed_work(&ifp->dad_work))
 		__in6_ifa_put(ifp);
 }
 
-enum addrconf_timer_t
+static void addrconf_mod_rs_timer(struct inet6_dev *idev,
+				  unsigned long when)
 {
-	AC_NONE,
-	AC_DAD,
-	AC_RS,
-};
+	if (!timer_pending(&idev->rs_timer))
+		in6_dev_hold(idev);
+	mod_timer(&idev->rs_timer, jiffies + when);
+}
 
-static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
-			       enum addrconf_timer_t what,
-			       unsigned long when)
+static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
+				   unsigned long delay)
 {
-	if (!del_timer(&ifp->timer))
+	if (!delayed_work_pending(&ifp->dad_work))
 		in6_ifa_hold(ifp);
-
-	switch (what) {
-	case AC_DAD:
-		ifp->timer.function = addrconf_dad_timer;
-		break;
-	case AC_RS:
-		ifp->timer.function = addrconf_rs_timer;
-		break;
-	default:;
-	}
-	ifp->timer.expires = jiffies + when;
-	add_timer(&ifp->timer);
+	mod_delayed_work(addrconf_wq, &ifp->dad_work, delay);
 }
 
-/* Nobody refers to this device, we may destroy it. */
-
-void in6_dev_finish_destroy(struct inet6_dev *idev)
+static int snmp6_alloc_dev(struct inet6_dev *idev)
 {
-	struct net_device *dev = idev->dev;
-	BUG_TRAP(idev->addr_list==NULL);
-	BUG_TRAP(idev->mc_list==NULL);
-#ifdef NET_REFCNT_DEBUG
-	printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL");
-#endif
-	dev_put(dev);
-	if (!idev->dead) {
-		printk("Freeing alive inet6 device %p\n", idev);
-		return;
+	int i;
+
+	idev->stats.ipv6 = alloc_percpu(struct ipstats_mib);
+	if (!idev->stats.ipv6)
+		goto err_ip;
+
+	for_each_possible_cpu(i) {
+		struct ipstats_mib *addrconf_stats;
+		addrconf_stats = per_cpu_ptr(idev->stats.ipv6, i);
+		u64_stats_init(&addrconf_stats->syncp);
 	}
-	snmp6_free_dev(idev);
-	kfree(idev);
+
+
+	idev->stats.icmpv6dev = kzalloc(sizeof(struct icmpv6_mib_device),
+					GFP_KERNEL);
+	if (!idev->stats.icmpv6dev)
+		goto err_icmp;
+	idev->stats.icmpv6msgdev = kzalloc(sizeof(struct icmpv6msg_mib_device),
+					   GFP_KERNEL);
+	if (!idev->stats.icmpv6msgdev)
+		goto err_icmpmsg;
+
+	return 0;
+
+err_icmpmsg:
+	kfree(idev->stats.icmpv6dev);
+err_icmp:
+	free_percpu(idev->stats.ipv6);
+err_ip:
+	return -ENOMEM;
 }
 
-static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
+static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 {
 	struct inet6_dev *ndev;
 
@@ -321,223 +314,550 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 	if (dev->mtu < IPV6_MIN_MTU)
 		return NULL;
 
-	ndev = kmalloc(sizeof(struct inet6_dev), GFP_KERNEL);
-
-	if (ndev) {
-		memset(ndev, 0, sizeof(struct inet6_dev));
+	ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL);
 
-		rwlock_init(&ndev->lock);
-		ndev->dev = dev;
-		memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf));
-		ndev->cnf.mtu6 = dev->mtu;
-		ndev->cnf.sysctl = NULL;
-		ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
-		if (ndev->nd_parms == NULL) {
-			kfree(ndev);
-			return NULL;
-		}
-		/* We refer to the device */
-		dev_hold(dev);
-
-		if (snmp6_alloc_dev(ndev) < 0) {
-			ADBG((KERN_WARNING
-				"%s(): cannot allocate memory for statistics; dev=%s.\n",
-				__FUNCTION__, dev->name));
-			neigh_parms_release(&nd_tbl, ndev->nd_parms);
-			ndev->dead = 1;
-			in6_dev_finish_destroy(ndev);
-			return NULL;
-		}
+	if (ndev == NULL)
+		return NULL;
 
-		if (snmp6_register_dev(ndev) < 0) {
-			ADBG((KERN_WARNING
-				"%s(): cannot create /proc/net/dev_snmp6/%s\n",
-				__FUNCTION__, dev->name));
-			neigh_parms_release(&nd_tbl, ndev->nd_parms);
-			ndev->dead = 1;
-			in6_dev_finish_destroy(ndev);
-			return NULL;
-		}
+	rwlock_init(&ndev->lock);
+	ndev->dev = dev;
+	INIT_LIST_HEAD(&ndev->addr_list);
+	setup_timer(&ndev->rs_timer, addrconf_rs_timer,
+		    (unsigned long)ndev);
+	memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
+	ndev->cnf.mtu6 = dev->mtu;
+	ndev->cnf.sysctl = NULL;
+	ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
+	if (ndev->nd_parms == NULL) {
+		kfree(ndev);
+		return NULL;
+	}
+	if (ndev->cnf.forwarding)
+		dev_disable_lro(dev);
+	/* We refer to the device */
+	dev_hold(dev);
+
+	if (snmp6_alloc_dev(ndev) < 0) {
+		ADBG(KERN_WARNING
+			"%s: cannot allocate memory for statistics; dev=%s.\n",
+			__func__, dev->name);
+		neigh_parms_release(&nd_tbl, ndev->nd_parms);
+		dev_put(dev);
+		kfree(ndev);
+		return NULL;
+	}
 
-		/* One reference from device.  We must do this before
-		 * we invoke __ipv6_regen_rndid().
-		 */
-		in6_dev_hold(ndev);
+	if (snmp6_register_dev(ndev) < 0) {
+		ADBG(KERN_WARNING
+			"%s: cannot create /proc/net/dev_snmp6/%s\n",
+			__func__, dev->name);
+		neigh_parms_release(&nd_tbl, ndev->nd_parms);
+		ndev->dead = 1;
+		in6_dev_finish_destroy(ndev);
+		return NULL;
+	}
 
-#ifdef CONFIG_IPV6_PRIVACY
-		get_random_bytes(ndev->rndid, sizeof(ndev->rndid));
-		get_random_bytes(ndev->entropy, sizeof(ndev->entropy));
-		init_timer(&ndev->regen_timer);
-		ndev->regen_timer.function = ipv6_regen_rndid;
-		ndev->regen_timer.data = (unsigned long) ndev;
-		if ((dev->flags&IFF_LOOPBACK) ||
-		    dev->type == ARPHRD_TUNNEL ||
-		    dev->type == ARPHRD_NONE ||
-		    dev->type == ARPHRD_SIT) {
-			printk(KERN_INFO
-				"Disabled Privacy Extensions on device %p(%s)\n",
-				dev, dev->name);
-			ndev->cnf.use_tempaddr = -1;
-		} else {
-			in6_dev_hold(ndev);
-			ipv6_regen_rndid((unsigned long) ndev);
-		}
-#endif
+	/* One reference from device.  We must do this before
+	 * we invoke __ipv6_regen_rndid().
+	 */
+	in6_dev_hold(ndev);
 
-		write_lock_bh(&addrconf_lock);
-		dev->ip6_ptr = ndev;
-		write_unlock_bh(&addrconf_lock);
+	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
+		ndev->cnf.accept_dad = -1;
 
-		ipv6_mc_init_dev(ndev);
-		ndev->tstamp = jiffies;
-#ifdef CONFIG_SYSCTL
-		neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6, 
-				      NET_IPV6_NEIGH, "ipv6",
-				      &ndisc_ifinfo_sysctl_change,
-				      NULL);
-		addrconf_sysctl_register(ndev, &ndev->cnf);
+#if IS_ENABLED(CONFIG_IPV6_SIT)
+	if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) {
+		pr_info("%s: Disabled Multicast RS\n", dev->name);
+		ndev->cnf.rtr_solicits = 0;
+	}
 #endif
+
+	INIT_LIST_HEAD(&ndev->tempaddr_list);
+	setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);
+	if ((dev->flags&IFF_LOOPBACK) ||
+	    dev->type == ARPHRD_TUNNEL ||
+	    dev->type == ARPHRD_TUNNEL6 ||
+	    dev->type == ARPHRD_SIT ||
+	    dev->type == ARPHRD_NONE) {
+		ndev->cnf.use_tempaddr = -1;
+	} else {
+		in6_dev_hold(ndev);
+		ipv6_regen_rndid((unsigned long) ndev);
 	}
+
+	ndev->token = in6addr_any;
+
+	if (netif_running(dev) && addrconf_qdisc_ok(dev))
+		ndev->if_flags |= IF_READY;
+
+	ipv6_mc_init_dev(ndev);
+	ndev->tstamp = jiffies;
+	addrconf_sysctl_register(ndev);
+	/* protected by rtnl_lock */
+	rcu_assign_pointer(dev->ip6_ptr, ndev);
+
+	/* Join interface-local all-node multicast group */
+	ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);
+
+	/* Join all-node multicast group */
+	ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
+
+	/* Join all-router multicast group if forwarding is set */
+	if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST))
+		ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
+
 	return ndev;
 }
 
-static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
+static struct inet6_dev *ipv6_find_idev(struct net_device *dev)
 {
 	struct inet6_dev *idev;
 
 	ASSERT_RTNL();
 
-	if ((idev = __in6_dev_get(dev)) == NULL) {
-		if ((idev = ipv6_add_dev(dev)) == NULL)
+	idev = __in6_dev_get(dev);
+	if (!idev) {
+		idev = ipv6_add_dev(dev);
+		if (!idev)
 			return NULL;
 	}
+
 	if (dev->flags&IFF_UP)
 		ipv6_mc_up(idev);
 	return idev;
 }
 
+static int inet6_netconf_msgsize_devconf(int type)
+{
+	int size =  NLMSG_ALIGN(sizeof(struct netconfmsg))
+		    + nla_total_size(4);	/* NETCONFA_IFINDEX */
+
+	/* type -1 is used for ALL */
+	if (type == -1 || type == NETCONFA_FORWARDING)
+		size += nla_total_size(4);
+#ifdef CONFIG_IPV6_MROUTE
+	if (type == -1 || type == NETCONFA_MC_FORWARDING)
+		size += nla_total_size(4);
+#endif
+	if (type == -1 || type == NETCONFA_PROXY_NEIGH)
+		size += nla_total_size(4);
+
+	return size;
+}
+
+static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
+				      struct ipv6_devconf *devconf, u32 portid,
+				      u32 seq, int event, unsigned int flags,
+				      int type)
+{
+	struct nlmsghdr  *nlh;
+	struct netconfmsg *ncm;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
+			flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	ncm = nlmsg_data(nlh);
+	ncm->ncm_family = AF_INET6;
+
+	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
+		goto nla_put_failure;
+
+	/* type -1 is used for ALL */
+	if ((type == -1 || type == NETCONFA_FORWARDING) &&
+	    nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0)
+		goto nla_put_failure;
+#ifdef CONFIG_IPV6_MROUTE
+	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
+	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
+			devconf->mc_forwarding) < 0)
+		goto nla_put_failure;
+#endif
+	if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
+	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0)
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
+				  struct ipv6_devconf *devconf)
+{
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
+					 RTM_NEWNETCONF, 0, type);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC);
+	return;
+errout:
+	rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err);
+}
+
+static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
+	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
+	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
+	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
+};
+
+static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
+				     struct nlmsghdr *nlh)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct nlattr *tb[NETCONFA_MAX+1];
+	struct netconfmsg *ncm;
+	struct sk_buff *skb;
+	struct ipv6_devconf *devconf;
+	struct inet6_dev *in6_dev;
+	struct net_device *dev;
+	int ifindex;
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
+			  devconf_ipv6_policy);
+	if (err < 0)
+		goto errout;
+
+	err = EINVAL;
+	if (!tb[NETCONFA_IFINDEX])
+		goto errout;
+
+	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
+	switch (ifindex) {
+	case NETCONFA_IFINDEX_ALL:
+		devconf = net->ipv6.devconf_all;
+		break;
+	case NETCONFA_IFINDEX_DEFAULT:
+		devconf = net->ipv6.devconf_dflt;
+		break;
+	default:
+		dev = __dev_get_by_index(net, ifindex);
+		if (dev == NULL)
+			goto errout;
+		in6_dev = __in6_dev_get(dev);
+		if (in6_dev == NULL)
+			goto errout;
+		devconf = &in6_dev->cnf;
+		break;
+	}
+
+	err = -ENOBUFS;
+	skb = nlmsg_new(inet6_netconf_msgsize_devconf(-1), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = inet6_netconf_fill_devconf(skb, ifindex, devconf,
+					 NETLINK_CB(in_skb).portid,
+					 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
+					 -1);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+errout:
+	return err;
+}
+
+static int inet6_netconf_dump_devconf(struct sk_buff *skb,
+				      struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	int h, s_h;
+	int idx, s_idx;
+	struct net_device *dev;
+	struct inet6_dev *idev;
+	struct hlist_head *head;
+
+	s_h = cb->args[0];
+	s_idx = idx = cb->args[1];
+
+	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+		idx = 0;
+		head = &net->dev_index_head[h];
+		rcu_read_lock();
+		cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^
+			  net->dev_base_seq;
+		hlist_for_each_entry_rcu(dev, head, index_hlist) {
+			if (idx < s_idx)
+				goto cont;
+			idev = __in6_dev_get(dev);
+			if (!idev)
+				goto cont;
+
+			if (inet6_netconf_fill_devconf(skb, dev->ifindex,
+						       &idev->cnf,
+						       NETLINK_CB(cb->skb).portid,
+						       cb->nlh->nlmsg_seq,
+						       RTM_NEWNETCONF,
+						       NLM_F_MULTI,
+						       -1) <= 0) {
+				rcu_read_unlock();
+				goto done;
+			}
+			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+			idx++;
+		}
+		rcu_read_unlock();
+	}
+	if (h == NETDEV_HASHENTRIES) {
+		if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
+					       net->ipv6.devconf_all,
+					       NETLINK_CB(cb->skb).portid,
+					       cb->nlh->nlmsg_seq,
+					       RTM_NEWNETCONF, NLM_F_MULTI,
+					       -1) <= 0)
+			goto done;
+		else
+			h++;
+	}
+	if (h == NETDEV_HASHENTRIES + 1) {
+		if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
+					       net->ipv6.devconf_dflt,
+					       NETLINK_CB(cb->skb).portid,
+					       cb->nlh->nlmsg_seq,
+					       RTM_NEWNETCONF, NLM_F_MULTI,
+					       -1) <= 0)
+			goto done;
+		else
+			h++;
+	}
+done:
+	cb->args[0] = h;
+	cb->args[1] = idx;
+
+	return skb->len;
+}
+
 #ifdef CONFIG_SYSCTL
 static void dev_forward_change(struct inet6_dev *idev)
 {
 	struct net_device *dev;
 	struct inet6_ifaddr *ifa;
-	struct in6_addr addr;
 
 	if (!idev)
 		return;
 	dev = idev->dev;
-	if (dev && (dev->flags & IFF_MULTICAST)) {
-		ipv6_addr_all_routers(&addr);
-	
-		if (idev->cnf.forwarding)
-			ipv6_dev_mc_inc(dev, &addr);
-		else
-			ipv6_dev_mc_dec(dev, &addr);
+	if (idev->cnf.forwarding)
+		dev_disable_lro(dev);
+	if (dev->flags & IFF_MULTICAST) {
+		if (idev->cnf.forwarding) {
+			ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
+			ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allrouters);
+			ipv6_dev_mc_inc(dev, &in6addr_sitelocal_allrouters);
+		} else {
+			ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters);
+			ipv6_dev_mc_dec(dev, &in6addr_interfacelocal_allrouters);
+			ipv6_dev_mc_dec(dev, &in6addr_sitelocal_allrouters);
+		}
 	}
-	for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
+
+	list_for_each_entry(ifa, &idev->addr_list, if_list) {
+		if (ifa->flags&IFA_F_TENTATIVE)
+			continue;
 		if (idev->cnf.forwarding)
 			addrconf_join_anycast(ifa);
 		else
 			addrconf_leave_anycast(ifa);
 	}
+	inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING,
+				     dev->ifindex, &idev->cnf);
 }
 
 
-static void addrconf_forward_change(void)
+static void addrconf_forward_change(struct net *net, __s32 newf)
 {
 	struct net_device *dev;
 	struct inet6_dev *idev;
 
-	read_lock(&dev_base_lock);
-	for (dev=dev_base; dev; dev=dev->next) {
-		read_lock(&addrconf_lock);
+	for_each_netdev(net, dev) {
 		idev = __in6_dev_get(dev);
 		if (idev) {
-			int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding);
-			idev->cnf.forwarding = ipv6_devconf.forwarding;
+			int changed = (!idev->cnf.forwarding) ^ (!newf);
+			idev->cnf.forwarding = newf;
 			if (changed)
 				dev_forward_change(idev);
 		}
-		read_unlock(&addrconf_lock);
 	}
-	read_unlock(&dev_base_lock);
+}
+
+static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
+{
+	struct net *net;
+	int old;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	net = (struct net *)table->extra2;
+	old = *p;
+	*p = newf;
+
+	if (p == &net->ipv6.devconf_dflt->forwarding) {
+		if ((!newf) ^ (!old))
+			inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+						     NETCONFA_IFINDEX_DEFAULT,
+						     net->ipv6.devconf_dflt);
+		rtnl_unlock();
+		return 0;
+	}
+
+	if (p == &net->ipv6.devconf_all->forwarding) {
+		net->ipv6.devconf_dflt->forwarding = newf;
+		addrconf_forward_change(net, newf);
+		if ((!newf) ^ (!old))
+			inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+						     NETCONFA_IFINDEX_ALL,
+						     net->ipv6.devconf_all);
+	} else if ((!newf) ^ (!old))
+		dev_forward_change((struct inet6_dev *)table->extra1);
+	rtnl_unlock();
+
+	if (newf)
+		rt6_purge_dflt_routers(net);
+	return 1;
 }
 #endif
 
 /* Nobody refers to this ifaddr, destroy it */
-
 void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 {
-	BUG_TRAP(ifp->if_next==NULL);
-	BUG_TRAP(ifp->lst_next==NULL);
+	WARN_ON(!hlist_unhashed(&ifp->addr_lst));
+
 #ifdef NET_REFCNT_DEBUG
-	printk(KERN_DEBUG "inet6_ifa_finish_destroy\n");
+	pr_debug("%s\n", __func__);
 #endif
 
 	in6_dev_put(ifp->idev);
 
-	if (del_timer(&ifp->timer))
-		printk("Timer is still running, when freeing ifa=%p\n", ifp);
+	if (cancel_delayed_work(&ifp->dad_work))
+		pr_notice("delayed DAD work was pending while freeing ifa=%p\n",
+			  ifp);
 
-	if (!ifp->dead) {
-		printk("Freeing alive inet6 address %p\n", ifp);
+	if (ifp->state != INET6_IFADDR_STATE_DEAD) {
+		pr_warn("Freeing alive inet6 address %p\n", ifp);
 		return;
 	}
-	dst_release(&ifp->rt->u.dst);
+	ip6_rt_put(ifp->rt);
 
-	kfree(ifp);
+	kfree_rcu(ifp, rcu);
+}
+
+static void
+ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
+{
+	struct list_head *p;
+	int ifp_scope = ipv6_addr_src_scope(&ifp->addr);
+
+	/*
+	 * Each device address list is sorted in order of scope -
+	 * global before linklocal.
+	 */
+	list_for_each(p, &idev->addr_list) {
+		struct inet6_ifaddr *ifa
+			= list_entry(p, struct inet6_ifaddr, if_list);
+		if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr))
+			break;
+	}
+
+	list_add_tail(&ifp->if_list, p);
+}
+
+static u32 inet6_addr_hash(const struct in6_addr *addr)
+{
+	return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT);
 }
 
 /* On success it returns ifp with increased reference count */
 
 static struct inet6_ifaddr *
-ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
-	      int scope, u32 flags)
+ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
+	      const struct in6_addr *peer_addr, int pfxlen,
+	      int scope, u32 flags, u32 valid_lft, u32 prefered_lft)
 {
 	struct inet6_ifaddr *ifa = NULL;
 	struct rt6_info *rt;
-	int hash;
+	unsigned int hash;
 	int err = 0;
+	int addr_type = ipv6_addr_type(addr);
+
+	if (addr_type == IPV6_ADDR_ANY ||
+	    addr_type & IPV6_ADDR_MULTICAST ||
+	    (!(idev->dev->flags & IFF_LOOPBACK) &&
+	     addr_type & IPV6_ADDR_LOOPBACK))
+		return ERR_PTR(-EADDRNOTAVAIL);
 
-	read_lock_bh(&addrconf_lock);
+	rcu_read_lock_bh();
 	if (idev->dead) {
 		err = -ENODEV;			/*XXX*/
 		goto out2;
 	}
 
-	write_lock(&addrconf_hash_lock);
+	if (idev->cnf.disable_ipv6) {
+		err = -EACCES;
+		goto out2;
+	}
+
+	spin_lock(&addrconf_hash_lock);
 
 	/* Ignore adding duplicate addresses on an interface */
-	if (ipv6_chk_same_addr(addr, idev->dev)) {
-		ADBG(("ipv6_add_addr: already assigned\n"));
+	if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
+		ADBG("ipv6_add_addr: already assigned\n");
 		err = -EEXIST;
 		goto out;
 	}
 
-	ifa = kmalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
+	ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
 
 	if (ifa == NULL) {
-		ADBG(("ipv6_add_addr: malloc failed\n"));
+		ADBG("ipv6_add_addr: malloc failed\n");
 		err = -ENOBUFS;
 		goto out;
 	}
 
-	rt = addrconf_dst_alloc(idev, addr, 0);
+	rt = addrconf_dst_alloc(idev, addr, false);
 	if (IS_ERR(rt)) {
 		err = PTR_ERR(rt);
 		goto out;
 	}
 
-	memset(ifa, 0, sizeof(struct inet6_ifaddr));
-	ipv6_addr_copy(&ifa->addr, addr);
+	neigh_parms_data_state_setall(idev->nd_parms);
+
+	ifa->addr = *addr;
+	if (peer_addr)
+		ifa->peer_addr = *peer_addr;
 
 	spin_lock_init(&ifa->lock);
-	init_timer(&ifa->timer);
-	ifa->timer.data = (unsigned long) ifa;
+	spin_lock_init(&ifa->state_lock);
+	INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work);
+	INIT_HLIST_NODE(&ifa->addr_lst);
 	ifa->scope = scope;
 	ifa->prefix_len = pfxlen;
 	ifa->flags = flags | IFA_F_TENTATIVE;
+	ifa->valid_lft = valid_lft;
+	ifa->prefered_lft = prefered_lft;
 	ifa->cstamp = ifa->tstamp = jiffies;
+	ifa->tokenized = false;
+
+	ifa->rt = rt;
 
 	ifa->idev = idev;
 	in6_dev_hold(idev);
@@ -545,35 +865,27 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 	in6_ifa_hold(ifa);
 
 	/* Add to big hash table */
-	hash = ipv6_addr_hash(addr);
+	hash = inet6_addr_hash(addr);
 
-	ifa->lst_next = inet6_addr_lst[hash];
-	inet6_addr_lst[hash] = ifa;
-	in6_ifa_hold(ifa);
-	write_unlock(&addrconf_hash_lock);
+	hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
+	spin_unlock(&addrconf_hash_lock);
 
 	write_lock(&idev->lock);
 	/* Add to inet6_dev unicast addr list. */
-	ifa->if_next = idev->addr_list;
-	idev->addr_list = ifa;
+	ipv6_link_dev_addr(idev, ifa);
 
-#ifdef CONFIG_IPV6_PRIVACY
 	if (ifa->flags&IFA_F_TEMPORARY) {
-		ifa->tmp_next = idev->tempaddr_list;
-		idev->tempaddr_list = ifa;
+		list_add(&ifa->tmp_list, &idev->tempaddr_list);
 		in6_ifa_hold(ifa);
 	}
-#endif
-
-	ifa->rt = rt;
 
 	in6_ifa_hold(ifa);
 	write_unlock(&idev->lock);
 out2:
-	read_unlock_bh(&addrconf_lock);
+	rcu_read_unlock_bh();
 
 	if (likely(err == 0))
-		notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
+		inet6addr_notifier_call_chain(NETDEV_UP, ifa);
 	else {
 		kfree(ifa);
 		ifa = ERR_PTR(err);
@@ -581,144 +893,162 @@ out2:
 
 	return ifa;
 out:
-	write_unlock(&addrconf_hash_lock);
+	spin_unlock(&addrconf_hash_lock);
 	goto out2;
 }
 
-/* This function wants to get referenced ifp and releases it before return */
+enum cleanup_prefix_rt_t {
+	CLEANUP_PREFIX_RT_NOP,    /* no cleanup action for prefix route */
+	CLEANUP_PREFIX_RT_DEL,    /* delete the prefix route */
+	CLEANUP_PREFIX_RT_EXPIRE, /* update the lifetime of the prefix route */
+};
 
-static void ipv6_del_addr(struct inet6_ifaddr *ifp)
+/*
+ * Check, whether the prefix for ifp would still need a prefix route
+ * after deleting ifp. The function returns one of the CLEANUP_PREFIX_RT_*
+ * constants.
+ *
+ * 1) we don't purge prefix if address was not permanent.
+ *    prefix is managed by its own lifetime.
+ * 2) we also don't purge, if the address was IFA_F_NOPREFIXROUTE.
+ * 3) if there are no addresses, delete prefix.
+ * 4) if there are still other permanent address(es),
+ *    corresponding prefix is still permanent.
+ * 5) if there are still other addresses with IFA_F_NOPREFIXROUTE,
+ *    don't purge the prefix, assume user space is managing it.
+ * 6) otherwise, update prefix lifetime to the
+ *    longest valid lifetime among the corresponding
+ *    addresses on the device.
+ *    Note: subsequent RA will update lifetime.
+ **/
+static enum cleanup_prefix_rt_t
+check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires)
 {
-	struct inet6_ifaddr *ifa, **ifap;
+	struct inet6_ifaddr *ifa;
 	struct inet6_dev *idev = ifp->idev;
-	int hash;
-	int deleted = 0, onlink = 0;
-	unsigned long expires = jiffies;
+	unsigned long lifetime;
+	enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_DEL;
+
+	*expires = jiffies;
 
-	hash = ipv6_addr_hash(&ifp->addr);
+	list_for_each_entry(ifa, &idev->addr_list, if_list) {
+		if (ifa == ifp)
+			continue;
+		if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr,
+				       ifp->prefix_len))
+			continue;
+		if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE))
+			return CLEANUP_PREFIX_RT_NOP;
 
-	ifp->dead = 1;
+		action = CLEANUP_PREFIX_RT_EXPIRE;
 
-	write_lock_bh(&addrconf_hash_lock);
-	for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL;
-	     ifap = &ifa->lst_next) {
-		if (ifa == ifp) {
-			*ifap = ifa->lst_next;
-			__in6_ifa_put(ifp);
-			ifa->lst_next = NULL;
-			break;
-		}
+		spin_lock(&ifa->lock);
+
+		lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ);
+		/*
+		 * Note: Because this address is
+		 * not permanent, lifetime <
+		 * LONG_MAX / HZ here.
+		 */
+		if (time_before(*expires, ifa->tstamp + lifetime * HZ))
+			*expires = ifa->tstamp + lifetime * HZ;
+		spin_unlock(&ifa->lock);
 	}
-	write_unlock_bh(&addrconf_hash_lock);
 
-	write_lock_bh(&idev->lock);
-#ifdef CONFIG_IPV6_PRIVACY
-	if (ifp->flags&IFA_F_TEMPORARY) {
-		for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL;
-		     ifap = &ifa->tmp_next) {
-			if (ifa == ifp) {
-				*ifap = ifa->tmp_next;
-				if (ifp->ifpub) {
-					in6_ifa_put(ifp->ifpub);
-					ifp->ifpub = NULL;
-				}
-				__in6_ifa_put(ifp);
-				ifa->tmp_next = NULL;
-				break;
-			}
+	return action;
+}
+
+static void
+cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt)
+{
+	struct rt6_info *rt;
+
+	rt = addrconf_get_prefix_route(&ifp->addr,
+				       ifp->prefix_len,
+				       ifp->idev->dev,
+				       0, RTF_GATEWAY | RTF_DEFAULT);
+	if (rt) {
+		if (del_rt)
+			ip6_del_rt(rt);
+		else {
+			if (!(rt->rt6i_flags & RTF_EXPIRES))
+				rt6_set_expires(rt, expires);
+			ip6_rt_put(rt);
 		}
 	}
-#endif
+}
 
-	for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;
-	     ifap = &ifa->if_next) {
-		if (ifa == ifp) {
-			*ifap = ifa->if_next;
-			__in6_ifa_put(ifp);
-			ifa->if_next = NULL;
-			if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
-				break;
-			deleted = 1;
-		} else if (ifp->flags & IFA_F_PERMANENT) {
-			if (ipv6_prefix_equal(&ifa->addr, &ifp->addr,
-					      ifp->prefix_len)) {
-				if (ifa->flags & IFA_F_PERMANENT) {
-					onlink = 1;
-					if (deleted)
-						break;
-				} else {
-					unsigned long lifetime;
-
-					if (!onlink)
-						onlink = -1;
-
-					spin_lock(&ifa->lock);
-					lifetime = min_t(unsigned long,
-							 ifa->valid_lft, 0x7fffffffUL/HZ);
-					if (time_before(expires,
-							ifa->tstamp + lifetime * HZ))
-						expires = ifa->tstamp + lifetime * HZ;
-					spin_unlock(&ifa->lock);
-				}
-			}
+
+/* This function wants to get referenced ifp and releases it before return */
+
+static void ipv6_del_addr(struct inet6_ifaddr *ifp)
+{
+	int state;
+	enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP;
+	unsigned long expires;
+
+	ASSERT_RTNL();
+
+	spin_lock_bh(&ifp->state_lock);
+	state = ifp->state;
+	ifp->state = INET6_IFADDR_STATE_DEAD;
+	spin_unlock_bh(&ifp->state_lock);
+
+	if (state == INET6_IFADDR_STATE_DEAD)
+		goto out;
+
+	spin_lock_bh(&addrconf_hash_lock);
+	hlist_del_init_rcu(&ifp->addr_lst);
+	spin_unlock_bh(&addrconf_hash_lock);
+
+	write_lock_bh(&ifp->idev->lock);
+
+	if (ifp->flags&IFA_F_TEMPORARY) {
+		list_del(&ifp->tmp_list);
+		if (ifp->ifpub) {
+			in6_ifa_put(ifp->ifpub);
+			ifp->ifpub = NULL;
 		}
+		__in6_ifa_put(ifp);
 	}
-	write_unlock_bh(&idev->lock);
 
-	ipv6_ifa_notify(RTM_DELADDR, ifp);
+	if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE))
+		action = check_cleanup_prefix_route(ifp, &expires);
 
-	notifier_call_chain(&inet6addr_chain,NETDEV_DOWN,ifp);
+	list_del_init(&ifp->if_list);
+	__in6_ifa_put(ifp);
 
-	addrconf_del_timer(ifp);
+	write_unlock_bh(&ifp->idev->lock);
 
-	/*
-	 * Purge or update corresponding prefix
-	 *
-	 * 1) we don't purge prefix here if address was not permanent.
-	 *    prefix is managed by its own lifetime.
-	 * 2) if there're no addresses, delete prefix.
-	 * 3) if there're still other permanent address(es),
-	 *    corresponding prefix is still permanent.
-	 * 4) otherwise, update prefix lifetime to the
-	 *    longest valid lifetime among the corresponding
-	 *    addresses on the device.
-	 *    Note: subsequent RA will update lifetime.
-	 *
-	 * --yoshfuji
-	 */
-	if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) {
-		struct in6_addr prefix;
-		struct rt6_info *rt;
+	addrconf_del_dad_work(ifp);
 
-		ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
-		rt = rt6_lookup(&prefix, NULL, ifp->idev->dev->ifindex, 1);
+	ipv6_ifa_notify(RTM_DELADDR, ifp);
 
-		if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
-			if (onlink == 0) {
-				ip6_del_rt(rt, NULL, NULL, NULL);
-				rt = NULL;
-			} else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
-				rt->rt6i_expires = expires;
-				rt->rt6i_flags |= RTF_EXPIRES;
-			}
-		}
-		dst_release(&rt->u.dst);
+	inet6addr_notifier_call_chain(NETDEV_DOWN, ifp);
+
+	if (action != CLEANUP_PREFIX_RT_NOP) {
+		cleanup_prefix_route(ifp, expires,
+			action == CLEANUP_PREFIX_RT_DEL);
 	}
 
+	/* clean up prefsrc entries */
+	rt6_remove_prefsrc(ifp);
+out:
 	in6_ifa_put(ifp);
 }
 
-#ifdef CONFIG_IPV6_PRIVACY
 static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
 {
 	struct inet6_dev *idev = ifp->idev;
 	struct in6_addr addr, *tmpaddr;
-	unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp;
+	unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_tstamp, age;
+	unsigned long regen_advance;
 	int tmp_plen;
 	int ret = 0;
-	int max_addresses;
+	u32 addr_flags;
+	unsigned long now = jiffies;
 
-	write_lock(&idev->lock);
+	write_lock_bh(&idev->lock);
 	if (ift) {
 		spin_lock_bh(&ift->lock);
 		memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8);
@@ -730,9 +1060,8 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
 retry:
 	in6_dev_hold(idev);
 	if (idev->cnf.use_tempaddr <= 0) {
-		write_unlock(&idev->lock);
-		printk(KERN_INFO
-			"ipv6_create_tempaddr(): use_tempaddr is disabled.\n");
+		write_unlock_bh(&idev->lock);
+		pr_info("%s: use_tempaddr is disabled\n", __func__);
 		in6_dev_put(idev);
 		ret = -1;
 		goto out;
@@ -741,232 +1070,402 @@ retry:
 	if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {
 		idev->cnf.use_tempaddr = -1;	/*XXX*/
 		spin_unlock_bh(&ifp->lock);
-		write_unlock(&idev->lock);
-		printk(KERN_WARNING
-			"ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n");
+		write_unlock_bh(&idev->lock);
+		pr_warn("%s: regeneration time exceeded - disabled temporary address support\n",
+			__func__);
 		in6_dev_put(idev);
 		ret = -1;
 		goto out;
 	}
 	in6_ifa_hold(ifp);
 	memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
-	if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) {
-		spin_unlock_bh(&ifp->lock);
-		write_unlock(&idev->lock);
-		printk(KERN_WARNING
-			"ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n");
-		in6_ifa_put(ifp);
-		in6_dev_put(idev);
-		ret = -1;
-		goto out;
-	}
+	__ipv6_try_regen_rndid(idev, tmpaddr);
 	memcpy(&addr.s6_addr[8], idev->rndid, 8);
+	age = (now - ifp->tstamp) / HZ;
 	tmp_valid_lft = min_t(__u32,
 			      ifp->valid_lft,
-			      idev->cnf.temp_valid_lft);
-	tmp_prefered_lft = min_t(__u32, 
-				 ifp->prefered_lft, 
-				 idev->cnf.temp_prefered_lft - desync_factor / HZ);
+			      idev->cnf.temp_valid_lft + age);
+	tmp_prefered_lft = min_t(__u32,
+				 ifp->prefered_lft,
+				 idev->cnf.temp_prefered_lft + age -
+				 idev->cnf.max_desync_factor);
 	tmp_plen = ifp->prefix_len;
-	max_addresses = idev->cnf.max_addresses;
-	tmp_cstamp = ifp->cstamp;
 	tmp_tstamp = ifp->tstamp;
 	spin_unlock_bh(&ifp->lock);
 
-	write_unlock(&idev->lock);
-	ift = !max_addresses ||
-	      ipv6_count_addresses(idev) < max_addresses ? 
-		ipv6_add_addr(idev, &addr, tmp_plen,
-			      ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, IFA_F_TEMPORARY) : NULL;
-	if (!ift || IS_ERR(ift)) {
+	regen_advance = idev->cnf.regen_max_retry *
+	                idev->cnf.dad_transmits *
+	                NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ;
+	write_unlock_bh(&idev->lock);
+
+	/* A temporary address is created only if this calculated Preferred
+	 * Lifetime is greater than REGEN_ADVANCE time units.  In particular,
+	 * an implementation must not create a temporary address with a zero
+	 * Preferred Lifetime.
+	 * Use age calculation as in addrconf_verify to avoid unnecessary
+	 * temporary addresses being generated.
+	 */
+	age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
+	if (tmp_prefered_lft <= regen_advance + age) {
 		in6_ifa_put(ifp);
 		in6_dev_put(idev);
-		printk(KERN_INFO
-			"ipv6_create_tempaddr(): retry temporary address regeneration.\n");
+		ret = -1;
+		goto out;
+	}
+
+	addr_flags = IFA_F_TEMPORARY;
+	/* set in addrconf_prefix_rcv() */
+	if (ifp->flags & IFA_F_OPTIMISTIC)
+		addr_flags |= IFA_F_OPTIMISTIC;
+
+	ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
+			    ipv6_addr_scope(&addr), addr_flags,
+			    tmp_valid_lft, tmp_prefered_lft);
+	if (IS_ERR(ift)) {
+		in6_ifa_put(ifp);
+		in6_dev_put(idev);
+		pr_info("%s: retry temporary address regeneration\n", __func__);
 		tmpaddr = &addr;
-		write_lock(&idev->lock);
+		write_lock_bh(&idev->lock);
 		goto retry;
 	}
 
 	spin_lock_bh(&ift->lock);
 	ift->ifpub = ifp;
-	ift->valid_lft = tmp_valid_lft;
-	ift->prefered_lft = tmp_prefered_lft;
-	ift->cstamp = tmp_cstamp;
+	ift->cstamp = now;
 	ift->tstamp = tmp_tstamp;
 	spin_unlock_bh(&ift->lock);
 
-	addrconf_dad_start(ift, 0);
+	addrconf_dad_start(ift);
 	in6_ifa_put(ift);
 	in6_dev_put(idev);
 out:
 	return ret;
 }
-#endif
 
 /*
- *	Choose an appropriate source address
- *	should do:
- *	i)	get an address with an appropriate scope
- *	ii)	see if there is a specific route for the destination and use
- *		an address of the attached interface 
- *	iii)	don't use deprecated addresses
+ *	Choose an appropriate source address (RFC3484)
  */
-static int inline ipv6_saddr_pref(const struct inet6_ifaddr *ifp, u8 invpref)
-{
-	int pref;
-	pref = ifp->flags&IFA_F_DEPRECATED ? 0 : 2;
-#ifdef CONFIG_IPV6_PRIVACY
-	pref |= (ifp->flags^invpref)&IFA_F_TEMPORARY ? 0 : 1;
+enum {
+	IPV6_SADDR_RULE_INIT = 0,
+	IPV6_SADDR_RULE_LOCAL,
+	IPV6_SADDR_RULE_SCOPE,
+	IPV6_SADDR_RULE_PREFERRED,
+#ifdef CONFIG_IPV6_MIP6
+	IPV6_SADDR_RULE_HOA,
 #endif
-	return pref;
-}
+	IPV6_SADDR_RULE_OIF,
+	IPV6_SADDR_RULE_LABEL,
+	IPV6_SADDR_RULE_PRIVACY,
+	IPV6_SADDR_RULE_ORCHID,
+	IPV6_SADDR_RULE_PREFIX,
+	IPV6_SADDR_RULE_MAX
+};
 
-#ifdef CONFIG_IPV6_PRIVACY
-#define IPV6_GET_SADDR_MAXSCORE(score)	((score) == 3)
-#else
-#define IPV6_GET_SADDR_MAXSCORE(score)	(score)
-#endif
+struct ipv6_saddr_score {
+	int			rule;
+	int			addr_type;
+	struct inet6_ifaddr	*ifa;
+	DECLARE_BITMAP(scorebits, IPV6_SADDR_RULE_MAX);
+	int			scopedist;
+	int			matchlen;
+};
 
-int ipv6_dev_get_saddr(struct net_device *dev,
-		       struct in6_addr *daddr, struct in6_addr *saddr)
-{
-	struct inet6_ifaddr *ifp = NULL;
-	struct inet6_ifaddr *match = NULL;
-	struct inet6_dev *idev;
+struct ipv6_saddr_dst {
+	const struct in6_addr *addr;
+	int ifindex;
 	int scope;
-	int err;
-	int hiscore = -1, score;
+	int label;
+	unsigned int prefs;
+};
 
-	scope = ipv6_addr_scope(daddr);
+static inline int ipv6_saddr_preferred(int type)
+{
+	if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4|IPV6_ADDR_LOOPBACK))
+		return 1;
+	return 0;
+}
 
-	/*
-	 *	known dev
-	 *	search dev and walk through dev addresses
-	 */
+static int ipv6_get_saddr_eval(struct net *net,
+			       struct ipv6_saddr_score *score,
+			       struct ipv6_saddr_dst *dst,
+			       int i)
+{
+	int ret;
 
-	if (dev) {
-		if (dev->flags & IFF_LOOPBACK)
-			scope = IFA_HOST;
+	if (i <= score->rule) {
+		switch (i) {
+		case IPV6_SADDR_RULE_SCOPE:
+			ret = score->scopedist;
+			break;
+		case IPV6_SADDR_RULE_PREFIX:
+			ret = score->matchlen;
+			break;
+		default:
+			ret = !!test_bit(i, score->scorebits);
+		}
+		goto out;
+	}
 
-		read_lock(&addrconf_lock);
-		idev = __in6_dev_get(dev);
-		if (idev) {
-			read_lock_bh(&idev->lock);
-			for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
-				if (ifp->scope == scope) {
-					if (ifp->flags&IFA_F_TENTATIVE)
-						continue;
-#ifdef CONFIG_IPV6_PRIVACY
-					score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0);
-#else
-					score = ipv6_saddr_pref(ifp, 0);
+	switch (i) {
+	case IPV6_SADDR_RULE_INIT:
+		/* Rule 0: remember if hiscore is not ready yet */
+		ret = !!score->ifa;
+		break;
+	case IPV6_SADDR_RULE_LOCAL:
+		/* Rule 1: Prefer same address */
+		ret = ipv6_addr_equal(&score->ifa->addr, dst->addr);
+		break;
+	case IPV6_SADDR_RULE_SCOPE:
+		/* Rule 2: Prefer appropriate scope
+		 *
+		 *      ret
+		 *       ^
+		 *    -1 |  d 15
+		 *    ---+--+-+---> scope
+		 *       |
+		 *       |             d is scope of the destination.
+		 *  B-d  |  \
+		 *       |   \      <- smaller scope is better if
+		 *  B-15 |    \        if scope is enough for destination.
+		 *       |             ret = B - scope (-1 <= scope >= d <= 15).
+		 * d-C-1 | /
+		 *       |/         <- greater is better
+		 *   -C  /             if scope is not enough for destination.
+		 *      /|             ret = scope - C (-1 <= d < scope <= 15).
+		 *
+		 * d - C - 1 < B -15 (for all -1 <= d <= 15).
+		 * C > d + 14 - B >= 15 + 14 - B = 29 - B.
+		 * Assume B = 0 and we get C > 29.
+		 */
+		ret = __ipv6_addr_src_scope(score->addr_type);
+		if (ret >= dst->scope)
+			ret = -ret;
+		else
+			ret -= 128;	/* 30 is enough */
+		score->scopedist = ret;
+		break;
+	case IPV6_SADDR_RULE_PREFERRED:
+		/* Rule 3: Avoid deprecated and optimistic addresses */
+		ret = ipv6_saddr_preferred(score->addr_type) ||
+		      !(score->ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC));
+		break;
+#ifdef CONFIG_IPV6_MIP6
+	case IPV6_SADDR_RULE_HOA:
+	    {
+		/* Rule 4: Prefer home address */
+		int prefhome = !(dst->prefs & IPV6_PREFER_SRC_COA);
+		ret = !(score->ifa->flags & IFA_F_HOMEADDRESS) ^ prefhome;
+		break;
+	    }
 #endif
-					if (score <= hiscore)
-						continue;
-
-					if (match)
-						in6_ifa_put(match);
-					match = ifp;
-					hiscore = score;
-					in6_ifa_hold(ifp);
-
-					if (IPV6_GET_SADDR_MAXSCORE(score)) {
-						read_unlock_bh(&idev->lock);
-						read_unlock(&addrconf_lock);
-						goto out;
-					}
-				}
-			}
-			read_unlock_bh(&idev->lock);
-		}
-		read_unlock(&addrconf_lock);
+	case IPV6_SADDR_RULE_OIF:
+		/* Rule 5: Prefer outgoing interface */
+		ret = (!dst->ifindex ||
+		       dst->ifindex == score->ifa->idev->dev->ifindex);
+		break;
+	case IPV6_SADDR_RULE_LABEL:
+		/* Rule 6: Prefer matching label */
+		ret = ipv6_addr_label(net,
+				      &score->ifa->addr, score->addr_type,
+				      score->ifa->idev->dev->ifindex) == dst->label;
+		break;
+	case IPV6_SADDR_RULE_PRIVACY:
+	    {
+		/* Rule 7: Prefer public address
+		 * Note: prefer temporary address if use_tempaddr >= 2
+		 */
+		int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ?
+				!!(dst->prefs & IPV6_PREFER_SRC_TMP) :
+				score->ifa->idev->cnf.use_tempaddr >= 2;
+		ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp;
+		break;
+	    }
+	case IPV6_SADDR_RULE_ORCHID:
+		/* Rule 8-: Prefer ORCHID vs ORCHID or
+		 *	    non-ORCHID vs non-ORCHID
+		 */
+		ret = !(ipv6_addr_orchid(&score->ifa->addr) ^
+			ipv6_addr_orchid(dst->addr));
+		break;
+	case IPV6_SADDR_RULE_PREFIX:
+		/* Rule 8: Use longest matching prefix */
+		ret = ipv6_addr_diff(&score->ifa->addr, dst->addr);
+		if (ret > score->ifa->prefix_len)
+			ret = score->ifa->prefix_len;
+		score->matchlen = ret;
+		break;
+	default:
+		ret = 0;
 	}
 
-	if (scope == IFA_LINK)
-		goto out;
+	if (ret)
+		__set_bit(i, score->scorebits);
+	score->rule = i;
+out:
+	return ret;
+}
 
-	/*
-	 *	dev == NULL or search failed for specified dev
-	 */
+int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
+		       const struct in6_addr *daddr, unsigned int prefs,
+		       struct in6_addr *saddr)
+{
+	struct ipv6_saddr_score scores[2],
+				*score = &scores[0], *hiscore = &scores[1];
+	struct ipv6_saddr_dst dst;
+	struct net_device *dev;
+	int dst_type;
+
+	dst_type = __ipv6_addr_type(daddr);
+	dst.addr = daddr;
+	dst.ifindex = dst_dev ? dst_dev->ifindex : 0;
+	dst.scope = __ipv6_addr_src_scope(dst_type);
+	dst.label = ipv6_addr_label(net, daddr, dst_type, dst.ifindex);
+	dst.prefs = prefs;
+
+	hiscore->rule = -1;
+	hiscore->ifa = NULL;
+
+	rcu_read_lock();
+
+	for_each_netdev_rcu(net, dev) {
+		struct inet6_dev *idev;
+
+		/* Candidate Source Address (section 4)
+		 *  - multicast and link-local destination address,
+		 *    the set of candidate source address MUST only
+		 *    include addresses assigned to interfaces
+		 *    belonging to the same link as the outgoing
+		 *    interface.
+		 * (- For site-local destination addresses, the
+		 *    set of candidate source addresses MUST only
+		 *    include addresses assigned to interfaces
+		 *    belonging to the same site as the outgoing
+		 *    interface.)
+		 */
+		if (((dst_type & IPV6_ADDR_MULTICAST) ||
+		     dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) &&
+		    dst.ifindex && dev->ifindex != dst.ifindex)
+			continue;
 
-	read_lock(&dev_base_lock);
-	read_lock(&addrconf_lock);
-	for (dev = dev_base; dev; dev=dev->next) {
 		idev = __in6_dev_get(dev);
-		if (idev) {
-			read_lock_bh(&idev->lock);
-			for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
-				if (ifp->scope == scope) {
-					if (ifp->flags&IFA_F_TENTATIVE)
-						continue;
-#ifdef CONFIG_IPV6_PRIVACY
-					score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0);
-#else
-					score = ipv6_saddr_pref(ifp, 0);
-#endif
-					if (score <= hiscore)
-						continue;
+		if (!idev)
+			continue;
 
-					if (match)
-						in6_ifa_put(match);
-					match = ifp;
-					hiscore = score;
-					in6_ifa_hold(ifp);
+		read_lock_bh(&idev->lock);
+		list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
+			int i;
+
+			/*
+			 * - Tentative Address (RFC2462 section 5.4)
+			 *  - A tentative address is not considered
+			 *    "assigned to an interface" in the traditional
+			 *    sense, unless it is also flagged as optimistic.
+			 * - Candidate Source Address (section 4)
+			 *  - In any case, anycast addresses, multicast
+			 *    addresses, and the unspecified address MUST
+			 *    NOT be included in a candidate set.
+			 */
+			if ((score->ifa->flags & IFA_F_TENTATIVE) &&
+			    (!(score->ifa->flags & IFA_F_OPTIMISTIC)))
+				continue;
+
+			score->addr_type = __ipv6_addr_type(&score->ifa->addr);
+
+			if (unlikely(score->addr_type == IPV6_ADDR_ANY ||
+				     score->addr_type & IPV6_ADDR_MULTICAST)) {
+				LIMIT_NETDEBUG(KERN_DEBUG
+					       "ADDRCONF: unspecified / multicast address "
+					       "assigned as unicast address on %s",
+					       dev->name);
+				continue;
+			}
 
-					if (IPV6_GET_SADDR_MAXSCORE(score)) {
-						read_unlock_bh(&idev->lock);
-						goto out_unlock_base;
+			score->rule = -1;
+			bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
+
+			for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) {
+				int minihiscore, miniscore;
+
+				minihiscore = ipv6_get_saddr_eval(net, hiscore, &dst, i);
+				miniscore = ipv6_get_saddr_eval(net, score, &dst, i);
+
+				if (minihiscore > miniscore) {
+					if (i == IPV6_SADDR_RULE_SCOPE &&
+					    score->scopedist > 0) {
+						/*
+						 * special case:
+						 * each remaining entry
+						 * has too small (not enough)
+						 * scope, because ifa entries
+						 * are sorted by their scope
+						 * values.
+						 */
+						goto try_nextdev;
 					}
+					break;
+				} else if (minihiscore < miniscore) {
+					if (hiscore->ifa)
+						in6_ifa_put(hiscore->ifa);
+
+					in6_ifa_hold(score->ifa);
+
+					swap(hiscore, score);
+
+					/* restore our iterator */
+					score->ifa = hiscore->ifa;
+
+					break;
 				}
 			}
-			read_unlock_bh(&idev->lock);
 		}
+try_nextdev:
+		read_unlock_bh(&idev->lock);
 	}
+	rcu_read_unlock();
 
-out_unlock_base:
-	read_unlock(&addrconf_lock);
-	read_unlock(&dev_base_lock);
+	if (!hiscore->ifa)
+		return -EADDRNOTAVAIL;
 
-out:
-	err = -EADDRNOTAVAIL;
-	if (match) {
-		ipv6_addr_copy(saddr, &match->addr);
-		err = 0;
-		in6_ifa_put(match);
-	}
-
-	return err;
+	*saddr = hiscore->ifa->addr;
+	in6_ifa_put(hiscore->ifa);
+	return 0;
 }
+EXPORT_SYMBOL(ipv6_dev_get_saddr);
 
-
-int ipv6_get_saddr(struct dst_entry *dst,
-		   struct in6_addr *daddr, struct in6_addr *saddr)
+int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
+		      u32 banned_flags)
 {
-	return ipv6_dev_get_saddr(dst ? ((struct rt6_info *)dst)->rt6i_idev->dev : NULL, daddr, saddr);
-}
+	struct inet6_ifaddr *ifp;
+	int err = -EADDRNOTAVAIL;
 
+	list_for_each_entry_reverse(ifp, &idev->addr_list, if_list) {
+		if (ifp->scope > IFA_LINK)
+			break;
+		if (ifp->scope == IFA_LINK &&
+		    !(ifp->flags & banned_flags)) {
+			*addr = ifp->addr;
+			err = 0;
+			break;
+		}
+	}
+	return err;
+}
 
-int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
+int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
+		    u32 banned_flags)
 {
 	struct inet6_dev *idev;
 	int err = -EADDRNOTAVAIL;
 
-	read_lock(&addrconf_lock);
-	if ((idev = __in6_dev_get(dev)) != NULL) {
-		struct inet6_ifaddr *ifp;
-
+	rcu_read_lock();
+	idev = __in6_dev_get(dev);
+	if (idev) {
 		read_lock_bh(&idev->lock);
-		for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
-			if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
-				ipv6_addr_copy(addr, &ifp->addr);
-				err = 0;
-				break;
-			}
-		}
+		err = __ipv6_get_lladdr(idev, addr, banned_flags);
 		read_unlock_bh(&idev->lock);
 	}
-	read_unlock(&addrconf_lock);
+	rcu_read_unlock();
 	return err;
 }
 
@@ -976,112 +1475,142 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
 	struct inet6_ifaddr *ifp;
 
 	read_lock_bh(&idev->lock);
-	for (ifp=idev->addr_list; ifp; ifp=ifp->if_next)
+	list_for_each_entry(ifp, &idev->addr_list, if_list)
 		cnt++;
 	read_unlock_bh(&idev->lock);
 	return cnt;
 }
 
-int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict)
+int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
+		  const struct net_device *dev, int strict)
 {
-	struct inet6_ifaddr * ifp;
-	u8 hash = ipv6_addr_hash(addr);
+	struct inet6_ifaddr *ifp;
+	unsigned int hash = inet6_addr_hash(addr);
 
-	read_lock_bh(&addrconf_hash_lock);
-	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+	rcu_read_lock_bh();
+	hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
+		if (!net_eq(dev_net(ifp->idev->dev), net))
+			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
-		    !(ifp->flags&IFA_F_TENTATIVE)) {
-			if (dev == NULL || ifp->idev->dev == dev ||
-			    !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))
-				break;
+		    !(ifp->flags&IFA_F_TENTATIVE) &&
+		    (dev == NULL || ifp->idev->dev == dev ||
+		     !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) {
+			rcu_read_unlock_bh();
+			return 1;
 		}
 	}
-	read_unlock_bh(&addrconf_hash_lock);
-	return ifp != NULL;
+
+	rcu_read_unlock_bh();
+	return 0;
 }
+EXPORT_SYMBOL(ipv6_chk_addr);
 
-static
-int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev)
+static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
+			       struct net_device *dev)
 {
-	struct inet6_ifaddr * ifp;
-	u8 hash = ipv6_addr_hash(addr);
+	unsigned int hash = inet6_addr_hash(addr);
+	struct inet6_ifaddr *ifp;
 
-	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+	hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
+		if (!net_eq(dev_net(ifp->idev->dev), net))
+			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
 			if (dev == NULL || ifp->idev->dev == dev)
-				break;
+				return true;
 		}
 	}
-	return ifp != NULL;
+	return false;
 }
 
-struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict)
+/* Compares an address/prefix_len with addresses on device @dev.
+ * If one is found it returns true.
+ */
+bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
+	const unsigned int prefix_len, struct net_device *dev)
 {
-	struct inet6_ifaddr * ifp;
-	u8 hash = ipv6_addr_hash(addr);
+	struct inet6_dev *idev;
+	struct inet6_ifaddr *ifa;
+	bool ret = false;
 
-	read_lock_bh(&addrconf_hash_lock);
-	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
-		if (ipv6_addr_equal(&ifp->addr, addr)) {
-			if (dev == NULL || ifp->idev->dev == dev ||
-			    !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
-				in6_ifa_hold(ifp);
+	rcu_read_lock();
+	idev = __in6_dev_get(dev);
+	if (idev) {
+		read_lock_bh(&idev->lock);
+		list_for_each_entry(ifa, &idev->addr_list, if_list) {
+			ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
+			if (ret)
 				break;
-			}
 		}
+		read_unlock_bh(&idev->lock);
 	}
-	read_unlock_bh(&addrconf_hash_lock);
+	rcu_read_unlock();
 
-	return ifp;
+	return ret;
 }
+EXPORT_SYMBOL(ipv6_chk_custom_prefix);
 
-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
+int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
 {
-	const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
-	const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2);
-	u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
-	u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
-	int sk_ipv6only = ipv6_only_sock(sk);
-	int sk2_ipv6only = inet_v6_ipv6only(sk2);
-	int addr_type = ipv6_addr_type(sk_rcv_saddr6);
-	int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
-
-	if (!sk2_rcv_saddr && !sk_ipv6only)
-		return 1;
-
-	if (addr_type2 == IPV6_ADDR_ANY &&
-	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
-		return 1;
+	struct inet6_dev *idev;
+	struct inet6_ifaddr *ifa;
+	int	onlink;
 
-	if (addr_type == IPV6_ADDR_ANY &&
-	    !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
-		return 1;
+	onlink = 0;
+	rcu_read_lock();
+	idev = __in6_dev_get(dev);
+	if (idev) {
+		read_lock_bh(&idev->lock);
+		list_for_each_entry(ifa, &idev->addr_list, if_list) {
+			onlink = ipv6_prefix_equal(addr, &ifa->addr,
+						   ifa->prefix_len);
+			if (onlink)
+				break;
+		}
+		read_unlock_bh(&idev->lock);
+	}
+	rcu_read_unlock();
+	return onlink;
+}
+EXPORT_SYMBOL(ipv6_chk_prefix);
 
-	if (sk2_rcv_saddr6 &&
-	    ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
-		return 1;
+struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr,
+				     struct net_device *dev, int strict)
+{
+	struct inet6_ifaddr *ifp, *result = NULL;
+	unsigned int hash = inet6_addr_hash(addr);
 
-	if (addr_type == IPV6_ADDR_MAPPED &&
-	    !sk2_ipv6only &&
-	    (!sk2_rcv_saddr || !sk_rcv_saddr || sk_rcv_saddr == sk2_rcv_saddr))
-		return 1;
+	rcu_read_lock_bh();
+	hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) {
+		if (!net_eq(dev_net(ifp->idev->dev), net))
+			continue;
+		if (ipv6_addr_equal(&ifp->addr, addr)) {
+			if (dev == NULL || ifp->idev->dev == dev ||
+			    !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
+				result = ifp;
+				in6_ifa_hold(ifp);
+				break;
+			}
+		}
+	}
+	rcu_read_unlock_bh();
 
-	return 0;
+	return result;
 }
 
 /* Gets referenced address, destroys ifaddr */
 
-void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 {
-	if (net_ratelimit())
-		printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
 	if (ifp->flags&IFA_F_PERMANENT) {
 		spin_lock_bh(&ifp->lock);
-		addrconf_del_timer(ifp);
+		addrconf_del_dad_work(ifp);
 		ifp->flags |= IFA_F_TENTATIVE;
+		if (dad_failed)
+			ifp->flags |= IFA_F_DADFAILED;
 		spin_unlock_bh(&ifp->lock);
+		if (dad_failed)
+			ipv6_ifa_notify(0, ifp);
 		in6_ifa_put(ifp);
-#ifdef CONFIG_IPV6_PRIVACY
 	} else if (ifp->flags&IFA_F_TEMPORARY) {
 		struct inet6_ifaddr *ifpub;
 		spin_lock_bh(&ifp->lock);
@@ -1095,18 +1624,69 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 			spin_unlock_bh(&ifp->lock);
 		}
 		ipv6_del_addr(ifp);
-#endif
-	} else
+	} else {
 		ipv6_del_addr(ifp);
+	}
+}
+
+static int addrconf_dad_end(struct inet6_ifaddr *ifp)
+{
+	int err = -ENOENT;
+
+	spin_lock_bh(&ifp->state_lock);
+	if (ifp->state == INET6_IFADDR_STATE_DAD) {
+		ifp->state = INET6_IFADDR_STATE_POSTDAD;
+		err = 0;
+	}
+	spin_unlock_bh(&ifp->state_lock);
+
+	return err;
 }
 
+void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+{
+	struct inet6_dev *idev = ifp->idev;
+
+	if (addrconf_dad_end(ifp)) {
+		in6_ifa_put(ifp);
+		return;
+	}
+
+	net_info_ratelimited("%s: IPv6 duplicate address %pI6c detected!\n",
+			     ifp->idev->dev->name, &ifp->addr);
+
+	if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) {
+		struct in6_addr addr;
+
+		addr.s6_addr32[0] = htonl(0xfe800000);
+		addr.s6_addr32[1] = 0;
+
+		if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
+		    ipv6_addr_equal(&ifp->addr, &addr)) {
+			/* DAD failed for link-local based on MAC address */
+			idev->cnf.disable_ipv6 = 1;
+
+			pr_info("%s: IPv6 being disabled!\n",
+				ifp->idev->dev->name);
+		}
+	}
+
+	spin_lock_bh(&ifp->state_lock);
+	/* transition from _POSTDAD to _ERRDAD */
+	ifp->state = INET6_IFADDR_STATE_ERRDAD;
+	spin_unlock_bh(&ifp->state_lock);
+
+	addrconf_mod_dad_work(ifp, 0);
+}
 
 /* Join to solicited addr multicast group. */
 
-void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr)
+void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
 {
 	struct in6_addr maddr;
 
+	ASSERT_RTNL();
+
 	if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
 		return;
 
@@ -1114,10 +1694,12 @@ void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr)
 	ipv6_dev_mc_inc(dev, &maddr);
 }
 
-void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr)
+void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
 {
 	struct in6_addr maddr;
 
+	ASSERT_RTNL();
+
 	if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
 		return;
 
@@ -1128,6 +1710,11 @@ void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr)
 static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
 {
 	struct in6_addr addr;
+
+	ASSERT_RTNL();
+
+	if (ifp->prefix_len >= 127) /* RFC 6164 */
+		return;
 	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
 	if (ipv6_addr_any(&addr))
 		return;
@@ -1137,58 +1724,150 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
 {
 	struct in6_addr addr;
+
+	ASSERT_RTNL();
+
+	if (ifp->prefix_len >= 127) /* RFC 6164 */
+		return;
 	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
 	if (ipv6_addr_any(&addr))
 		return;
 	__ipv6_dev_ac_dec(ifp->idev, &addr);
 }
 
+static int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
+{
+	if (dev->addr_len != ETH_ALEN)
+		return -1;
+	memcpy(eui, dev->dev_addr, 3);
+	memcpy(eui + 5, dev->dev_addr + 3, 3);
+
+	/*
+	 * The zSeries OSA network cards can be shared among various
+	 * OS instances, but the OSA cards have only one MAC address.
+	 * This leads to duplicate address conflicts in conjunction
+	 * with IPv6 if more than one instance uses the same card.
+	 *
+	 * The driver for these cards can deliver a unique 16-bit
+	 * identifier for each instance sharing the same card.  It is
+	 * placed instead of 0xFFFE in the interface identifier.  The
+	 * "u" bit of the interface identifier is not inverted in this
+	 * case.  Hence the resulting interface identifier has local
+	 * scope according to RFC2373.
+	 */
+	if (dev->dev_id) {
+		eui[3] = (dev->dev_id >> 8) & 0xFF;
+		eui[4] = dev->dev_id & 0xFF;
+	} else {
+		eui[3] = 0xFF;
+		eui[4] = 0xFE;
+		eui[0] ^= 2;
+	}
+	return 0;
+}
+
+static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev)
+{
+	if (dev->addr_len != IEEE802154_ADDR_LEN)
+		return -1;
+	memcpy(eui, dev->dev_addr, 8);
+	eui[0] ^= 2;
+	return 0;
+}
+
+static int addrconf_ifid_ieee1394(u8 *eui, struct net_device *dev)
+{
+	union fwnet_hwaddr *ha;
+
+	if (dev->addr_len != FWNET_ALEN)
+		return -1;
+
+	ha = (union fwnet_hwaddr *)dev->dev_addr;
+
+	memcpy(eui, &ha->uc.uniq_id, sizeof(ha->uc.uniq_id));
+	eui[0] ^= 2;
+	return 0;
+}
+
+static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev)
+{
+	/* XXX: inherit EUI-64 from other interface -- yoshfuji */
+	if (dev->addr_len != ARCNET_ALEN)
+		return -1;
+	memset(eui, 0, 7);
+	eui[7] = *(u8 *)dev->dev_addr;
+	return 0;
+}
+
+static int addrconf_ifid_infiniband(u8 *eui, struct net_device *dev)
+{
+	if (dev->addr_len != INFINIBAND_ALEN)
+		return -1;
+	memcpy(eui, dev->dev_addr + 12, 8);
+	eui[0] |= 2;
+	return 0;
+}
+
+static int __ipv6_isatap_ifid(u8 *eui, __be32 addr)
+{
+	if (addr == 0)
+		return -1;
+	eui[0] = (ipv4_is_zeronet(addr) || ipv4_is_private_10(addr) ||
+		  ipv4_is_loopback(addr) || ipv4_is_linklocal_169(addr) ||
+		  ipv4_is_private_172(addr) || ipv4_is_test_192(addr) ||
+		  ipv4_is_anycast_6to4(addr) || ipv4_is_private_192(addr) ||
+		  ipv4_is_test_198(addr) || ipv4_is_multicast(addr) ||
+		  ipv4_is_lbcast(addr)) ? 0x00 : 0x02;
+	eui[1] = 0;
+	eui[2] = 0x5E;
+	eui[3] = 0xFE;
+	memcpy(eui + 4, &addr, 4);
+	return 0;
+}
+
+static int addrconf_ifid_sit(u8 *eui, struct net_device *dev)
+{
+	if (dev->priv_flags & IFF_ISATAP)
+		return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr);
+	return -1;
+}
+
+static int addrconf_ifid_gre(u8 *eui, struct net_device *dev)
+{
+	return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr);
+}
+
+static int addrconf_ifid_ip6tnl(u8 *eui, struct net_device *dev)
+{
+	memcpy(eui, dev->perm_addr, 3);
+	memcpy(eui + 5, dev->perm_addr + 3, 3);
+	eui[3] = 0xFF;
+	eui[4] = 0xFE;
+	eui[0] ^= 2;
+	return 0;
+}
+
 static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
 {
 	switch (dev->type) {
 	case ARPHRD_ETHER:
 	case ARPHRD_FDDI:
-	case ARPHRD_IEEE802_TR:
-		if (dev->addr_len != ETH_ALEN)
-			return -1;
-		memcpy(eui, dev->dev_addr, 3);
-		memcpy(eui + 5, dev->dev_addr + 3, 3);
-
-		/*
-		 * The zSeries OSA network cards can be shared among various
-		 * OS instances, but the OSA cards have only one MAC address.
-		 * This leads to duplicate address conflicts in conjunction
-		 * with IPv6 if more than one instance uses the same card.
-		 * 
-		 * The driver for these cards can deliver a unique 16-bit
-		 * identifier for each instance sharing the same card.  It is
-		 * placed instead of 0xFFFE in the interface identifier.  The
-		 * "u" bit of the interface identifier is not inverted in this
-		 * case.  Hence the resulting interface identifier has local
-		 * scope according to RFC2373.
-		 */
-		if (dev->dev_id) {
-			eui[3] = (dev->dev_id >> 8) & 0xFF;
-			eui[4] = dev->dev_id & 0xFF;
-		} else {
-			eui[3] = 0xFF;
-			eui[4] = 0xFE;
-			eui[0] ^= 2;
-		}
-		return 0;
+		return addrconf_ifid_eui48(eui, dev);
 	case ARPHRD_ARCNET:
-		/* XXX: inherit EUI-64 from other interface -- yoshfuji */
-		if (dev->addr_len != ARCNET_ALEN)
-			return -1;
-		memset(eui, 0, 7);
-		eui[7] = *(u8*)dev->dev_addr;
-		return 0;
+		return addrconf_ifid_arcnet(eui, dev);
 	case ARPHRD_INFINIBAND:
-		if (dev->addr_len != INFINIBAND_ALEN)
-			return -1;
-		memcpy(eui, dev->dev_addr + 12, 8);
-		eui[0] |= 2;
-		return 0;
+		return addrconf_ifid_infiniband(eui, dev);
+	case ARPHRD_SIT:
+		return addrconf_ifid_sit(eui, dev);
+	case ARPHRD_IPGRE:
+		return addrconf_ifid_gre(eui, dev);
+	case ARPHRD_6LOWPAN:
+	case ARPHRD_IEEE802154:
+		return addrconf_ifid_eui64(eui, dev);
+	case ARPHRD_IEEE1394:
+		return addrconf_ifid_ieee1394(eui, dev);
+	case ARPHRD_TUNNEL6:
+		return addrconf_ifid_ip6tnl(eui, dev);
 	}
 	return -1;
 }
@@ -1199,7 +1878,9 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
 	struct inet6_ifaddr *ifp;
 
 	read_lock_bh(&idev->lock);
-	for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
+	list_for_each_entry_reverse(ifp, &idev->addr_list, if_list) {
+		if (ifp->scope > IFA_LINK)
+			break;
 		if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
 			memcpy(eui, ifp->addr.s6_addr+8, 8);
 			err = 0;
@@ -1210,38 +1891,12 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
 	return err;
 }
 
-#ifdef CONFIG_IPV6_PRIVACY
 /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
-static int __ipv6_regen_rndid(struct inet6_dev *idev)
+static void __ipv6_regen_rndid(struct inet6_dev *idev)
 {
-	struct net_device *dev;
-	struct scatterlist sg[2];
-
-	sg_set_buf(&sg[0], idev->entropy, 8);
-	sg_set_buf(&sg[1], idev->work_eui64, 8);
-
-	dev = idev->dev;
-
-	if (ipv6_generate_eui64(idev->work_eui64, dev)) {
-		printk(KERN_INFO
-			"__ipv6_regen_rndid(idev=%p): cannot get EUI64 identifier; use random bytes.\n",
-			idev);
-		get_random_bytes(idev->work_eui64, sizeof(idev->work_eui64));
-	}
 regen:
-	spin_lock(&md5_tfm_lock);
-	if (unlikely(md5_tfm == NULL)) {
-		spin_unlock(&md5_tfm_lock);
-		return -1;
-	}
-	crypto_digest_init(md5_tfm);
-	crypto_digest_update(md5_tfm, sg, 2);
-	crypto_digest_final(md5_tfm, idev->work_digest);
-	spin_unlock(&md5_tfm_lock);
-
-	memcpy(idev->rndid, &idev->work_digest[0], 8);
+	get_random_bytes(idev->rndid, sizeof(idev->rndid));
 	idev->rndid[0] &= ~0x02;
-	memcpy(idev->entropy, &idev->work_digest[8], 8);
 
 	/*
 	 * <draft-ietf-ipngwg-temp-addresses-v2-00.txt>:
@@ -1249,12 +1904,12 @@ regen:
 	 *
 	 *  - Reserved subnet anycast (RFC 2526)
 	 *	11111101 11....11 1xxxxxxx
-	 *  - ISATAP (draft-ietf-ngtrans-isatap-13.txt) 5.1
+	 *  - ISATAP (RFC4214) 6.1
 	 *	00-00-5E-FE-xx-xx-xx-xx
 	 *  - value 0
 	 *  - XXX: already assigned to an address on the device
 	 */
-	if (idev->rndid[0] == 0xfd && 
+	if (idev->rndid[0] == 0xfd &&
 	    (idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) == 0xff &&
 	    (idev->rndid[7]&0x80))
 		goto regen;
@@ -1264,8 +1919,6 @@ regen:
 		if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
 			goto regen;
 	}
-
-	return 0;
 }
 
 static void ipv6_regen_rndid(unsigned long data)
@@ -1273,22 +1926,22 @@ static void ipv6_regen_rndid(unsigned long data)
 	struct inet6_dev *idev = (struct inet6_dev *) data;
 	unsigned long expires;
 
-	read_lock_bh(&addrconf_lock);
+	rcu_read_lock_bh();
 	write_lock_bh(&idev->lock);
 
 	if (idev->dead)
 		goto out;
 
-	if (__ipv6_regen_rndid(idev) < 0)
-		goto out;
-	
+	__ipv6_regen_rndid(idev);
+
 	expires = jiffies +
-		idev->cnf.temp_prefered_lft * HZ - 
-		idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor;
+		idev->cnf.temp_prefered_lft * HZ -
+		idev->cnf.regen_max_retry * idev->cnf.dad_transmits *
+		NEIGH_VAR(idev->nd_parms, RETRANS_TIME) -
+		idev->cnf.max_desync_factor * HZ;
 	if (time_before(expires, jiffies)) {
-		printk(KERN_WARNING
-			"ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n",
-			idev->dev->name);
+		pr_warn("%s: too short regeneration interval; timer disabled for %s\n",
+			__func__, idev->dev->name);
 		goto out;
 	}
 
@@ -1297,18 +1950,15 @@ static void ipv6_regen_rndid(unsigned long data)
 
 out:
 	write_unlock_bh(&idev->lock);
-	read_unlock_bh(&addrconf_lock);
+	rcu_read_unlock_bh();
 	in6_dev_put(idev);
 }
 
-static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) {
-	int ret = 0;
-
+static void  __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
+{
 	if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
-		ret = __ipv6_regen_rndid(idev);
-	return ret;
+		__ipv6_regen_rndid(idev);
 }
-#endif
 
 /*
  *	Add prefix route.
@@ -1318,67 +1968,81 @@ static void
 addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 		      unsigned long expires, u32 flags)
 {
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg = {
+		.fc_table = RT6_TABLE_PREFIX,
+		.fc_metric = IP6_RT_PRIO_ADDRCONF,
+		.fc_ifindex = dev->ifindex,
+		.fc_expires = expires,
+		.fc_dst_len = plen,
+		.fc_flags = RTF_UP | flags,
+		.fc_nlinfo.nl_net = dev_net(dev),
+		.fc_protocol = RTPROT_KERNEL,
+	};
 
-	memset(&rtmsg, 0, sizeof(rtmsg));
-	ipv6_addr_copy(&rtmsg.rtmsg_dst, pfx);
-	rtmsg.rtmsg_dst_len = plen;
-	rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
-	rtmsg.rtmsg_ifindex = dev->ifindex;
-	rtmsg.rtmsg_info = expires;
-	rtmsg.rtmsg_flags = RTF_UP|flags;
-	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
+	cfg.fc_dst = *pfx;
 
 	/* Prevent useless cloning on PtP SIT.
 	   This thing is done here expecting that the whole
 	   class of non-broadcast devices need not cloning.
 	 */
-	if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT))
-		rtmsg.rtmsg_flags |= RTF_NONEXTHOP;
+#if IS_ENABLED(CONFIG_IPV6_SIT)
+	if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT))
+		cfg.fc_flags |= RTF_NONEXTHOP;
+#endif
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
+	ip6_route_add(&cfg);
 }
 
-/* Create "default" multicast route to the interface */
-
-static void addrconf_add_mroute(struct net_device *dev)
-{
-	struct in6_rtmsg rtmsg;
-
-	memset(&rtmsg, 0, sizeof(rtmsg));
-	ipv6_addr_set(&rtmsg.rtmsg_dst,
-		      htonl(0xFF000000), 0, 0, 0);
-	rtmsg.rtmsg_dst_len = 8;
-	rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
-	rtmsg.rtmsg_ifindex = dev->ifindex;
-	rtmsg.rtmsg_flags = RTF_UP;
-	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
-}
 
-static void sit_route_add(struct net_device *dev)
+static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
+						  int plen,
+						  const struct net_device *dev,
+						  u32 flags, u32 noflags)
 {
-	struct in6_rtmsg rtmsg;
+	struct fib6_node *fn;
+	struct rt6_info *rt = NULL;
+	struct fib6_table *table;
 
-	memset(&rtmsg, 0, sizeof(rtmsg));
+	table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX);
+	if (table == NULL)
+		return NULL;
 
-	rtmsg.rtmsg_type	= RTMSG_NEWROUTE;
-	rtmsg.rtmsg_metric	= IP6_RT_PRIO_ADDRCONF;
+	read_lock_bh(&table->tb6_lock);
+	fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
+	if (!fn)
+		goto out;
+	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+		if (rt->dst.dev->ifindex != dev->ifindex)
+			continue;
+		if ((rt->rt6i_flags & flags) != flags)
+			continue;
+		if ((rt->rt6i_flags & noflags) != 0)
+			continue;
+		dst_hold(&rt->dst);
+		break;
+	}
+out:
+	read_unlock_bh(&table->tb6_lock);
+	return rt;
+}
 
-	/* prefix length - 96 bits "::d.d.d.d" */
-	rtmsg.rtmsg_dst_len	= 96;
-	rtmsg.rtmsg_flags	= RTF_UP|RTF_NONEXTHOP;
-	rtmsg.rtmsg_ifindex	= dev->ifindex;
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
-}
+/* Create "default" multicast route to the interface */
 
-static void addrconf_add_lroute(struct net_device *dev)
+static void addrconf_add_mroute(struct net_device *dev)
 {
-	struct in6_addr addr;
+	struct fib6_config cfg = {
+		.fc_table = RT6_TABLE_LOCAL,
+		.fc_metric = IP6_RT_PRIO_ADDRCONF,
+		.fc_ifindex = dev->ifindex,
+		.fc_dst_len = 8,
+		.fc_flags = RTF_UP,
+		.fc_nlinfo.nl_net = dev_net(dev),
+	};
 
-	ipv6_addr_set(&addr,  htonl(0xFE800000), 0, 0, 0);
-	addrconf_prefix_route(&addr, 64, dev, 0, 0);
+	ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
+
+	ip6_route_add(&cfg);
 }
 
 static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
@@ -1387,33 +2051,103 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
 
 	ASSERT_RTNL();
 
-	if ((idev = ipv6_find_idev(dev)) == NULL)
-		return NULL;
+	idev = ipv6_find_idev(dev);
+	if (!idev)
+		return ERR_PTR(-ENOBUFS);
+
+	if (idev->cnf.disable_ipv6)
+		return ERR_PTR(-EACCES);
 
 	/* Add default multicast route */
-	addrconf_add_mroute(dev);
+	if (!(dev->flags & IFF_LOOPBACK))
+		addrconf_add_mroute(dev);
 
-	/* Add link local route */
-	addrconf_add_lroute(dev);
 	return idev;
 }
 
-void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
+static void manage_tempaddrs(struct inet6_dev *idev,
+			     struct inet6_ifaddr *ifp,
+			     __u32 valid_lft, __u32 prefered_lft,
+			     bool create, unsigned long now)
+{
+	u32 flags;
+	struct inet6_ifaddr *ift;
+
+	read_lock_bh(&idev->lock);
+	/* update all temporary addresses in the list */
+	list_for_each_entry(ift, &idev->tempaddr_list, tmp_list) {
+		int age, max_valid, max_prefered;
+
+		if (ifp != ift->ifpub)
+			continue;
+
+		/* RFC 4941 section 3.3:
+		 * If a received option will extend the lifetime of a public
+		 * address, the lifetimes of temporary addresses should
+		 * be extended, subject to the overall constraint that no
+		 * temporary addresses should ever remain "valid" or "preferred"
+		 * for a time longer than (TEMP_VALID_LIFETIME) or
+		 * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), respectively.
+		 */
+		age = (now - ift->cstamp) / HZ;
+		max_valid = idev->cnf.temp_valid_lft - age;
+		if (max_valid < 0)
+			max_valid = 0;
+
+		max_prefered = idev->cnf.temp_prefered_lft -
+			       idev->cnf.max_desync_factor - age;
+		if (max_prefered < 0)
+			max_prefered = 0;
+
+		if (valid_lft > max_valid)
+			valid_lft = max_valid;
+
+		if (prefered_lft > max_prefered)
+			prefered_lft = max_prefered;
+
+		spin_lock(&ift->lock);
+		flags = ift->flags;
+		ift->valid_lft = valid_lft;
+		ift->prefered_lft = prefered_lft;
+		ift->tstamp = now;
+		if (prefered_lft > 0)
+			ift->flags &= ~IFA_F_DEPRECATED;
+
+		spin_unlock(&ift->lock);
+		if (!(flags&IFA_F_TENTATIVE))
+			ipv6_ifa_notify(0, ift);
+	}
+
+	if ((create || list_empty(&idev->tempaddr_list)) &&
+	    idev->cnf.use_tempaddr > 0) {
+		/* When a new public address is created as described
+		 * in [ADDRCONF], also create a new temporary address.
+		 * Also create a temporary address if it's enabled but
+		 * no temporary address currently exists.
+		 */
+		read_unlock_bh(&idev->lock);
+		ipv6_create_tempaddr(ifp, NULL);
+	} else {
+		read_unlock_bh(&idev->lock);
+	}
+}
+
+void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 {
 	struct prefix_info *pinfo;
 	__u32 valid_lft;
 	__u32 prefered_lft;
 	int addr_type;
-	unsigned long rt_expires;
 	struct inet6_dev *in6_dev;
+	struct net *net = dev_net(dev);
 
 	pinfo = (struct prefix_info *) opt;
-	
+
 	if (len < sizeof(struct prefix_info)) {
-		ADBG(("addrconf: prefix option too short\n"));
+		ADBG("addrconf: prefix option too short\n");
 		return;
 	}
-	
+
 	/*
 	 *	Validation checks ([ADDRCONF], page 19)
 	 */
@@ -1427,16 +2161,15 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 	prefered_lft = ntohl(pinfo->prefered);
 
 	if (prefered_lft > valid_lft) {
-		if (net_ratelimit())
-			printk(KERN_WARNING "addrconf: prefix option has invalid lifetime\n");
+		net_warn_ratelimited("addrconf: prefix option has invalid lifetime\n");
 		return;
 	}
 
 	in6_dev = in6_dev_get(dev);
 
 	if (in6_dev == NULL) {
-		if (net_ratelimit())
-			printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name);
+		net_dbg_ratelimited("addrconf: device %s not configured\n",
+				    dev->name);
 		return;
 	}
 
@@ -1446,90 +2179,126 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 	 *	2) Configure prefixes with the auto flag set
 	 */
 
-	/* Avoid arithmetic overflow. Really, we could
-	   save rt_expires in seconds, likely valid_lft,
-	   but it would require division in fib gc, that it
-	   not good.
-	 */
-	if (valid_lft >= 0x7FFFFFFF/HZ)
-		rt_expires = 0;
-	else
-		rt_expires = jiffies + valid_lft * HZ;
-
 	if (pinfo->onlink) {
 		struct rt6_info *rt;
-		rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1);
+		unsigned long rt_expires;
 
-		if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
-			if (rt->rt6i_flags&RTF_EXPIRES) {
-				if (valid_lft == 0) {
-					ip6_del_rt(rt, NULL, NULL, NULL);
-					rt = NULL;
-				} else {
-					rt->rt6i_expires = rt_expires;
-				}
+		/* Avoid arithmetic overflow. Really, we could
+		 * save rt_expires in seconds, likely valid_lft,
+		 * but it would require division in fib gc, that it
+		 * not good.
+		 */
+		if (HZ > USER_HZ)
+			rt_expires = addrconf_timeout_fixup(valid_lft, HZ);
+		else
+			rt_expires = addrconf_timeout_fixup(valid_lft, USER_HZ);
+
+		if (addrconf_finite_timeout(rt_expires))
+			rt_expires *= HZ;
+
+		rt = addrconf_get_prefix_route(&pinfo->prefix,
+					       pinfo->prefix_len,
+					       dev,
+					       RTF_ADDRCONF | RTF_PREFIX_RT,
+					       RTF_GATEWAY | RTF_DEFAULT);
+
+		if (rt) {
+			/* Autoconf prefix route */
+			if (valid_lft == 0) {
+				ip6_del_rt(rt);
+				rt = NULL;
+			} else if (addrconf_finite_timeout(rt_expires)) {
+				/* not infinity */
+				rt6_set_expires(rt, jiffies + rt_expires);
+			} else {
+				rt6_clean_expires(rt);
 			}
 		} else if (valid_lft) {
+			clock_t expires = 0;
+			int flags = RTF_ADDRCONF | RTF_PREFIX_RT;
+			if (addrconf_finite_timeout(rt_expires)) {
+				/* not infinity */
+				flags |= RTF_EXPIRES;
+				expires = jiffies_to_clock_t(rt_expires);
+			}
 			addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
-					      dev, rt_expires, RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
+					      dev, expires, flags);
 		}
-		if (rt)
-			dst_release(&rt->u.dst);
+		ip6_rt_put(rt);
 	}
 
 	/* Try to figure out our local address for this prefix */
 
 	if (pinfo->autoconf && in6_dev->cnf.autoconf) {
-		struct inet6_ifaddr * ifp;
+		struct inet6_ifaddr *ifp;
 		struct in6_addr addr;
 		int create = 0, update_lft = 0;
+		bool tokenized = false;
 
 		if (pinfo->prefix_len == 64) {
 			memcpy(&addr, &pinfo->prefix, 8);
-			if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
-			    ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
+
+			if (!ipv6_addr_any(&in6_dev->token)) {
+				read_lock_bh(&in6_dev->lock);
+				memcpy(addr.s6_addr + 8,
+				       in6_dev->token.s6_addr + 8, 8);
+				read_unlock_bh(&in6_dev->lock);
+				tokenized = true;
+			} else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
+				   ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
 				in6_dev_put(in6_dev);
 				return;
 			}
 			goto ok;
 		}
-		if (net_ratelimit())
-			printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n",
-			       pinfo->prefix_len);
+		net_dbg_ratelimited("IPv6 addrconf: prefix with wrong length %d\n",
+				    pinfo->prefix_len);
 		in6_dev_put(in6_dev);
 		return;
 
 ok:
 
-		ifp = ipv6_get_ifaddr(&addr, dev, 1);
+		ifp = ipv6_get_ifaddr(net, &addr, dev, 1);
 
 		if (ifp == NULL && valid_lft) {
 			int max_addresses = in6_dev->cnf.max_addresses;
+			u32 addr_flags = 0;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+			if (in6_dev->cnf.optimistic_dad &&
+			    !net->ipv6.devconf_all->forwarding && sllao)
+				addr_flags = IFA_F_OPTIMISTIC;
+#endif
 
 			/* Do not allow to create too much of autoconfigured
 			 * addresses; this would be too easy way to crash kernel.
 			 */
 			if (!max_addresses ||
 			    ipv6_count_addresses(in6_dev) < max_addresses)
-				ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
-						    addr_type&IPV6_ADDR_SCOPE_MASK, 0);
+				ifp = ipv6_add_addr(in6_dev, &addr, NULL,
+						    pinfo->prefix_len,
+						    addr_type&IPV6_ADDR_SCOPE_MASK,
+						    addr_flags, valid_lft,
+						    prefered_lft);
 
-			if (!ifp || IS_ERR(ifp)) {
+			if (IS_ERR_OR_NULL(ifp)) {
 				in6_dev_put(in6_dev);
 				return;
 			}
 
-			update_lft = create = 1;
+			update_lft = 0;
+			create = 1;
+			spin_lock_bh(&ifp->lock);
+			ifp->flags |= IFA_F_MANAGETEMPADDR;
 			ifp->cstamp = jiffies;
-			addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT);
+			ifp->tokenized = tokenized;
+			spin_unlock_bh(&ifp->lock);
+			addrconf_dad_start(ifp);
 		}
 
 		if (ifp) {
-			int flags;
+			u32 flags;
 			unsigned long now;
-#ifdef CONFIG_IPV6_PRIVACY
-			struct inet6_ifaddr *ift;
-#endif
 			u32 stored_lft;
 
 			/* update lifetime (RFC2462 5.5.3 e) */
@@ -1539,20 +2308,22 @@ ok:
 				stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
 			else
 				stored_lft = 0;
-			if (!update_lft && stored_lft) {
-				if (valid_lft > MIN_VALID_LIFETIME ||
-				    valid_lft > stored_lft)
-					update_lft = 1;
-				else if (stored_lft <= MIN_VALID_LIFETIME) {
-					/* valid_lft <= stored_lft is always true */
-					/* XXX: IPsec */
-					update_lft = 0;
-				} else {
-					valid_lft = MIN_VALID_LIFETIME;
-					if (valid_lft < prefered_lft)
-						prefered_lft = valid_lft;
-					update_lft = 1;
-				}
+			if (!update_lft && !create && stored_lft) {
+				const u32 minimum_lft = min(
+					stored_lft, (u32)MIN_VALID_LIFETIME);
+				valid_lft = max(valid_lft, minimum_lft);
+
+				/* RFC4862 Section 5.5.3e:
+				 * "Note that the preferred lifetime of the
+				 *  corresponding address is always reset to
+				 *  the Preferred Lifetime in the received
+				 *  Prefix Information option, regardless of
+				 *  whether the valid lifetime is also reset or
+				 *  ignored."
+				 *
+				 * So we should always update prefered_lft here.
+				 */
+				update_lft = 1;
 			}
 
 			if (update_lft) {
@@ -1568,43 +2339,11 @@ ok:
 			} else
 				spin_unlock(&ifp->lock);
 
-#ifdef CONFIG_IPV6_PRIVACY
-			read_lock_bh(&in6_dev->lock);
-			/* update all temporary addresses in the list */
-			for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) {
-				/*
-				 * When adjusting the lifetimes of an existing
-				 * temporary address, only lower the lifetimes.
-				 * Implementations must not increase the
-				 * lifetimes of an existing temporary address
-				 * when processing a Prefix Information Option.
-				 */
-				spin_lock(&ift->lock);
-				flags = ift->flags;
-				if (ift->valid_lft > valid_lft &&
-				    ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ)
-					ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ;
-				if (ift->prefered_lft > prefered_lft &&
-				    ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ)
-					ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ;
-				spin_unlock(&ift->lock);
-				if (!(flags&IFA_F_TENTATIVE))
-					ipv6_ifa_notify(0, ift);
-			}
+			manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft,
+					 create, now);
 
-			if (create && in6_dev->cnf.use_tempaddr > 0) {
-				/*
-				 * When a new public address is created as described in [ADDRCONF],
-				 * also create a new temporary address.
-				 */
-				read_unlock_bh(&in6_dev->lock); 
-				ipv6_create_tempaddr(ifp, NULL);
-			} else {
-				read_unlock_bh(&in6_dev->lock);
-			}
-#endif
 			in6_ifa_put(ifp);
-			addrconf_verify(0);
+			addrconf_verify();
 		}
 	}
 	inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
@@ -1616,7 +2355,7 @@ ok:
  *	Special case for SIT interfaces where we create a new "virtual"
  *	device.
  */
-int addrconf_set_dstaddr(void __user *arg)
+int addrconf_set_dstaddr(struct net *net, void __user *arg)
 {
 	struct in6_ifreq ireq;
 	struct net_device *dev;
@@ -1628,15 +2367,16 @@ int addrconf_set_dstaddr(void __user *arg)
 	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
 		goto err_exit;
 
-	dev = __dev_get_by_index(ireq.ifr6_ifindex);
+	dev = __dev_get_by_index(net, ireq.ifr6_ifindex);
 
 	err = -ENODEV;
 	if (dev == NULL)
 		goto err_exit;
 
+#if IS_ENABLED(CONFIG_IPV6_SIT)
 	if (dev->type == ARPHRD_SIT) {
+		const struct net_device_ops *ops = dev->netdev_ops;
 		struct ifreq ifr;
-		mm_segment_t	oldfs;
 		struct ip_tunnel_parm p;
 
 		err = -EADDRNOTAVAIL;
@@ -1650,19 +2390,26 @@ int addrconf_set_dstaddr(void __user *arg)
 		p.iph.ihl = 5;
 		p.iph.protocol = IPPROTO_IPV6;
 		p.iph.ttl = 64;
-		ifr.ifr_ifru.ifru_data = (void __user *)&p;
+		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
+
+		if (ops->ndo_do_ioctl) {
+			mm_segment_t oldfs = get_fs();
 
-		oldfs = get_fs(); set_fs(KERNEL_DS);
-		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
-		set_fs(oldfs);
+			set_fs(KERNEL_DS);
+			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
+			set_fs(oldfs);
+		} else
+			err = -EOPNOTSUPP;
 
 		if (err == 0) {
 			err = -ENOBUFS;
-			if ((dev = __dev_get_by_name(p.name)) == NULL)
+			dev = __dev_get_by_name(net, p.name);
+			if (!dev)
 				goto err_exit;
 			err = dev_open(dev);
 		}
 	}
+#endif
 
 err_exit:
 	rtnl_unlock();
@@ -1672,62 +2419,116 @@ err_exit:
 /*
  *	Manual configuration of address on an interface
  */
-static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen)
+static int inet6_addr_add(struct net *net, int ifindex,
+			  const struct in6_addr *pfx,
+			  const struct in6_addr *peer_pfx,
+			  unsigned int plen, __u32 ifa_flags,
+			  __u32 prefered_lft, __u32 valid_lft)
 {
 	struct inet6_ifaddr *ifp;
 	struct inet6_dev *idev;
 	struct net_device *dev;
 	int scope;
+	u32 flags;
+	clock_t expires;
+	unsigned long timeout;
 
 	ASSERT_RTNL();
-	
-	if ((dev = __dev_get_by_index(ifindex)) == NULL)
+
+	if (plen > 128)
+		return -EINVAL;
+
+	/* check the lifetime */
+	if (!valid_lft || prefered_lft > valid_lft)
+		return -EINVAL;
+
+	if (ifa_flags & IFA_F_MANAGETEMPADDR && plen != 64)
+		return -EINVAL;
+
+	dev = __dev_get_by_index(net, ifindex);
+	if (!dev)
 		return -ENODEV;
-	
-	if (!(dev->flags&IFF_UP))
-		return -ENETDOWN;
 
-	if ((idev = addrconf_add_dev(dev)) == NULL)
-		return -ENOBUFS;
+	idev = addrconf_add_dev(dev);
+	if (IS_ERR(idev))
+		return PTR_ERR(idev);
 
 	scope = ipv6_addr_scope(pfx);
 
-	ifp = ipv6_add_addr(idev, pfx, plen, scope, IFA_F_PERMANENT);
+	timeout = addrconf_timeout_fixup(valid_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		expires = jiffies_to_clock_t(timeout * HZ);
+		valid_lft = timeout;
+		flags = RTF_EXPIRES;
+	} else {
+		expires = 0;
+		flags = 0;
+		ifa_flags |= IFA_F_PERMANENT;
+	}
+
+	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		if (timeout == 0)
+			ifa_flags |= IFA_F_DEPRECATED;
+		prefered_lft = timeout;
+	}
+
+	ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags,
+			    valid_lft, prefered_lft);
+
 	if (!IS_ERR(ifp)) {
-		addrconf_dad_start(ifp, 0);
+		if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
+			addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
+					      expires, flags);
+		}
+
+		/*
+		 * Note that section 3.1 of RFC 4429 indicates
+		 * that the Optimistic flag should not be set for
+		 * manually configured addresses
+		 */
+		addrconf_dad_start(ifp);
+		if (ifa_flags & IFA_F_MANAGETEMPADDR)
+			manage_tempaddrs(idev, ifp, valid_lft, prefered_lft,
+					 true, jiffies);
 		in6_ifa_put(ifp);
+		addrconf_verify_rtnl();
 		return 0;
 	}
 
 	return PTR_ERR(ifp);
 }
 
-static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen)
+static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
+			  const struct in6_addr *pfx, unsigned int plen)
 {
 	struct inet6_ifaddr *ifp;
 	struct inet6_dev *idev;
 	struct net_device *dev;
-	
-	if ((dev = __dev_get_by_index(ifindex)) == NULL)
+
+	if (plen > 128)
+		return -EINVAL;
+
+	dev = __dev_get_by_index(net, ifindex);
+	if (!dev)
 		return -ENODEV;
 
 	if ((idev = __in6_dev_get(dev)) == NULL)
 		return -ENXIO;
 
 	read_lock_bh(&idev->lock);
-	for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) {
+	list_for_each_entry(ifp, &idev->addr_list, if_list) {
 		if (ifp->prefix_len == plen &&
 		    ipv6_addr_equal(pfx, &ifp->addr)) {
 			in6_ifa_hold(ifp);
 			read_unlock_bh(&idev->lock);
-			
-			ipv6_del_addr(ifp);
 
-			/* If the last address is deleted administratively,
-			   disable IPv6 on this interface.
-			 */
-			if (idev->addr_list == NULL)
-				addrconf_ifdown(idev->dev, 1);
+			if (!(ifp->flags & IFA_F_TEMPORARY) &&
+			    (ifa_flags & IFA_F_MANAGETEMPADDR))
+				manage_tempaddrs(idev, ifp, 0, 0, false,
+						 jiffies);
+			ipv6_del_addr(ifp);
+			addrconf_verify_rtnl();
 			return 0;
 		}
 	}
@@ -1736,46 +2537,68 @@ static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen)
 }
 
 
-int addrconf_add_ifaddr(void __user *arg)
+int addrconf_add_ifaddr(struct net *net, void __user *arg)
 {
 	struct in6_ifreq ireq;
 	int err;
-	
-	if (!capable(CAP_NET_ADMIN))
+
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
-	
+
 	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
 		return -EFAULT;
 
 	rtnl_lock();
-	err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen);
+	err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL,
+			     ireq.ifr6_prefixlen, IFA_F_PERMANENT,
+			     INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
 	rtnl_unlock();
 	return err;
 }
 
-int addrconf_del_ifaddr(void __user *arg)
+int addrconf_del_ifaddr(struct net *net, void __user *arg)
 {
 	struct in6_ifreq ireq;
 	int err;
-	
-	if (!capable(CAP_NET_ADMIN))
+
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
 	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
 		return -EFAULT;
 
 	rtnl_lock();
-	err = inet6_addr_del(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen);
+	err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr,
+			     ireq.ifr6_prefixlen);
 	rtnl_unlock();
 	return err;
 }
 
+static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
+		     int plen, int scope)
+{
+	struct inet6_ifaddr *ifp;
+
+	ifp = ipv6_add_addr(idev, addr, NULL, plen,
+			    scope, IFA_F_PERMANENT,
+			    INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+	if (!IS_ERR(ifp)) {
+		spin_lock_bh(&ifp->lock);
+		ifp->flags &= ~IFA_F_TENTATIVE;
+		spin_unlock_bh(&ifp->lock);
+		ipv6_ifa_notify(RTM_NEWADDR, ifp);
+		in6_ifa_put(ifp);
+	}
+}
+
+#if IS_ENABLED(CONFIG_IPV6_SIT)
 static void sit_add_v4_addrs(struct inet6_dev *idev)
 {
-	struct inet6_ifaddr * ifp;
 	struct in6_addr addr;
 	struct net_device *dev;
-	int scope;
+	struct net *net = dev_net(idev->dev);
+	int scope, plen;
+	u32 pflags = 0;
 
 	ASSERT_RTNL();
 
@@ -1785,31 +2608,27 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
 	if (idev->dev->flags&IFF_POINTOPOINT) {
 		addr.s6_addr32[0] = htonl(0xfe800000);
 		scope = IFA_LINK;
+		plen = 64;
 	} else {
 		scope = IPV6_ADDR_COMPATv4;
+		plen = 96;
+		pflags |= RTF_NONEXTHOP;
 	}
 
 	if (addr.s6_addr32[3]) {
-		ifp = ipv6_add_addr(idev, &addr, 128, scope, IFA_F_PERMANENT);
-		if (!IS_ERR(ifp)) {
-			spin_lock_bh(&ifp->lock);
-			ifp->flags &= ~IFA_F_TENTATIVE;
-			spin_unlock_bh(&ifp->lock);
-			ipv6_ifa_notify(RTM_NEWADDR, ifp);
-			in6_ifa_put(ifp);
-		}
+		add_addr(idev, &addr, plen, scope);
+		addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags);
 		return;
 	}
 
-        for (dev = dev_base; dev != NULL; dev = dev->next) {
-		struct in_device * in_dev = __in_dev_get_rtnl(dev);
+	for_each_netdev(net, dev) {
+		struct in_device *in_dev = __in_dev_get_rtnl(dev);
 		if (in_dev && (dev->flags & IFF_UP)) {
-			struct in_ifaddr * ifa;
+			struct in_ifaddr *ifa;
 
 			int flag = scope;
 
 			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
-				int plen;
 
 				addr.s6_addr32[3] = ifa->ifa_local;
 
@@ -1820,56 +2639,91 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
 						continue;
 					flag |= IFA_HOST;
 				}
-				if (idev->dev->flags&IFF_POINTOPOINT)
-					plen = 64;
-				else
-					plen = 96;
-
-				ifp = ipv6_add_addr(idev, &addr, plen, flag,
-						    IFA_F_PERMANENT);
-				if (!IS_ERR(ifp)) {
-					spin_lock_bh(&ifp->lock);
-					ifp->flags &= ~IFA_F_TENTATIVE;
-					spin_unlock_bh(&ifp->lock);
-					ipv6_ifa_notify(RTM_NEWADDR, ifp);
-					in6_ifa_put(ifp);
-				}
+
+				add_addr(idev, &addr, plen, flag);
+				addrconf_prefix_route(&addr, plen, idev->dev, 0,
+						      pflags);
 			}
 		}
-        }
+	}
 }
+#endif
 
 static void init_loopback(struct net_device *dev)
 {
 	struct inet6_dev  *idev;
-	struct inet6_ifaddr * ifp;
+	struct net_device *sp_dev;
+	struct inet6_ifaddr *sp_ifa;
+	struct rt6_info *sp_rt;
 
 	/* ::1 */
 
 	ASSERT_RTNL();
 
 	if ((idev = ipv6_find_idev(dev)) == NULL) {
-		printk(KERN_DEBUG "init loopback: add_dev failed\n");
+		pr_debug("%s: add_dev failed\n", __func__);
 		return;
 	}
 
-	ifp = ipv6_add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFA_F_PERMANENT);
-	if (!IS_ERR(ifp)) {
-		spin_lock_bh(&ifp->lock);
-		ifp->flags &= ~IFA_F_TENTATIVE;
-		spin_unlock_bh(&ifp->lock);
-		ipv6_ifa_notify(RTM_NEWADDR, ifp);
-		in6_ifa_put(ifp);
+	add_addr(idev, &in6addr_loopback, 128, IFA_HOST);
+
+	/* Add routes to other interface's IPv6 addresses */
+	for_each_netdev(dev_net(dev), sp_dev) {
+		if (!strcmp(sp_dev->name, dev->name))
+			continue;
+
+		idev = __in6_dev_get(sp_dev);
+		if (!idev)
+			continue;
+
+		read_lock_bh(&idev->lock);
+		list_for_each_entry(sp_ifa, &idev->addr_list, if_list) {
+
+			if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))
+				continue;
+
+			if (sp_ifa->rt) {
+				/* This dst has been added to garbage list when
+				 * lo device down, release this obsolete dst and
+				 * reallocate a new router for ifa.
+				 */
+				if (sp_ifa->rt->dst.obsolete > 0) {
+					ip6_rt_put(sp_ifa->rt);
+					sp_ifa->rt = NULL;
+				} else {
+					continue;
+				}
+			}
+
+			sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false);
+
+			/* Failure cases are ignored */
+			if (!IS_ERR(sp_rt)) {
+				sp_ifa->rt = sp_rt;
+				ip6_ins_rt(sp_rt);
+			}
+		}
+		read_unlock_bh(&idev->lock);
 	}
 }
 
-static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
+static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr)
 {
-	struct inet6_ifaddr * ifp;
+	struct inet6_ifaddr *ifp;
+	u32 addr_flags = IFA_F_PERMANENT;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	if (idev->cnf.optimistic_dad &&
+	    !dev_net(idev->dev)->ipv6.devconf_all->forwarding)
+		addr_flags |= IFA_F_OPTIMISTIC;
+#endif
 
-	ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, IFA_F_PERMANENT);
+
+	ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags,
+			    INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
 	if (!IS_ERR(ifp)) {
-		addrconf_dad_start(ifp, 0);
+		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
+		addrconf_dad_start(ifp);
 		in6_ifa_put(ifp);
 	}
 }
@@ -1877,21 +2731,24 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr
 static void addrconf_dev_config(struct net_device *dev)
 {
 	struct in6_addr addr;
-	struct inet6_dev    * idev;
+	struct inet6_dev *idev;
 
 	ASSERT_RTNL();
 
-	if ((dev->type != ARPHRD_ETHER) && 
+	if ((dev->type != ARPHRD_ETHER) &&
 	    (dev->type != ARPHRD_FDDI) &&
-	    (dev->type != ARPHRD_IEEE802_TR) &&
 	    (dev->type != ARPHRD_ARCNET) &&
-	    (dev->type != ARPHRD_INFINIBAND)) {
+	    (dev->type != ARPHRD_INFINIBAND) &&
+	    (dev->type != ARPHRD_IEEE802154) &&
+	    (dev->type != ARPHRD_IEEE1394) &&
+	    (dev->type != ARPHRD_TUNNEL6) &&
+	    (dev->type != ARPHRD_6LOWPAN)) {
 		/* Alas, we support only Ethernet autoconfiguration. */
 		return;
 	}
 
 	idev = addrconf_add_dev(dev);
-	if (idev == NULL)
+	if (IS_ERR(idev))
 		return;
 
 	memset(&addr, 0, sizeof(struct in6_addr));
@@ -1901,96 +2758,128 @@ static void addrconf_dev_config(struct net_device *dev)
 		addrconf_add_linklocal(idev, &addr);
 }
 
+#if IS_ENABLED(CONFIG_IPV6_SIT)
 static void addrconf_sit_config(struct net_device *dev)
 {
 	struct inet6_dev *idev;
 
 	ASSERT_RTNL();
 
-	/* 
-	 * Configure the tunnel with one of our IPv4 
-	 * addresses... we should configure all of 
+	/*
+	 * Configure the tunnel with one of our IPv4
+	 * addresses... we should configure all of
 	 * our v4 addrs in the tunnel
 	 */
 
 	if ((idev = ipv6_find_idev(dev)) == NULL) {
-		printk(KERN_DEBUG "init sit: add_dev failed\n");
+		pr_debug("%s: add_dev failed\n", __func__);
 		return;
 	}
 
-	sit_add_v4_addrs(idev);
-
-	if (dev->flags&IFF_POINTOPOINT) {
-		addrconf_add_mroute(dev);
-		addrconf_add_lroute(dev);
-	} else
-		sit_route_add(dev);
-}
-
-static inline int
-ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
-{
-	struct in6_addr lladdr;
+	if (dev->priv_flags & IFF_ISATAP) {
+		struct in6_addr addr;
 
-	if (!ipv6_get_lladdr(link_dev, &lladdr)) {
-		addrconf_add_linklocal(idev, &lladdr);
-		return 0;
+		ipv6_addr_set(&addr,  htonl(0xFE800000), 0, 0, 0);
+		if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
+			addrconf_add_linklocal(idev, &addr);
+		return;
 	}
-	return -1;
-}
 
-static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
-{
-	struct net_device *link_dev;
+	sit_add_v4_addrs(idev);
 
-	/* first try to inherit the link-local address from the link device */
-	if (idev->dev->iflink &&
-	    (link_dev = __dev_get_by_index(idev->dev->iflink))) {
-		if (!ipv6_inherit_linklocal(idev, link_dev))
-			return;
-	}
-	/* then try to inherit it from any device */
-	for (link_dev = dev_base; link_dev; link_dev = link_dev->next) {
-		if (!ipv6_inherit_linklocal(idev, link_dev))
-			return;
-	}
-	printk(KERN_DEBUG "init ip6-ip6: add_linklocal failed\n");
+	if (dev->flags&IFF_POINTOPOINT)
+		addrconf_add_mroute(dev);
 }
+#endif
 
-/*
- * Autoconfigure tunnel with a link-local address so routing protocols,
- * DHCPv6, MLD etc. can be run over the virtual link
- */
-
-static void addrconf_ip6_tnl_config(struct net_device *dev)
+#if IS_ENABLED(CONFIG_NET_IPGRE)
+static void addrconf_gre_config(struct net_device *dev)
 {
 	struct inet6_dev *idev;
+	struct in6_addr addr;
 
 	ASSERT_RTNL();
 
-	if ((idev = addrconf_add_dev(dev)) == NULL) {
-		printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n");
+	if ((idev = ipv6_find_idev(dev)) == NULL) {
+		pr_debug("%s: add_dev failed\n", __func__);
 		return;
 	}
-	ip6_tnl_add_linklocal(idev);
-	addrconf_add_mroute(dev);
+
+	ipv6_addr_set(&addr,  htonl(0xFE800000), 0, 0, 0);
+	if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
+		addrconf_add_linklocal(idev, &addr);
+	else
+		addrconf_prefix_route(&addr, 64, dev, 0, 0);
 }
+#endif
 
-static int addrconf_notify(struct notifier_block *this, unsigned long event, 
-			   void * data)
+static int addrconf_notify(struct notifier_block *this, unsigned long event,
+			   void *ptr)
 {
-	struct net_device *dev = (struct net_device *) data;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct inet6_dev *idev = __in6_dev_get(dev);
+	int run_pending = 0;
+	int err;
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		if (!idev && dev->mtu >= IPV6_MIN_MTU) {
+			idev = ipv6_add_dev(dev);
+			if (!idev)
+				return notifier_from_errno(-ENOMEM);
+		}
+		break;
 
-	switch(event) {
 	case NETDEV_UP:
-		switch(dev->type) {
+	case NETDEV_CHANGE:
+		if (dev->flags & IFF_SLAVE)
+			break;
+
+		if (event == NETDEV_UP) {
+			if (!addrconf_qdisc_ok(dev)) {
+				/* device is not ready yet. */
+				pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
+					dev->name);
+				break;
+			}
+
+			if (!idev && dev->mtu >= IPV6_MIN_MTU)
+				idev = ipv6_add_dev(dev);
+
+			if (idev) {
+				idev->if_flags |= IF_READY;
+				run_pending = 1;
+			}
+		} else {
+			if (!addrconf_qdisc_ok(dev)) {
+				/* device is still not ready. */
+				break;
+			}
+
+			if (idev) {
+				if (idev->if_flags & IF_READY)
+					/* device is already configured. */
+					break;
+				idev->if_flags |= IF_READY;
+			}
+
+			pr_info("ADDRCONF(NETDEV_CHANGE): %s: link becomes ready\n",
+				dev->name);
+
+			run_pending = 1;
+		}
+
+		switch (dev->type) {
+#if IS_ENABLED(CONFIG_IPV6_SIT)
 		case ARPHRD_SIT:
 			addrconf_sit_config(dev);
 			break;
-		case ARPHRD_TUNNEL6:
-			addrconf_ip6_tnl_config(dev);
+#endif
+#if IS_ENABLED(CONFIG_NET_IPGRE)
+		case ARPHRD_IPGRE:
+			addrconf_gre_config(dev);
 			break;
+#endif
 		case ARPHRD_LOOPBACK:
 			init_loopback(dev);
 			break;
@@ -1998,34 +2887,51 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		default:
 			addrconf_dev_config(dev);
 			break;
-		};
+		}
+
 		if (idev) {
-			/* If the MTU changed during the interface down, when the
-			   interface up, the changed MTU must be reflected in the
-			   idev as well as routers.
+			if (run_pending)
+				addrconf_dad_run(idev);
+
+			/*
+			 * If the MTU changed during the interface down,
+			 * when the interface up, the changed MTU must be
+			 * reflected in the idev as well as routers.
 			 */
-			if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) {
+			if (idev->cnf.mtu6 != dev->mtu &&
+			    dev->mtu >= IPV6_MIN_MTU) {
 				rt6_mtu_change(dev, dev->mtu);
 				idev->cnf.mtu6 = dev->mtu;
 			}
 			idev->tstamp = jiffies;
 			inet6_ifinfo_notify(RTM_NEWLINK, idev);
-			/* If the changed mtu during down is lower than IPV6_MIN_MTU
-			   stop IPv6 on this interface.
+
+			/*
+			 * If the changed mtu during down is lower than
+			 * IPV6_MIN_MTU stop IPv6 on this interface.
 			 */
 			if (dev->mtu < IPV6_MIN_MTU)
-				addrconf_ifdown(dev, event != NETDEV_DOWN);
+				addrconf_ifdown(dev, 1);
 		}
 		break;
 
 	case NETDEV_CHANGEMTU:
-		if ( idev && dev->mtu >= IPV6_MIN_MTU) {
+		if (idev && dev->mtu >= IPV6_MIN_MTU) {
 			rt6_mtu_change(dev, dev->mtu);
 			idev->cnf.mtu6 = dev->mtu;
 			break;
 		}
 
-		/* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */
+		if (!idev && dev->mtu >= IPV6_MIN_MTU) {
+			idev = ipv6_add_dev(dev);
+			if (idev)
+				break;
+		}
+
+		/*
+		 * if MTU under IPV6_MIN_MTU.
+		 * Stop IPv6 on this interface.
+		 */
 
 	case NETDEV_DOWN:
 	case NETDEV_UNREGISTER:
@@ -2034,22 +2940,23 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		 */
 		addrconf_ifdown(dev, event != NETDEV_DOWN);
 		break;
-	case NETDEV_CHANGE:
-		break;
+
 	case NETDEV_CHANGENAME:
-#ifdef CONFIG_SYSCTL
 		if (idev) {
-			addrconf_sysctl_unregister(&idev->cnf);
-			neigh_sysctl_unregister(idev->nd_parms);
-			neigh_sysctl_register(dev, idev->nd_parms,
-					      NET_IPV6, NET_IPV6_NEIGH, "ipv6",
-					      &ndisc_ifinfo_sysctl_change,
-					      NULL);
-			addrconf_sysctl_register(idev, &idev->cnf);
+			snmp6_unregister_dev(idev);
+			addrconf_sysctl_unregister(idev);
+			addrconf_sysctl_register(idev);
+			err = snmp6_register_dev(idev);
+			if (err)
+				return notifier_from_errno(err);
 		}
-#endif
 		break;
-	};
+
+	case NETDEV_PRE_TYPE_CHANGE:
+	case NETDEV_POST_TYPE_CHANGE:
+		addrconf_type_change(dev, event);
+		break;
+	}
 
 	return NOTIFY_OK;
 }
@@ -2059,35 +2966,46 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
  */
 static struct notifier_block ipv6_dev_notf = {
 	.notifier_call = addrconf_notify,
-	.priority = 0
 };
 
+static void addrconf_type_change(struct net_device *dev, unsigned long event)
+{
+	struct inet6_dev *idev;
+	ASSERT_RTNL();
+
+	idev = __in6_dev_get(dev);
+
+	if (event == NETDEV_POST_TYPE_CHANGE)
+		ipv6_mc_remap(idev);
+	else if (event == NETDEV_PRE_TYPE_CHANGE)
+		ipv6_mc_unmap(idev);
+}
+
 static int addrconf_ifdown(struct net_device *dev, int how)
 {
+	struct net *net = dev_net(dev);
 	struct inet6_dev *idev;
-	struct inet6_ifaddr *ifa, **bifa;
-	int i;
+	struct inet6_ifaddr *ifa;
+	int state, i;
 
 	ASSERT_RTNL();
 
-	if (dev == &loopback_dev && how == 1)
-		how = 0;
-
-	rt6_ifdown(dev);
+	rt6_ifdown(net, dev);
 	neigh_ifdown(&nd_tbl, dev);
 
 	idev = __in6_dev_get(dev);
 	if (idev == NULL)
 		return -ENODEV;
 
-	/* Step 1: remove reference to ipv6 device from parent device.
-	           Do not dev_put!
+	/*
+	 * Step 1: remove reference to ipv6 device from parent device.
+	 *	   Do not dev_put!
 	 */
-	if (how == 1) {
-		write_lock_bh(&addrconf_lock);
-		dev->ip6_ptr = NULL;
+	if (how) {
 		idev->dead = 1;
-		write_unlock_bh(&addrconf_lock);
+
+		/* protected by rtnl_lock */
+		RCU_INIT_POINTER(dev->ip6_ptr, NULL);
 
 		/* Step 1.5: remove snmp6 entry */
 		snmp6_unregister_dev(idev);
@@ -2095,39 +3013,37 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	}
 
 	/* Step 2: clear hash table */
-	for (i=0; i<IN6_ADDR_HSIZE; i++) {
-		bifa = &inet6_addr_lst[i];
+	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
+		struct hlist_head *h = &inet6_addr_lst[i];
 
-		write_lock_bh(&addrconf_hash_lock);
-		while ((ifa = *bifa) != NULL) {
+		spin_lock_bh(&addrconf_hash_lock);
+	restart:
+		hlist_for_each_entry_rcu(ifa, h, addr_lst) {
 			if (ifa->idev == idev) {
-				*bifa = ifa->lst_next;
-				ifa->lst_next = NULL;
-				addrconf_del_timer(ifa);
-				in6_ifa_put(ifa);
-				continue;
+				hlist_del_init_rcu(&ifa->addr_lst);
+				addrconf_del_dad_work(ifa);
+				goto restart;
 			}
-			bifa = &ifa->lst_next;
 		}
-		write_unlock_bh(&addrconf_hash_lock);
+		spin_unlock_bh(&addrconf_hash_lock);
 	}
 
 	write_lock_bh(&idev->lock);
 
-	/* Step 3: clear flags for stateless addrconf */
-	if (how != 1)
-		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD);
+	addrconf_del_rs_timer(idev);
+
+	/* Step 2: clear flags for stateless addrconf */
+	if (!how)
+		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
 
-	/* Step 4: clear address list */
-#ifdef CONFIG_IPV6_PRIVACY
-	if (how == 1 && del_timer(&idev->regen_timer))
+	if (how && del_timer(&idev->regen_timer))
 		in6_dev_put(idev);
 
-	/* clear tempaddr list */
-	while ((ifa = idev->tempaddr_list) != NULL) {
-		idev->tempaddr_list = ifa->tmp_next;
-		ifa->tmp_next = NULL;
-		ifa->dead = 1;
+	/* Step 3: clear tempaddr list */
+	while (!list_empty(&idev->tempaddr_list)) {
+		ifa = list_first_entry(&idev->tempaddr_list,
+				       struct inet6_ifaddr, tmp_list);
+		list_del(&ifa->tmp_list);
 		write_unlock_bh(&idev->lock);
 		spin_lock_bh(&ifa->lock);
 
@@ -2139,39 +3055,43 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		in6_ifa_put(ifa);
 		write_lock_bh(&idev->lock);
 	}
-#endif
-	while ((ifa = idev->addr_list) != NULL) {
-		idev->addr_list = ifa->if_next;
-		ifa->if_next = NULL;
-		ifa->dead = 1;
-		addrconf_del_timer(ifa);
+
+	while (!list_empty(&idev->addr_list)) {
+		ifa = list_first_entry(&idev->addr_list,
+				       struct inet6_ifaddr, if_list);
+		addrconf_del_dad_work(ifa);
+
+		list_del(&ifa->if_list);
+
 		write_unlock_bh(&idev->lock);
 
-		__ipv6_ifa_notify(RTM_DELADDR, ifa);
+		spin_lock_bh(&ifa->state_lock);
+		state = ifa->state;
+		ifa->state = INET6_IFADDR_STATE_DEAD;
+		spin_unlock_bh(&ifa->state_lock);
+
+		if (state != INET6_IFADDR_STATE_DEAD) {
+			__ipv6_ifa_notify(RTM_DELADDR, ifa);
+			inet6addr_notifier_call_chain(NETDEV_DOWN, ifa);
+		}
 		in6_ifa_put(ifa);
 
 		write_lock_bh(&idev->lock);
 	}
+
 	write_unlock_bh(&idev->lock);
 
 	/* Step 5: Discard multicast list */
-
-	if (how == 1)
+	if (how)
 		ipv6_mc_destroy_dev(idev);
 	else
 		ipv6_mc_down(idev);
 
-	/* Step 5: netlink notification of this interface */
 	idev->tstamp = jiffies;
-	inet6_ifinfo_notify(RTM_DELLINK, idev);
-	
-	/* Shot the device (if unregistered) */
 
-	if (how == 1) {
-#ifdef CONFIG_SYSCTL
-		addrconf_sysctl_unregister(&idev->cnf);
-		neigh_sysctl_unregister(idev->nd_parms);
-#endif
+	/* Last: Shot the device (if unregistered) */
+	if (how) {
+		addrconf_sysctl_unregister(idev);
 		neigh_parms_release(&nd_tbl, idev->nd_parms);
 		neigh_ifdown(&nd_tbl, dev);
 		in6_dev_put(idev);
@@ -2181,131 +3101,236 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 static void addrconf_rs_timer(unsigned long data)
 {
-	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+	struct inet6_dev *idev = (struct inet6_dev *)data;
+	struct net_device *dev = idev->dev;
+	struct in6_addr lladdr;
 
-	if (ifp->idev->cnf.forwarding)
+	write_lock(&idev->lock);
+	if (idev->dead || !(idev->if_flags & IF_READY))
 		goto out;
 
-	if (ifp->idev->if_flags & IF_RA_RCVD) {
-		/*
-		 *	Announcement received after solicitation
-		 *	was sent
-		 */
+	if (!ipv6_accept_ra(idev))
 		goto out;
-	}
-
-	spin_lock(&ifp->lock);
-	if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) {
-		struct in6_addr all_routers;
 
-		/* The wait after the last probe can be shorter */
-		addrconf_mod_timer(ifp, AC_RS,
-				   (ifp->probes == ifp->idev->cnf.rtr_solicits) ?
-				   ifp->idev->cnf.rtr_solicit_delay :
-				   ifp->idev->cnf.rtr_solicit_interval);
-		spin_unlock(&ifp->lock);
+	/* Announcement received after solicitation was sent */
+	if (idev->if_flags & IF_RA_RCVD)
+		goto out;
 
-		ipv6_addr_all_routers(&all_routers);
+	if (idev->rs_probes++ < idev->cnf.rtr_solicits) {
+		write_unlock(&idev->lock);
+		if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
+			ndisc_send_rs(dev, &lladdr,
+				      &in6addr_linklocal_allrouters);
+		else
+			goto put;
 
-		ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
+		write_lock(&idev->lock);
+		/* The wait after the last probe can be shorter */
+		addrconf_mod_rs_timer(idev, (idev->rs_probes ==
+					     idev->cnf.rtr_solicits) ?
+				      idev->cnf.rtr_solicit_delay :
+				      idev->cnf.rtr_solicit_interval);
 	} else {
-		spin_unlock(&ifp->lock);
 		/*
 		 * Note: we do not support deprecated "all on-link"
 		 * assumption any longer.
 		 */
-		printk(KERN_DEBUG "%s: no IPv6 routers present\n",
-		       ifp->idev->dev->name);
+		pr_debug("%s: no IPv6 routers present\n", idev->dev->name);
 	}
 
 out:
-	in6_ifa_put(ifp);
+	write_unlock(&idev->lock);
+put:
+	in6_dev_put(idev);
 }
 
 /*
  *	Duplicate Address Detection
  */
-static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
+static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
+{
+	unsigned long rand_num;
+	struct inet6_dev *idev = ifp->idev;
+
+	if (ifp->flags & IFA_F_OPTIMISTIC)
+		rand_num = 0;
+	else
+		rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1);
+
+	ifp->dad_probes = idev->cnf.dad_transmits;
+	addrconf_mod_dad_work(ifp, rand_num);
+}
+
+static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 {
 	struct inet6_dev *idev = ifp->idev;
 	struct net_device *dev = idev->dev;
-	unsigned long rand_num;
 
 	addrconf_join_solict(dev, &ifp->addr);
 
-	if (ifp->prefix_len != 128 && (ifp->flags&IFA_F_PERMANENT))
-		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, 0,
-					flags);
-
-	net_srandom(ifp->addr.s6_addr32[3]);
-	rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+	prandom_seed((__force u32) ifp->addr.s6_addr32[3]);
 
 	read_lock_bh(&idev->lock);
-	if (ifp->dead)
+	spin_lock(&ifp->lock);
+	if (ifp->state == INET6_IFADDR_STATE_DEAD)
 		goto out;
-	spin_lock_bh(&ifp->lock);
 
 	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
-	    !(ifp->flags&IFA_F_TENTATIVE)) {
-		ifp->flags &= ~IFA_F_TENTATIVE;
-		spin_unlock_bh(&ifp->lock);
+	    idev->cnf.accept_dad < 1 ||
+	    !(ifp->flags&IFA_F_TENTATIVE) ||
+	    ifp->flags & IFA_F_NODAD) {
+		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
+		spin_unlock(&ifp->lock);
 		read_unlock_bh(&idev->lock);
 
 		addrconf_dad_completed(ifp);
 		return;
 	}
 
-	ifp->probes = idev->cnf.dad_transmits;
-	addrconf_mod_timer(ifp, AC_DAD, rand_num);
+	if (!(idev->if_flags & IF_READY)) {
+		spin_unlock(&ifp->lock);
+		read_unlock_bh(&idev->lock);
+		/*
+		 * If the device is not ready:
+		 * - keep it tentative if it is a permanent address.
+		 * - otherwise, kill it.
+		 */
+		in6_ifa_hold(ifp);
+		addrconf_dad_stop(ifp, 0);
+		return;
+	}
 
-	spin_unlock_bh(&ifp->lock);
+	/*
+	 * Optimistic nodes can start receiving
+	 * Frames right away
+	 */
+	if (ifp->flags & IFA_F_OPTIMISTIC)
+		ip6_ins_rt(ifp->rt);
+
+	addrconf_dad_kick(ifp);
 out:
+	spin_unlock(&ifp->lock);
 	read_unlock_bh(&idev->lock);
 }
 
-static void addrconf_dad_timer(unsigned long data)
+static void addrconf_dad_start(struct inet6_ifaddr *ifp)
+{
+	bool begin_dad = false;
+
+	spin_lock_bh(&ifp->state_lock);
+	if (ifp->state != INET6_IFADDR_STATE_DEAD) {
+		ifp->state = INET6_IFADDR_STATE_PREDAD;
+		begin_dad = true;
+	}
+	spin_unlock_bh(&ifp->state_lock);
+
+	if (begin_dad)
+		addrconf_mod_dad_work(ifp, 0);
+}
+
+static void addrconf_dad_work(struct work_struct *w)
 {
-	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+	struct inet6_ifaddr *ifp = container_of(to_delayed_work(w),
+						struct inet6_ifaddr,
+						dad_work);
 	struct inet6_dev *idev = ifp->idev;
-	struct in6_addr unspec;
 	struct in6_addr mcaddr;
 
-	read_lock_bh(&idev->lock);
-	if (idev->dead) {
-		read_unlock_bh(&idev->lock);
+	enum {
+		DAD_PROCESS,
+		DAD_BEGIN,
+		DAD_ABORT,
+	} action = DAD_PROCESS;
+
+	rtnl_lock();
+
+	spin_lock_bh(&ifp->state_lock);
+	if (ifp->state == INET6_IFADDR_STATE_PREDAD) {
+		action = DAD_BEGIN;
+		ifp->state = INET6_IFADDR_STATE_DAD;
+	} else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) {
+		action = DAD_ABORT;
+		ifp->state = INET6_IFADDR_STATE_POSTDAD;
+	}
+	spin_unlock_bh(&ifp->state_lock);
+
+	if (action == DAD_BEGIN) {
+		addrconf_dad_begin(ifp);
+		goto out;
+	} else if (action == DAD_ABORT) {
+		addrconf_dad_stop(ifp, 1);
 		goto out;
 	}
-	spin_lock_bh(&ifp->lock);
-	if (ifp->probes == 0) {
+
+	if (!ifp->dad_probes && addrconf_dad_end(ifp))
+		goto out;
+
+	write_lock_bh(&idev->lock);
+	if (idev->dead || !(idev->if_flags & IF_READY)) {
+		write_unlock_bh(&idev->lock);
+		goto out;
+	}
+
+	spin_lock(&ifp->lock);
+	if (ifp->state == INET6_IFADDR_STATE_DEAD) {
+		spin_unlock(&ifp->lock);
+		write_unlock_bh(&idev->lock);
+		goto out;
+	}
+
+	if (ifp->dad_probes == 0) {
 		/*
 		 * DAD was successful
 		 */
 
-		ifp->flags &= ~IFA_F_TENTATIVE;
-		spin_unlock_bh(&ifp->lock);
-		read_unlock_bh(&idev->lock);
+		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
+		spin_unlock(&ifp->lock);
+		write_unlock_bh(&idev->lock);
 
 		addrconf_dad_completed(ifp);
 
 		goto out;
 	}
 
-	ifp->probes--;
-	addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time);
-	spin_unlock_bh(&ifp->lock);
-	read_unlock_bh(&idev->lock);
+	ifp->dad_probes--;
+	addrconf_mod_dad_work(ifp,
+			      NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));
+	spin_unlock(&ifp->lock);
+	write_unlock_bh(&idev->lock);
 
 	/* send a neighbour solicitation for our addr */
-	memset(&unspec, 0, sizeof(unspec));
 	addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
-	ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec);
+	ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
 out:
 	in6_ifa_put(ifp);
+	rtnl_unlock();
+}
+
+/* ifp->idev must be at least read locked */
+static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp)
+{
+	struct inet6_ifaddr *ifpiter;
+	struct inet6_dev *idev = ifp->idev;
+
+	list_for_each_entry_reverse(ifpiter, &idev->addr_list, if_list) {
+		if (ifpiter->scope > IFA_LINK)
+			break;
+		if (ifp != ifpiter && ifpiter->scope == IFA_LINK &&
+		    (ifpiter->flags & (IFA_F_PERMANENT|IFA_F_TENTATIVE|
+				       IFA_F_OPTIMISTIC|IFA_F_DADFAILED)) ==
+		    IFA_F_PERMANENT)
+			return false;
+	}
+	return true;
 }
 
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
 {
-	struct net_device *	dev = ifp->idev->dev;
+	struct net_device *dev = ifp->idev->dev;
+	struct in6_addr lladdr;
+	bool send_rs, send_mld;
+
+	addrconf_del_dad_work(ifp);
 
 	/*
 	 *	Configure the address for reception. Now it is valid.
@@ -2313,78 +3338,133 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
 
 	ipv6_ifa_notify(RTM_NEWADDR, ifp);
 
-	/* If added prefix is link local and forwarding is off,
-	   start sending router solicitations.
+	/* If added prefix is link local and we are prepared to process
+	   router advertisements, start sending router solicitations.
 	 */
 
-	if (ifp->idev->cnf.forwarding == 0 &&
-	    ifp->idev->cnf.rtr_solicits > 0 &&
-	    (dev->flags&IFF_LOOPBACK) == 0 &&
-	    (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
-		struct in6_addr all_routers;
+	read_lock_bh(&ifp->idev->lock);
+	send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp);
+	send_rs = send_mld &&
+		  ipv6_accept_ra(ifp->idev) &&
+		  ifp->idev->cnf.rtr_solicits > 0 &&
+		  (dev->flags&IFF_LOOPBACK) == 0;
+	read_unlock_bh(&ifp->idev->lock);
 
-		ipv6_addr_all_routers(&all_routers);
+	/* While dad is in progress mld report's source address is in6_addrany.
+	 * Resend with proper ll now.
+	 */
+	if (send_mld)
+		ipv6_mc_dad_complete(ifp->idev);
 
+	if (send_rs) {
 		/*
 		 *	If a host as already performed a random delay
 		 *	[...] as part of DAD [...] there is no need
 		 *	to delay again before sending the first RS
 		 */
-		ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
+		if (ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
+			return;
+		ndisc_send_rs(dev, &lladdr, &in6addr_linklocal_allrouters);
 
-		spin_lock_bh(&ifp->lock);
-		ifp->probes = 1;
+		write_lock_bh(&ifp->idev->lock);
+		spin_lock(&ifp->lock);
+		ifp->idev->rs_probes = 1;
 		ifp->idev->if_flags |= IF_RS_SENT;
-		addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval);
-		spin_unlock_bh(&ifp->lock);
+		addrconf_mod_rs_timer(ifp->idev,
+				      ifp->idev->cnf.rtr_solicit_interval);
+		spin_unlock(&ifp->lock);
+		write_unlock_bh(&ifp->idev->lock);
 	}
 }
 
+static void addrconf_dad_run(struct inet6_dev *idev)
+{
+	struct inet6_ifaddr *ifp;
+
+	read_lock_bh(&idev->lock);
+	list_for_each_entry(ifp, &idev->addr_list, if_list) {
+		spin_lock(&ifp->lock);
+		if (ifp->flags & IFA_F_TENTATIVE &&
+		    ifp->state == INET6_IFADDR_STATE_DAD)
+			addrconf_dad_kick(ifp);
+		spin_unlock(&ifp->lock);
+	}
+	read_unlock_bh(&idev->lock);
+}
+
 #ifdef CONFIG_PROC_FS
 struct if6_iter_state {
+	struct seq_net_private p;
 	int bucket;
+	int offset;
 };
 
-static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
+static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
 {
 	struct inet6_ifaddr *ifa = NULL;
 	struct if6_iter_state *state = seq->private;
+	struct net *net = seq_file_net(seq);
+	int p = 0;
 
-	for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
-		ifa = inet6_addr_lst[state->bucket];
-		if (ifa)
-			break;
+	/* initial bucket if pos is 0 */
+	if (pos == 0) {
+		state->bucket = 0;
+		state->offset = 0;
 	}
-	return ifa;
+
+	for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
+		hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket],
+					 addr_lst) {
+			if (!net_eq(dev_net(ifa->idev->dev), net))
+				continue;
+			/* sync with offset */
+			if (p < state->offset) {
+				p++;
+				continue;
+			}
+			state->offset++;
+			return ifa;
+		}
+
+		/* prepare for next bucket */
+		state->offset = 0;
+		p = 0;
+	}
+	return NULL;
 }
 
-static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa)
+static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
+					 struct inet6_ifaddr *ifa)
 {
 	struct if6_iter_state *state = seq->private;
+	struct net *net = seq_file_net(seq);
 
-	ifa = ifa->lst_next;
-try_again:
-	if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) {
-		ifa = inet6_addr_lst[state->bucket];
-		goto try_again;
+	hlist_for_each_entry_continue_rcu_bh(ifa, addr_lst) {
+		if (!net_eq(dev_net(ifa->idev->dev), net))
+			continue;
+		state->offset++;
+		return ifa;
 	}
-	return ifa;
-}
 
-static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
-{
-	struct inet6_ifaddr *ifa = if6_get_first(seq);
+	while (++state->bucket < IN6_ADDR_HSIZE) {
+		state->offset = 0;
+		hlist_for_each_entry_rcu_bh(ifa,
+				     &inet6_addr_lst[state->bucket], addr_lst) {
+			if (!net_eq(dev_net(ifa->idev->dev), net))
+				continue;
+			state->offset++;
+			return ifa;
+		}
+	}
 
-	if (ifa)
-		while(pos && (ifa = if6_get_next(seq, ifa)) != NULL)
-			--pos;
-	return pos ? NULL : ifa;
+	return NULL;
 }
 
 static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(rcu_bh)
 {
-	read_lock_bh(&addrconf_hash_lock);
-	return if6_get_idx(seq, *pos);
+	rcu_read_lock_bh();
+	return if6_get_first(seq, *pos);
 }
 
 static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -2397,25 +3477,25 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void if6_seq_stop(struct seq_file *seq, void *v)
+	__releases(rcu_bh)
 {
-	read_unlock_bh(&addrconf_hash_lock);
+	rcu_read_unlock_bh();
 }
 
 static int if6_seq_show(struct seq_file *seq, void *v)
 {
 	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
-	seq_printf(seq,
-		   "%04x%04x%04x%04x%04x%04x%04x%04x %02x %02x %02x %02x %8s\n",
-		   NIP6(ifp->addr),
+	seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
+		   &ifp->addr,
 		   ifp->idev->dev->ifindex,
 		   ifp->prefix_len,
 		   ifp->scope,
-		   ifp->flags,
+		   (u8) ifp->flags,
 		   ifp->idev->dev->name);
 	return 0;
 }
 
-static struct seq_operations if6_seq_ops = {
+static const struct seq_operations if6_seq_ops = {
 	.start	= if6_seq_start,
 	.next	= if6_seq_next,
 	.show	= if6_seq_show,
@@ -2424,94 +3504,115 @@ static struct seq_operations if6_seq_ops = {
 
 static int if6_seq_open(struct inode *inode, struct file *file)
 {
-	struct seq_file *seq;
-	int rc = -ENOMEM;
-	struct if6_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
-
-	if (!s)
-		goto out;
-	memset(s, 0, sizeof(*s));
-
-	rc = seq_open(file, &if6_seq_ops);
-	if (rc)
-		goto out_kfree;
-
-	seq = file->private_data;
-	seq->private = s;
-out:
-	return rc;
-out_kfree:
-	kfree(s);
-	goto out;
+	return seq_open_net(inode, file, &if6_seq_ops,
+			    sizeof(struct if6_iter_state));
 }
 
-static struct file_operations if6_fops = {
+static const struct file_operations if6_fops = {
 	.owner		= THIS_MODULE,
 	.open		= if6_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= seq_release_net,
 };
 
-int __init if6_proc_init(void)
+static int __net_init if6_proc_net_init(struct net *net)
 {
-	if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops))
+	if (!proc_create("if_inet6", S_IRUGO, net->proc_net, &if6_fops))
 		return -ENOMEM;
 	return 0;
 }
 
+static void __net_exit if6_proc_net_exit(struct net *net)
+{
+	remove_proc_entry("if_inet6", net->proc_net);
+}
+
+static struct pernet_operations if6_proc_net_ops = {
+       .init = if6_proc_net_init,
+       .exit = if6_proc_net_exit,
+};
+
+int __init if6_proc_init(void)
+{
+	return register_pernet_subsys(&if6_proc_net_ops);
+}
+
 void if6_proc_exit(void)
 {
-	proc_net_remove("if_inet6");
+	unregister_pernet_subsys(&if6_proc_net_ops);
 }
 #endif	/* CONFIG_PROC_FS */
 
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+/* Check if address is a home address configured on any interface. */
+int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
+{
+	int ret = 0;
+	struct inet6_ifaddr *ifp = NULL;
+	unsigned int hash = inet6_addr_hash(addr);
+
+	rcu_read_lock_bh();
+	hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) {
+		if (!net_eq(dev_net(ifp->idev->dev), net))
+			continue;
+		if (ipv6_addr_equal(&ifp->addr, addr) &&
+		    (ifp->flags & IFA_F_HOMEADDRESS)) {
+			ret = 1;
+			break;
+		}
+	}
+	rcu_read_unlock_bh();
+	return ret;
+}
+#endif
+
 /*
  *	Periodic address status verification
  */
 
-static void addrconf_verify(unsigned long foo)
+static void addrconf_verify_rtnl(void)
 {
+	unsigned long now, next, next_sec, next_sched;
 	struct inet6_ifaddr *ifp;
-	unsigned long now, next;
 	int i;
 
-	spin_lock_bh(&addrconf_verify_lock);
-	now = jiffies;
-	next = now + ADDR_CHECK_FREQUENCY;
+	ASSERT_RTNL();
 
-	del_timer(&addr_chk_timer);
+	rcu_read_lock_bh();
+	now = jiffies;
+	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
 
-	for (i=0; i < IN6_ADDR_HSIZE; i++) {
+	cancel_delayed_work(&addr_chk_work);
 
+	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
 restart:
-		write_lock(&addrconf_hash_lock);
-		for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) {
+		hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) {
 			unsigned long age;
-#ifdef CONFIG_IPV6_PRIVACY
-			unsigned long regen_advance;
-#endif
 
-			if (ifp->flags & IFA_F_PERMANENT)
+			/* When setting preferred_lft to a value not zero or
+			 * infinity, while valid_lft is infinity
+			 * IFA_F_PERMANENT has a non-infinity life time.
+			 */
+			if ((ifp->flags & IFA_F_PERMANENT) &&
+			    (ifp->prefered_lft == INFINITY_LIFE_TIME))
 				continue;
 
 			spin_lock(&ifp->lock);
-			age = (now - ifp->tstamp) / HZ;
-
-#ifdef CONFIG_IPV6_PRIVACY
-			regen_advance = ifp->idev->cnf.regen_max_retry * 
-					ifp->idev->cnf.dad_transmits * 
-					ifp->idev->nd_parms->retrans_time / HZ;
-#endif
+			/* We try to batch several events at once. */
+			age = (now - ifp->tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
 
-			if (age >= ifp->valid_lft) {
+			if (ifp->valid_lft != INFINITY_LIFE_TIME &&
+			    age >= ifp->valid_lft) {
 				spin_unlock(&ifp->lock);
 				in6_ifa_hold(ifp);
-				write_unlock(&addrconf_hash_lock);
 				ipv6_del_addr(ifp);
 				goto restart;
+			} else if (ifp->prefered_lft == INFINITY_LIFE_TIME) {
+				spin_unlock(&ifp->lock);
+				continue;
 			} else if (age >= ifp->prefered_lft) {
-				/* jiffies - ifp->tsamp > age >= ifp->prefered_lft */
+				/* jiffies - ifp->tstamp > age >= ifp->prefered_lft */
 				int deprecate = 0;
 
 				if (!(ifp->flags&IFA_F_DEPRECATED)) {
@@ -2519,22 +3620,25 @@ restart:
 					ifp->flags |= IFA_F_DEPRECATED;
 				}
 
-				if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))
+				if ((ifp->valid_lft != INFINITY_LIFE_TIME) &&
+				    (time_before(ifp->tstamp + ifp->valid_lft * HZ, next)))
 					next = ifp->tstamp + ifp->valid_lft * HZ;
 
 				spin_unlock(&ifp->lock);
 
 				if (deprecate) {
 					in6_ifa_hold(ifp);
-					write_unlock(&addrconf_hash_lock);
 
 					ipv6_ifa_notify(0, ifp);
 					in6_ifa_put(ifp);
 					goto restart;
 				}
-#ifdef CONFIG_IPV6_PRIVACY
 			} else if ((ifp->flags&IFA_F_TEMPORARY) &&
 				   !(ifp->flags&IFA_F_TENTATIVE)) {
+				unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
+					ifp->idev->cnf.dad_transmits *
+					NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME) / HZ;
+
 				if (age >= ifp->prefered_lft - regen_advance) {
 					struct inet6_ifaddr *ifpub = ifp->ifpub;
 					if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
@@ -2544,7 +3648,10 @@ restart:
 						in6_ifa_hold(ifp);
 						in6_ifa_hold(ifpub);
 						spin_unlock(&ifp->lock);
-						write_unlock(&addrconf_hash_lock);
+
+						spin_lock(&ifpub->lock);
+						ifpub->regen_count = 0;
+						spin_unlock(&ifpub->lock);
 						ipv6_create_tempaddr(ifpub, ifp);
 						in6_ifa_put(ifpub);
 						in6_ifa_put(ifp);
@@ -2553,7 +3660,6 @@ restart:
 				} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
 					next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
 				spin_unlock(&ifp->lock);
-#endif
 			} else {
 				/* ifp->prefered_lft <= ifp->valid_lft */
 				if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
@@ -2561,281 +3667,521 @@ restart:
 				spin_unlock(&ifp->lock);
 			}
 		}
-		write_unlock(&addrconf_hash_lock);
 	}
 
-	addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next;
-	add_timer(&addr_chk_timer);
-	spin_unlock_bh(&addrconf_verify_lock);
+	next_sec = round_jiffies_up(next);
+	next_sched = next;
+
+	/* If rounded timeout is accurate enough, accept it. */
+	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
+		next_sched = next_sec;
+
+	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
+	if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
+		next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
+
+	ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
+	      now, next, next_sec, next_sched);
+	mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
+	rcu_read_unlock_bh();
 }
 
-static int
-inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static void addrconf_verify_work(struct work_struct *w)
 {
-	struct rtattr **rta = arg;
-	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
-	struct in6_addr *pfx;
+	rtnl_lock();
+	addrconf_verify_rtnl();
+	rtnl_unlock();
+}
 
-	pfx = NULL;
-	if (rta[IFA_ADDRESS-1]) {
-		if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx))
-			return -EINVAL;
-		pfx = RTA_DATA(rta[IFA_ADDRESS-1]);
-	}
-	if (rta[IFA_LOCAL-1]) {
-		if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))
-			return -EINVAL;
-		pfx = RTA_DATA(rta[IFA_LOCAL-1]);
+static void addrconf_verify(void)
+{
+	mod_delayed_work(addrconf_wq, &addr_chk_work, 0);
+}
+
+static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,
+				     struct in6_addr **peer_pfx)
+{
+	struct in6_addr *pfx = NULL;
+
+	*peer_pfx = NULL;
+
+	if (addr)
+		pfx = nla_data(addr);
+
+	if (local) {
+		if (pfx && nla_memcmp(local, pfx, sizeof(*pfx)))
+			*peer_pfx = pfx;
+		pfx = nla_data(local);
 	}
+
+	return pfx;
+}
+
+static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
+	[IFA_ADDRESS]		= { .len = sizeof(struct in6_addr) },
+	[IFA_LOCAL]		= { .len = sizeof(struct in6_addr) },
+	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
+	[IFA_FLAGS]		= { .len = sizeof(u32) },
+};
+
+static int
+inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	struct net *net = sock_net(skb->sk);
+	struct ifaddrmsg *ifm;
+	struct nlattr *tb[IFA_MAX+1];
+	struct in6_addr *pfx, *peer_pfx;
+	u32 ifa_flags;
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+	if (err < 0)
+		return err;
+
+	ifm = nlmsg_data(nlh);
+	pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx);
 	if (pfx == NULL)
 		return -EINVAL;
 
-	return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+	ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags;
+
+	/* We ignore other flags so far. */
+	ifa_flags &= IFA_F_MANAGETEMPADDR;
+
+	return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
+			      ifm->ifa_prefixlen);
 }
 
-static int
-inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
+			     u32 prefered_lft, u32 valid_lft)
 {
-	struct rtattr  **rta = arg;
-	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
-	struct in6_addr *pfx;
+	u32 flags;
+	clock_t expires;
+	unsigned long timeout;
+	bool was_managetempaddr;
+	bool had_prefixroute;
 
-	pfx = NULL;
-	if (rta[IFA_ADDRESS-1]) {
-		if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx))
-			return -EINVAL;
-		pfx = RTA_DATA(rta[IFA_ADDRESS-1]);
+	ASSERT_RTNL();
+
+	if (!valid_lft || (prefered_lft > valid_lft))
+		return -EINVAL;
+
+	if (ifa_flags & IFA_F_MANAGETEMPADDR &&
+	    (ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
+		return -EINVAL;
+
+	timeout = addrconf_timeout_fixup(valid_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		expires = jiffies_to_clock_t(timeout * HZ);
+		valid_lft = timeout;
+		flags = RTF_EXPIRES;
+	} else {
+		expires = 0;
+		flags = 0;
+		ifa_flags |= IFA_F_PERMANENT;
+	}
+
+	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		if (timeout == 0)
+			ifa_flags |= IFA_F_DEPRECATED;
+		prefered_lft = timeout;
 	}
-	if (rta[IFA_LOCAL-1]) {
-		if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))
-			return -EINVAL;
-		pfx = RTA_DATA(rta[IFA_LOCAL-1]);
+
+	spin_lock_bh(&ifp->lock);
+	was_managetempaddr = ifp->flags & IFA_F_MANAGETEMPADDR;
+	had_prefixroute = ifp->flags & IFA_F_PERMANENT &&
+			  !(ifp->flags & IFA_F_NOPREFIXROUTE);
+	ifp->flags &= ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD |
+			IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
+			IFA_F_NOPREFIXROUTE);
+	ifp->flags |= ifa_flags;
+	ifp->tstamp = jiffies;
+	ifp->valid_lft = valid_lft;
+	ifp->prefered_lft = prefered_lft;
+
+	spin_unlock_bh(&ifp->lock);
+	if (!(ifp->flags&IFA_F_TENTATIVE))
+		ipv6_ifa_notify(0, ifp);
+
+	if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
+		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
+				      expires, flags);
+	} else if (had_prefixroute) {
+		enum cleanup_prefix_rt_t action;
+		unsigned long rt_expires;
+
+		write_lock_bh(&ifp->idev->lock);
+		action = check_cleanup_prefix_route(ifp, &rt_expires);
+		write_unlock_bh(&ifp->idev->lock);
+
+		if (action != CLEANUP_PREFIX_RT_NOP) {
+			cleanup_prefix_route(ifp, rt_expires,
+				action == CLEANUP_PREFIX_RT_DEL);
+		}
 	}
+
+	if (was_managetempaddr || ifp->flags & IFA_F_MANAGETEMPADDR) {
+		if (was_managetempaddr && !(ifp->flags & IFA_F_MANAGETEMPADDR))
+			valid_lft = prefered_lft = 0;
+		manage_tempaddrs(ifp->idev, ifp, valid_lft, prefered_lft,
+				 !was_managetempaddr, jiffies);
+	}
+
+	addrconf_verify_rtnl();
+
+	return 0;
+}
+
+static int
+inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	struct net *net = sock_net(skb->sk);
+	struct ifaddrmsg *ifm;
+	struct nlattr *tb[IFA_MAX+1];
+	struct in6_addr *pfx, *peer_pfx;
+	struct inet6_ifaddr *ifa;
+	struct net_device *dev;
+	u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
+	u32 ifa_flags;
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+	if (err < 0)
+		return err;
+
+	ifm = nlmsg_data(nlh);
+	pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx);
 	if (pfx == NULL)
 		return -EINVAL;
 
-	return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+	if (tb[IFA_CACHEINFO]) {
+		struct ifa_cacheinfo *ci;
+
+		ci = nla_data(tb[IFA_CACHEINFO]);
+		valid_lft = ci->ifa_valid;
+		preferred_lft = ci->ifa_prefered;
+	} else {
+		preferred_lft = INFINITY_LIFE_TIME;
+		valid_lft = INFINITY_LIFE_TIME;
+	}
+
+	dev =  __dev_get_by_index(net, ifm->ifa_index);
+	if (dev == NULL)
+		return -ENODEV;
+
+	ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags;
+
+	/* We ignore other flags so far. */
+	ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
+		     IFA_F_NOPREFIXROUTE;
+
+	ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
+	if (ifa == NULL) {
+		/*
+		 * It would be best to check for !NLM_F_CREATE here but
+		 * userspace already relies on not having to provide this.
+		 */
+		return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx,
+				      ifm->ifa_prefixlen, ifa_flags,
+				      preferred_lft, valid_lft);
+	}
+
+	if (nlh->nlmsg_flags & NLM_F_EXCL ||
+	    !(nlh->nlmsg_flags & NLM_F_REPLACE))
+		err = -EEXIST;
+	else
+		err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft);
+
+	in6_ifa_put(ifa);
+
+	return err;
 }
 
-static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
-			     u32 pid, u32 seq, int event, unsigned int flags)
+static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u32 flags,
+			  u8 scope, int ifindex)
 {
 	struct ifaddrmsg *ifm;
-	struct nlmsghdr  *nlh;
-	struct ifa_cacheinfo ci;
-	unsigned char	 *b = skb->tail;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
-	ifm = NLMSG_DATA(nlh);
+	ifm = nlmsg_data(nlh);
 	ifm->ifa_family = AF_INET6;
-	ifm->ifa_prefixlen = ifa->prefix_len;
-	ifm->ifa_flags = ifa->flags;
-	ifm->ifa_scope = RT_SCOPE_UNIVERSE;
-	if (ifa->scope&IFA_HOST)
-		ifm->ifa_scope = RT_SCOPE_HOST;
-	else if (ifa->scope&IFA_LINK)
-		ifm->ifa_scope = RT_SCOPE_LINK;
-	else if (ifa->scope&IFA_SITE)
-		ifm->ifa_scope = RT_SCOPE_SITE;
-	ifm->ifa_index = ifa->idev->dev->ifindex;
-	RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr);
-	if (!(ifa->flags&IFA_F_PERMANENT)) {
-		ci.ifa_prefered = ifa->prefered_lft;
-		ci.ifa_valid = ifa->valid_lft;
-		if (ci.ifa_prefered != INFINITY_LIFE_TIME) {
+	ifm->ifa_prefixlen = prefixlen;
+	ifm->ifa_flags = flags;
+	ifm->ifa_scope = scope;
+	ifm->ifa_index = ifindex;
+}
+
+static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
+			 unsigned long tstamp, u32 preferred, u32 valid)
+{
+	struct ifa_cacheinfo ci;
+
+	ci.cstamp = cstamp_delta(cstamp);
+	ci.tstamp = cstamp_delta(tstamp);
+	ci.ifa_prefered = preferred;
+	ci.ifa_valid = valid;
+
+	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+}
+
+static inline int rt_scope(int ifa_scope)
+{
+	if (ifa_scope & IFA_HOST)
+		return RT_SCOPE_HOST;
+	else if (ifa_scope & IFA_LINK)
+		return RT_SCOPE_LINK;
+	else if (ifa_scope & IFA_SITE)
+		return RT_SCOPE_SITE;
+	else
+		return RT_SCOPE_UNIVERSE;
+}
+
+static inline int inet6_ifaddr_msgsize(void)
+{
+	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
+	       + nla_total_size(16) /* IFA_LOCAL */
+	       + nla_total_size(16) /* IFA_ADDRESS */
+	       + nla_total_size(sizeof(struct ifa_cacheinfo))
+	       + nla_total_size(4)  /* IFA_FLAGS */;
+}
+
+static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
+			     u32 portid, u32 seq, int event, unsigned int flags)
+{
+	struct nlmsghdr  *nlh;
+	u32 preferred, valid;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
+		      ifa->idev->dev->ifindex);
+
+	if (!((ifa->flags&IFA_F_PERMANENT) &&
+	      (ifa->prefered_lft == INFINITY_LIFE_TIME))) {
+		preferred = ifa->prefered_lft;
+		valid = ifa->valid_lft;
+		if (preferred != INFINITY_LIFE_TIME) {
 			long tval = (jiffies - ifa->tstamp)/HZ;
-			ci.ifa_prefered -= tval;
-			if (ci.ifa_valid != INFINITY_LIFE_TIME)
-				ci.ifa_valid -= tval;
+			if (preferred > tval)
+				preferred -= tval;
+			else
+				preferred = 0;
+			if (valid != INFINITY_LIFE_TIME) {
+				if (valid > tval)
+					valid -= tval;
+				else
+					valid = 0;
+			}
 		}
 	} else {
-		ci.ifa_prefered = INFINITY_LIFE_TIME;
-		ci.ifa_valid = INFINITY_LIFE_TIME;
-	}
-	ci.cstamp = (__u32)(TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) / HZ * 100
-		    + TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-	ci.tstamp = (__u32)(TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) / HZ * 100
-		    + TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-	RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+		preferred = INFINITY_LIFE_TIME;
+		valid = INFINITY_LIFE_TIME;
+	}
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	if (!ipv6_addr_any(&ifa->peer_addr)) {
+		if (nla_put(skb, IFA_LOCAL, 16, &ifa->addr) < 0 ||
+		    nla_put(skb, IFA_ADDRESS, 16, &ifa->peer_addr) < 0)
+			goto error;
+	} else
+		if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0)
+			goto error;
+
+	if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
+		goto error;
+
+	if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0)
+		goto error;
+
+	return nlmsg_end(skb, nlh);
+
+error:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
 }
 
 static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
-				u32 pid, u32 seq, int event, u16 flags)
+				u32 portid, u32 seq, int event, u16 flags)
 {
-	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
-	struct ifa_cacheinfo ci;
-	unsigned char	 *b = skb->tail;
-
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
-	ifm = NLMSG_DATA(nlh);
-	ifm->ifa_family = AF_INET6;	
-	ifm->ifa_prefixlen = 128;
-	ifm->ifa_flags = IFA_F_PERMANENT;
-	ifm->ifa_scope = RT_SCOPE_UNIVERSE;
-	if (ipv6_addr_scope(&ifmca->mca_addr)&IFA_SITE)
-		ifm->ifa_scope = RT_SCOPE_SITE;
-	ifm->ifa_index = ifmca->idev->dev->ifindex;
-	RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr);
-	ci.cstamp = (__u32)(TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) / HZ
-		    * 100 + TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) % HZ
-		    * 100 / HZ);
-	ci.tstamp = (__u32)(TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) / HZ
-		    * 100 + TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) % HZ
-		    * 100 / HZ);
-	ci.ifa_prefered = INFINITY_LIFE_TIME;
-	ci.ifa_valid = INFINITY_LIFE_TIME;
-	RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+	u8 scope = RT_SCOPE_UNIVERSE;
+	int ifindex = ifmca->idev->dev->ifindex;
+
+	if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
+		scope = RT_SCOPE_SITE;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
+	if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 ||
+	    put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
+			  INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
+		nlmsg_cancel(skb, nlh);
+		return -EMSGSIZE;
+	}
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	return nlmsg_end(skb, nlh);
 }
 
 static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
-				u32 pid, u32 seq, int event, unsigned int flags)
+				u32 portid, u32 seq, int event, unsigned int flags)
 {
-	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
-	struct ifa_cacheinfo ci;
-	unsigned char	 *b = skb->tail;
-
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
-	ifm = NLMSG_DATA(nlh);
-	ifm->ifa_family = AF_INET6;	
-	ifm->ifa_prefixlen = 128;
-	ifm->ifa_flags = IFA_F_PERMANENT;
-	ifm->ifa_scope = RT_SCOPE_UNIVERSE;
-	if (ipv6_addr_scope(&ifaca->aca_addr)&IFA_SITE)
-		ifm->ifa_scope = RT_SCOPE_SITE;
-	ifm->ifa_index = ifaca->aca_idev->dev->ifindex;
-	RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr);
-	ci.cstamp = (__u32)(TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) / HZ
-		    * 100 + TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) % HZ
-		    * 100 / HZ);
-	ci.tstamp = (__u32)(TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) / HZ
-		    * 100 + TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) % HZ
-		    * 100 / HZ);
-	ci.ifa_prefered = INFINITY_LIFE_TIME;
-	ci.ifa_valid = INFINITY_LIFE_TIME;
-	RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+	u8 scope = RT_SCOPE_UNIVERSE;
+	int ifindex = ifaca->aca_idev->dev->ifindex;
+
+	if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
+		scope = RT_SCOPE_SITE;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
+	if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 ||
+	    put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
+			  INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
+		nlmsg_cancel(skb, nlh);
+		return -EMSGSIZE;
+	}
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	return nlmsg_end(skb, nlh);
 }
 
-enum addr_type_t
-{
+enum addr_type_t {
 	UNICAST_ADDR,
 	MULTICAST_ADDR,
 	ANYCAST_ADDR,
 };
 
+/* called with rcu_read_lock() */
+static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
+			  struct netlink_callback *cb, enum addr_type_t type,
+			  int s_ip_idx, int *p_ip_idx)
+{
+	struct ifmcaddr6 *ifmca;
+	struct ifacaddr6 *ifaca;
+	int err = 1;
+	int ip_idx = *p_ip_idx;
+
+	read_lock_bh(&idev->lock);
+	switch (type) {
+	case UNICAST_ADDR: {
+		struct inet6_ifaddr *ifa;
+
+		/* unicast address incl. temp addr */
+		list_for_each_entry(ifa, &idev->addr_list, if_list) {
+			if (++ip_idx < s_ip_idx)
+				continue;
+			err = inet6_fill_ifaddr(skb, ifa,
+						NETLINK_CB(cb->skb).portid,
+						cb->nlh->nlmsg_seq,
+						RTM_NEWADDR,
+						NLM_F_MULTI);
+			if (err <= 0)
+				break;
+			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+		}
+		break;
+	}
+	case MULTICAST_ADDR:
+		/* multicast address */
+		for (ifmca = idev->mc_list; ifmca;
+		     ifmca = ifmca->next, ip_idx++) {
+			if (ip_idx < s_ip_idx)
+				continue;
+			err = inet6_fill_ifmcaddr(skb, ifmca,
+						  NETLINK_CB(cb->skb).portid,
+						  cb->nlh->nlmsg_seq,
+						  RTM_GETMULTICAST,
+						  NLM_F_MULTI);
+			if (err <= 0)
+				break;
+		}
+		break;
+	case ANYCAST_ADDR:
+		/* anycast address */
+		for (ifaca = idev->ac_list; ifaca;
+		     ifaca = ifaca->aca_next, ip_idx++) {
+			if (ip_idx < s_ip_idx)
+				continue;
+			err = inet6_fill_ifacaddr(skb, ifaca,
+						  NETLINK_CB(cb->skb).portid,
+						  cb->nlh->nlmsg_seq,
+						  RTM_GETANYCAST,
+						  NLM_F_MULTI);
+			if (err <= 0)
+				break;
+		}
+		break;
+	default:
+		break;
+	}
+	read_unlock_bh(&idev->lock);
+	*p_ip_idx = ip_idx;
+	return err;
+}
+
 static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 			   enum addr_type_t type)
 {
+	struct net *net = sock_net(skb->sk);
+	int h, s_h;
 	int idx, ip_idx;
 	int s_idx, s_ip_idx;
-	int err = 1;
 	struct net_device *dev;
-	struct inet6_dev *idev = NULL;
-	struct inet6_ifaddr *ifa;
-	struct ifmcaddr6 *ifmca;
-	struct ifacaddr6 *ifaca;
-
-	s_idx = cb->args[0];
-	s_ip_idx = ip_idx = cb->args[1];
-	read_lock(&dev_base_lock);
-	
-	for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
-		if (idx < s_idx)
-			continue;
-		if (idx > s_idx)
-			s_ip_idx = 0;
-		ip_idx = 0;
-		if ((idev = in6_dev_get(dev)) == NULL)
-			continue;
-		read_lock_bh(&idev->lock);
-		switch (type) {
-		case UNICAST_ADDR:
-			/* unicast address incl. temp addr */
-			for (ifa = idev->addr_list; ifa;
-			     ifa = ifa->if_next, ip_idx++) {
-				if (ip_idx < s_ip_idx)
-					continue;
-				if ((err = inet6_fill_ifaddr(skb, ifa, 
-				    NETLINK_CB(cb->skb).pid, 
-				    cb->nlh->nlmsg_seq, RTM_NEWADDR,
-				    NLM_F_MULTI)) <= 0)
-					goto done;
-			}
-			break;
-		case MULTICAST_ADDR:
-			/* multicast address */
-			for (ifmca = idev->mc_list; ifmca; 
-			     ifmca = ifmca->next, ip_idx++) {
-				if (ip_idx < s_ip_idx)
-					continue;
-				if ((err = inet6_fill_ifmcaddr(skb, ifmca, 
-				    NETLINK_CB(cb->skb).pid, 
-				    cb->nlh->nlmsg_seq, RTM_GETMULTICAST,
-				    NLM_F_MULTI)) <= 0)
-					goto done;
-			}
-			break;
-		case ANYCAST_ADDR:
-			/* anycast address */
-			for (ifaca = idev->ac_list; ifaca;
-			     ifaca = ifaca->aca_next, ip_idx++) {
-				if (ip_idx < s_ip_idx)
-					continue;
-				if ((err = inet6_fill_ifacaddr(skb, ifaca, 
-				    NETLINK_CB(cb->skb).pid, 
-				    cb->nlh->nlmsg_seq, RTM_GETANYCAST,
-				    NLM_F_MULTI)) <= 0) 
-					goto done;
-			}
-			break;
-		default:
-			break;
+	struct inet6_dev *idev;
+	struct hlist_head *head;
+
+	s_h = cb->args[0];
+	s_idx = idx = cb->args[1];
+	s_ip_idx = ip_idx = cb->args[2];
+
+	rcu_read_lock();
+	cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ net->dev_base_seq;
+	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+		idx = 0;
+		head = &net->dev_index_head[h];
+		hlist_for_each_entry_rcu(dev, head, index_hlist) {
+			if (idx < s_idx)
+				goto cont;
+			if (h > s_h || idx > s_idx)
+				s_ip_idx = 0;
+			ip_idx = 0;
+			idev = __in6_dev_get(dev);
+			if (!idev)
+				goto cont;
+
+			if (in6_dump_addrs(idev, skb, cb, type,
+					   s_ip_idx, &ip_idx) <= 0)
+				goto done;
+cont:
+			idx++;
 		}
-		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
 	}
 done:
-	if (err <= 0) {
-		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
-	}
-	read_unlock(&dev_base_lock);
-	cb->args[0] = idx;
-	cb->args[1] = ip_idx;
+	rcu_read_unlock();
+	cb->args[0] = h;
+	cb->args[1] = idx;
+	cb->args[2] = ip_idx;
+
 	return skb->len;
 }
 
 static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	enum addr_type_t type = UNICAST_ADDR;
+
 	return inet6_dump_addr(skb, cb, type);
 }
 
 static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	enum addr_type_t type = MULTICAST_ADDR;
+
 	return inet6_dump_addr(skb, cb, type);
 }
 
@@ -2843,31 +4189,91 @@ static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
 static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	enum addr_type_t type = ANYCAST_ADDR;
+
 	return inet6_dump_addr(skb, cb, type);
 }
 
-static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
+static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)
 {
+	struct net *net = sock_net(in_skb->sk);
+	struct ifaddrmsg *ifm;
+	struct nlattr *tb[IFA_MAX+1];
+	struct in6_addr *addr = NULL, *peer;
+	struct net_device *dev = NULL;
+	struct inet6_ifaddr *ifa;
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128);
+	int err;
 
-	skb = alloc_skb(size, GFP_ATOMIC);
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+	if (err < 0)
+		goto errout;
+
+	addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
+	if (addr == NULL) {
+		err = -EINVAL;
+		goto errout;
+	}
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifa_index)
+		dev = __dev_get_by_index(net, ifm->ifa_index);
+
+	ifa = ipv6_get_ifaddr(net, addr, dev, 1);
+	if (!ifa) {
+		err = -EADDRNOTAVAIL;
+		goto errout;
+	}
+
+	skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL);
 	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS);
-		return;
+		err = -ENOBUFS;
+		goto errout_ifa;
 	}
-	if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
+
+	err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid,
+				nlh->nlmsg_seq, RTM_NEWADDR, 0);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
+		WARN_ON(err == -EMSGSIZE);
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL);
-		return;
+		goto errout_ifa;
+	}
+	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+errout_ifa:
+	in6_ifa_put(ifa);
+errout:
+	return err;
+}
+
+static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
+{
+	struct sk_buff *skb;
+	struct net *net = dev_net(ifa->idev->dev);
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC);
+	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
 }
 
-static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
+static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 				__s32 *array, int bytes)
 {
+	BUG_ON(bytes < (DEVCONF_MAX * 4));
+
 	memset(array, 0, bytes);
 	array[DEVCONF_FORWARDING] = cnf->forwarding;
 	array[DEVCONF_HOPLIMIT] = cnf->hop_limit;
@@ -2877,106 +4283,330 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_AUTOCONF] = cnf->autoconf;
 	array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
 	array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
-	array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval;
-	array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay;
+	array[DEVCONF_RTR_SOLICIT_INTERVAL] =
+		jiffies_to_msecs(cnf->rtr_solicit_interval);
+	array[DEVCONF_RTR_SOLICIT_DELAY] =
+		jiffies_to_msecs(cnf->rtr_solicit_delay);
 	array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
-#ifdef CONFIG_IPV6_PRIVACY
+	array[DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] =
+		jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval);
+	array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] =
+		jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval);
 	array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
 	array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
 	array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
 	array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
 	array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
-#endif
 	array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
+	array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
+	array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
+#ifdef CONFIG_IPV6_ROUTER_PREF
+	array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
+	array[DEVCONF_RTR_PROBE_INTERVAL] =
+		jiffies_to_msecs(cnf->rtr_probe_interval);
+#ifdef CONFIG_IPV6_ROUTE_INFO
+	array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
+#endif
+#endif
+	array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
+	array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
+#endif
+#ifdef CONFIG_IPV6_MROUTE
+	array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
+#endif
+	array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
+	array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
+	array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
+	array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
+	array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
 }
 
-static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, 
-			     u32 pid, u32 seq, int event, unsigned int flags)
+static inline size_t inet6_ifla6_size(void)
 {
-	struct net_device	*dev = idev->dev;
-	__s32			*array = NULL;
-	struct ifinfomsg	*r;
-	struct nlmsghdr 	*nlh;
-	unsigned char		*b = skb->tail;
-	struct rtattr		*subattr;
-	__u32			mtu = dev->mtu;
-	struct ifla_cacheinfo	ci;
+	return nla_total_size(4) /* IFLA_INET6_FLAGS */
+	     + nla_total_size(sizeof(struct ifla_cacheinfo))
+	     + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
+	     + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
+	     + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
+	     + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */
+}
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
-	r = NLMSG_DATA(nlh);
-	r->ifi_family = AF_INET6;
-	r->__ifi_pad = 0;
-	r->ifi_type = dev->type;
-	r->ifi_index = dev->ifindex;
-	r->ifi_flags = dev_get_flags(dev);
-	r->ifi_change = 0;
+static inline size_t inet6_if_nlmsg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+	       + nla_total_size(4) /* IFLA_MTU */
+	       + nla_total_size(4) /* IFLA_LINK */
+	       + nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */
+}
 
-	RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
+static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,
+				      int items, int bytes)
+{
+	int i;
+	int pad = bytes - sizeof(u64) * items;
+	BUG_ON(pad < 0);
 
-	if (dev->addr_len)
-		RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+	/* Use put_unaligned() because stats may not be aligned for u64. */
+	put_unaligned(items, &stats[0]);
+	for (i = 1; i < items; i++)
+		put_unaligned(atomic_long_read(&mib[i]), &stats[i]);
 
-	RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
-	if (dev->ifindex != dev->iflink)
-		RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink);
-			
-	subattr = (struct rtattr*)skb->tail;
+	memset(&stats[items], 0, pad);
+}
 
-	RTA_PUT(skb, IFLA_PROTINFO, 0, NULL);
+static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib,
+				      int items, int bytes, size_t syncpoff)
+{
+	int i;
+	int pad = bytes - sizeof(u64) * items;
+	BUG_ON(pad < 0);
 
-	/* return the device flags */
-	RTA_PUT(skb, IFLA_INET6_FLAGS, sizeof(__u32), &idev->if_flags);
+	/* Use put_unaligned() because stats may not be aligned for u64. */
+	put_unaligned(items, &stats[0]);
+	for (i = 1; i < items; i++)
+		put_unaligned(snmp_fold_field64(mib, i, syncpoff), &stats[i]);
 
-	/* return interface cacheinfo */
+	memset(&stats[items], 0, pad);
+}
+
+static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
+			     int bytes)
+{
+	switch (attrtype) {
+	case IFLA_INET6_STATS:
+		__snmp6_fill_stats64(stats, idev->stats.ipv6,
+				     IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
+		break;
+	case IFLA_INET6_ICMP6STATS:
+		__snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes);
+		break;
+	}
+}
+
+static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
+{
+	struct nlattr *nla;
+	struct ifla_cacheinfo ci;
+
+	if (nla_put_u32(skb, IFLA_INET6_FLAGS, idev->if_flags))
+		goto nla_put_failure;
 	ci.max_reasm_len = IPV6_MAXPLEN;
-	ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100
-		    + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-	ci.reachable_time = idev->nd_parms->reachable_time;
-	ci.retrans_time = idev->nd_parms->retrans_time;
-	RTA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
-	
-	/* return the device sysctl params */
-	if ((array = kmalloc(DEVCONF_MAX * sizeof(*array), GFP_ATOMIC)) == NULL)
-		goto rtattr_failure;
-	ipv6_store_devconf(&idev->cnf, array, DEVCONF_MAX * sizeof(*array));
-	RTA_PUT(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(*array), array);
-
-	/* XXX - Statistics/MC not implemented */
-	subattr->rta_len = skb->tail - (u8*)subattr;
-
-	nlh->nlmsg_len = skb->tail - b;
-	kfree(array);
-	return skb->len;
+	ci.tstamp = cstamp_delta(idev->tstamp);
+	ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
+	ci.retrans_time = jiffies_to_msecs(NEIGH_VAR(idev->nd_parms, RETRANS_TIME));
+	if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci))
+		goto nla_put_failure;
+	nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
+	if (nla == NULL)
+		goto nla_put_failure;
+	ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
+
+	/* XXX - MC not implemented */
+
+	nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
+	if (nla == NULL)
+		goto nla_put_failure;
+	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
+
+	nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
+	if (nla == NULL)
+		goto nla_put_failure;
+	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
+
+	nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
+	if (nla == NULL)
+		goto nla_put_failure;
+	read_lock_bh(&idev->lock);
+	memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
+	read_unlock_bh(&idev->lock);
 
-nlmsg_failure:
-rtattr_failure:
-	if (array)
-		kfree(array);
-	skb_trim(skb, b - skb->data);
-	return -1;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static size_t inet6_get_link_af_size(const struct net_device *dev)
+{
+	if (!__in6_dev_get(dev))
+		return 0;
+
+	return inet6_ifla6_size();
+}
+
+static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct inet6_dev *idev = __in6_dev_get(dev);
+
+	if (!idev)
+		return -ENODATA;
+
+	if (inet6_fill_ifla6_attrs(skb, idev) < 0)
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
+{
+	struct inet6_ifaddr *ifp;
+	struct net_device *dev = idev->dev;
+	bool update_rs = false;
+	struct in6_addr ll_addr;
+
+	ASSERT_RTNL();
+
+	if (token == NULL)
+		return -EINVAL;
+	if (ipv6_addr_any(token))
+		return -EINVAL;
+	if (dev->flags & (IFF_LOOPBACK | IFF_NOARP))
+		return -EINVAL;
+	if (!ipv6_accept_ra(idev))
+		return -EINVAL;
+	if (idev->cnf.rtr_solicits <= 0)
+		return -EINVAL;
+
+	write_lock_bh(&idev->lock);
+
+	BUILD_BUG_ON(sizeof(token->s6_addr) != 16);
+	memcpy(idev->token.s6_addr + 8, token->s6_addr + 8, 8);
+
+	write_unlock_bh(&idev->lock);
+
+	if (!idev->dead && (idev->if_flags & IF_READY) &&
+	    !ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE |
+			     IFA_F_OPTIMISTIC)) {
+
+		/* If we're not ready, then normal ifup will take care
+		 * of this. Otherwise, we need to request our rs here.
+		 */
+		ndisc_send_rs(dev, &ll_addr, &in6addr_linklocal_allrouters);
+		update_rs = true;
+	}
+
+	write_lock_bh(&idev->lock);
+
+	if (update_rs) {
+		idev->if_flags |= IF_RS_SENT;
+		idev->rs_probes = 1;
+		addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval);
+	}
+
+	/* Well, that's kinda nasty ... */
+	list_for_each_entry(ifp, &idev->addr_list, if_list) {
+		spin_lock(&ifp->lock);
+		if (ifp->tokenized) {
+			ifp->valid_lft = 0;
+			ifp->prefered_lft = 0;
+		}
+		spin_unlock(&ifp->lock);
+	}
+
+	write_unlock_bh(&idev->lock);
+	addrconf_verify_rtnl();
+	return 0;
+}
+
+static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
+{
+	int err = -EINVAL;
+	struct inet6_dev *idev = __in6_dev_get(dev);
+	struct nlattr *tb[IFLA_INET6_MAX + 1];
+
+	if (!idev)
+		return -EAFNOSUPPORT;
+
+	if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0)
+		BUG();
+
+	if (tb[IFLA_INET6_TOKEN])
+		err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]));
+
+	return err;
+}
+
+static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
+			     u32 portid, u32 seq, int event, unsigned int flags)
+{
+	struct net_device *dev = idev->dev;
+	struct ifinfomsg *hdr;
+	struct nlmsghdr *nlh;
+	void *protoinfo;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	hdr = nlmsg_data(nlh);
+	hdr->ifi_family = AF_INET6;
+	hdr->__ifi_pad = 0;
+	hdr->ifi_type = dev->type;
+	hdr->ifi_index = dev->ifindex;
+	hdr->ifi_flags = dev_get_flags(dev);
+	hdr->ifi_change = 0;
+
+	if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
+	    (dev->addr_len &&
+	     nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
+	    nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
+	    (dev->ifindex != dev->iflink &&
+	     nla_put_u32(skb, IFLA_LINK, dev->iflink)))
+		goto nla_put_failure;
+	protoinfo = nla_nest_start(skb, IFLA_PROTINFO);
+	if (protoinfo == NULL)
+		goto nla_put_failure;
+
+	if (inet6_fill_ifla6_attrs(skb, idev) < 0)
+		goto nla_put_failure;
+
+	nla_nest_end(skb, protoinfo);
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
 }
 
 static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int idx, err;
-	int s_idx = cb->args[0];
+	struct net *net = sock_net(skb->sk);
+	int h, s_h;
+	int idx = 0, s_idx;
 	struct net_device *dev;
 	struct inet6_dev *idev;
-
-	read_lock(&dev_base_lock);
-	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
-		if (idx < s_idx)
-			continue;
-		if ((idev = in6_dev_get(dev)) == NULL)
-			continue;
-		err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid, 
-				cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI);
-		in6_dev_put(idev);
-		if (err <= 0)
-			break;
+	struct hlist_head *head;
+
+	s_h = cb->args[0];
+	s_idx = cb->args[1];
+
+	rcu_read_lock();
+	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+		idx = 0;
+		head = &net->dev_index_head[h];
+		hlist_for_each_entry_rcu(dev, head, index_hlist) {
+			if (idx < s_idx)
+				goto cont;
+			idev = __in6_dev_get(dev);
+			if (!idev)
+				goto cont;
+			if (inet6_fill_ifinfo(skb, idev,
+					      NETLINK_CB(cb->skb).portid,
+					      cb->nlh->nlmsg_seq,
+					      RTM_NEWLINK, NLM_F_MULTI) <= 0)
+				goto out;
+cont:
+			idx++;
+		}
 	}
-	read_unlock(&dev_base_lock);
-	cb->args[0] = idx;
+out:
+	rcu_read_unlock();
+	cb->args[1] = idx;
+	cb->args[0] = h;
 
 	return skb->len;
 }
@@ -2984,34 +4614,47 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
 {
 	struct sk_buff *skb;
-	/* 128 bytes ?? */
-	int size = NLMSG_SPACE(sizeof(struct ifinfomsg)+128);
-	
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS);
-		return;
-	}
-	if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) {
+	struct net *net = dev_net(idev->dev);
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in inet6_if_nlmsg_size() */
+		WARN_ON(err == -EMSGSIZE);
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC);
+	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC);
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err);
+}
+
+static inline size_t inet6_prefix_nlmsg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct prefixmsg))
+	       + nla_total_size(sizeof(struct in6_addr))
+	       + nla_total_size(sizeof(struct prefix_cacheinfo));
 }
 
 static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
-			struct prefix_info *pinfo, u32 pid, u32 seq, 
-			int event, unsigned int flags)
+			     struct prefix_info *pinfo, u32 portid, u32 seq,
+			     int event, unsigned int flags)
 {
-	struct prefixmsg	*pmsg;
-	struct nlmsghdr 	*nlh;
-	unsigned char		*b = skb->tail;
+	struct prefixmsg *pmsg;
+	struct nlmsghdr *nlh;
 	struct prefix_cacheinfo	ci;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*pmsg), flags);
-	pmsg = NLMSG_DATA(nlh);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	pmsg = nlmsg_data(nlh);
 	pmsg->prefix_family = AF_INET6;
 	pmsg->prefix_pad1 = 0;
 	pmsg->prefix_pad2 = 0;
@@ -3019,477 +4662,670 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
 	pmsg->prefix_len = pinfo->prefix_len;
 	pmsg->prefix_type = pinfo->type;
 	pmsg->prefix_pad3 = 0;
-	
 	pmsg->prefix_flags = 0;
 	if (pinfo->onlink)
 		pmsg->prefix_flags |= IF_PREFIX_ONLINK;
 	if (pinfo->autoconf)
 		pmsg->prefix_flags |= IF_PREFIX_AUTOCONF;
 
-	RTA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix);
-
+	if (nla_put(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix))
+		goto nla_put_failure;
 	ci.preferred_time = ntohl(pinfo->prefered);
 	ci.valid_time = ntohl(pinfo->valid);
-	RTA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci);
-
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+	if (nla_put(skb, PREFIX_CACHEINFO, sizeof(ci), &ci))
+		goto nla_put_failure;
+	return nlmsg_end(skb, nlh);
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
 }
 
-static void inet6_prefix_notify(int event, struct inet6_dev *idev, 
+static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 			 struct prefix_info *pinfo)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct prefixmsg)+128);
+	struct net *net = dev_net(idev->dev);
+	int err = -ENOBUFS;
 
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS);
-		return;
-	}
-	if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) {
+	skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in inet6_prefix_nlmsg_size() */
+		WARN_ON(err == -EMSGSIZE);
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC);
-}
-
-static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
-	[RTM_GETLINK - RTM_BASE] = { .dumpit	= inet6_dump_ifinfo, },
-	[RTM_NEWADDR - RTM_BASE] = { .doit	= inet6_rtm_newaddr, },
-	[RTM_DELADDR - RTM_BASE] = { .doit	= inet6_rtm_deladdr, },
-	[RTM_GETADDR - RTM_BASE] = { .dumpit	= inet6_dump_ifaddr, },
-	[RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, },
-	[RTM_GETANYCAST - RTM_BASE] = { .dumpit	= inet6_dump_ifacaddr, },
-	[RTM_NEWROUTE - RTM_BASE] = { .doit	= inet6_rtm_newroute, },
-	[RTM_DELROUTE - RTM_BASE] = { .doit	= inet6_rtm_delroute, },
-	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet6_rtm_getroute,
-				      .dumpit	= inet6_dump_fib, },
-};
+	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
+}
 
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 {
+	struct net *net = dev_net(ifp->idev->dev);
+
+	if (event)
+		ASSERT_RTNL();
+
 	inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
 
 	switch (event) {
 	case RTM_NEWADDR:
-		dst_hold(&ifp->rt->u.dst);
-		if (ip6_ins_rt(ifp->rt, NULL, NULL, NULL))
-			dst_release(&ifp->rt->u.dst);
+		/*
+		 * If the address was optimistic
+		 * we inserted the route at the start of
+		 * our DAD process, so we don't need
+		 * to do it again
+		 */
+		if (!(ifp->rt->rt6i_node))
+			ip6_ins_rt(ifp->rt);
 		if (ifp->idev->cnf.forwarding)
 			addrconf_join_anycast(ifp);
+		if (!ipv6_addr_any(&ifp->peer_addr))
+			addrconf_prefix_route(&ifp->peer_addr, 128,
+					      ifp->idev->dev, 0, 0);
 		break;
 	case RTM_DELADDR:
 		if (ifp->idev->cnf.forwarding)
 			addrconf_leave_anycast(ifp);
 		addrconf_leave_solict(ifp->idev, &ifp->addr);
-		dst_hold(&ifp->rt->u.dst);
-		if (ip6_del_rt(ifp->rt, NULL, NULL, NULL))
-			dst_free(&ifp->rt->u.dst);
-		else
-			dst_release(&ifp->rt->u.dst);
+		if (!ipv6_addr_any(&ifp->peer_addr)) {
+			struct rt6_info *rt;
+			struct net_device *dev = ifp->idev->dev;
+
+			rt = rt6_lookup(dev_net(dev), &ifp->peer_addr, NULL,
+					dev->ifindex, 1);
+			if (rt) {
+				dst_hold(&rt->dst);
+				if (ip6_del_rt(rt))
+					dst_free(&rt->dst);
+			}
+		}
+		dst_hold(&ifp->rt->dst);
+
+		if (ip6_del_rt(ifp->rt))
+			dst_free(&ifp->rt->dst);
 		break;
 	}
+	atomic_inc(&net->ipv6.dev_addr_genid);
+	rt_genid_bump_ipv6(net);
 }
 
 static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 {
-	read_lock_bh(&addrconf_lock);
+	rcu_read_lock_bh();
 	if (likely(ifp->idev->dead == 0))
 		__ipv6_ifa_notify(event, ifp);
-	read_unlock_bh(&addrconf_lock);
+	rcu_read_unlock_bh();
 }
 
 #ifdef CONFIG_SYSCTL
 
 static
-int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+int addrconf_sysctl_forward(struct ctl_table *ctl, int write,
 			   void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int *valp = ctl->data;
 	int val = *valp;
+	loff_t pos = *ppos;
+	struct ctl_table lctl;
 	int ret;
 
-	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+	/*
+	 * ctl->data points to idev->cnf.forwarding, we should
+	 * not modify it until we get the rtnl lock.
+	 */
+	lctl = *ctl;
+	lctl.data = &val;
 
-	if (write && valp != &ipv6_devconf_dflt.forwarding) {
-		if (valp != &ipv6_devconf.forwarding) {
-			if ((!*valp) ^ (!val)) {
-				struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
-				if (idev == NULL)
-					return ret;
-				dev_forward_change(idev);
-			}
-		} else {
-			ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
-			addrconf_forward_change();
+	ret = proc_dointvec(&lctl, write, buffer, lenp, ppos);
+
+	if (write)
+		ret = addrconf_fixup_forwarding(ctl, valp, val);
+	if (ret)
+		*ppos = pos;
+	return ret;
+}
+
+static void dev_disable_change(struct inet6_dev *idev)
+{
+	struct netdev_notifier_info info;
+
+	if (!idev || !idev->dev)
+		return;
+
+	netdev_notifier_info_init(&info, idev->dev);
+	if (idev->cnf.disable_ipv6)
+		addrconf_notify(NULL, NETDEV_DOWN, &info);
+	else
+		addrconf_notify(NULL, NETDEV_UP, &info);
+}
+
+static void addrconf_disable_change(struct net *net, __s32 newf)
+{
+	struct net_device *dev;
+	struct inet6_dev *idev;
+
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
+		idev = __in6_dev_get(dev);
+		if (idev) {
+			int changed = (!idev->cnf.disable_ipv6) ^ (!newf);
+			idev->cnf.disable_ipv6 = newf;
+			if (changed)
+				dev_disable_change(idev);
 		}
-		if (*valp)
-			rt6_purge_dflt_routers();
 	}
-
-        return ret;
+	rcu_read_unlock();
 }
 
-static int addrconf_sysctl_forward_strategy(ctl_table *table, 
-					    int __user *name, int nlen,
-					    void __user *oldval,
-					    size_t __user *oldlenp,
-					    void __user *newval, size_t newlen,
-					    void **context)
+static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf)
 {
-	int *valp = table->data;
-	int new;
+	struct net *net;
+	int old;
 
-	if (!newval || !newlen)
-		return 0;
-	if (newlen != sizeof(int))
-		return -EINVAL;
-	if (get_user(new, (int __user *)newval))
-		return -EFAULT;
-	if (new == *valp)
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	net = (struct net *)table->extra2;
+	old = *p;
+	*p = newf;
+
+	if (p == &net->ipv6.devconf_dflt->disable_ipv6) {
+		rtnl_unlock();
 		return 0;
-	if (oldval && oldlenp) {
-		size_t len;
-		if (get_user(len, oldlenp))
-			return -EFAULT;
-		if (len) {
-			if (len > table->maxlen)
-				len = table->maxlen;
-			if (copy_to_user(oldval, valp, len))
-				return -EFAULT;
-			if (put_user(len, oldlenp))
-				return -EFAULT;
-		}
 	}
 
-	if (valp != &ipv6_devconf_dflt.forwarding) {
-		if (valp != &ipv6_devconf.forwarding) {
-			struct inet6_dev *idev = (struct inet6_dev *)table->extra1;
-			int changed;
-			if (unlikely(idev == NULL))
-				return -ENODEV;
-			changed = (!*valp) ^ (!new);
-			*valp = new;
-			if (changed)
-				dev_forward_change(idev);
-		} else {
-			*valp = new;
-			addrconf_forward_change();
-		}
+	if (p == &net->ipv6.devconf_all->disable_ipv6) {
+		net->ipv6.devconf_dflt->disable_ipv6 = newf;
+		addrconf_disable_change(net, newf);
+	} else if ((!newf) ^ (!old))
+		dev_disable_change((struct inet6_dev *)table->extra1);
 
-		if (*valp)
-			rt6_purge_dflt_routers();
-	} else
-		*valp = new;
+	rtnl_unlock();
+	return 0;
+}
 
-	return 1;
+static
+int addrconf_sysctl_disable(struct ctl_table *ctl, int write,
+			    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = ctl->data;
+	int val = *valp;
+	loff_t pos = *ppos;
+	struct ctl_table lctl;
+	int ret;
+
+	/*
+	 * ctl->data points to idev->cnf.disable_ipv6, we should
+	 * not modify it until we get the rtnl lock.
+	 */
+	lctl = *ctl;
+	lctl.data = &val;
+
+	ret = proc_dointvec(&lctl, write, buffer, lenp, ppos);
+
+	if (write)
+		ret = addrconf_disable_ipv6(ctl, valp, val);
+	if (ret)
+		*ppos = pos;
+	return ret;
 }
 
+static
+int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
+			      void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = ctl->data;
+	int ret;
+	int old, new;
+
+	old = *valp;
+	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+	new = *valp;
+
+	if (write && old != new) {
+		struct net *net = ctl->extra2;
+
+		if (!rtnl_trylock())
+			return restart_syscall();
+
+		if (valp == &net->ipv6.devconf_dflt->proxy_ndp)
+			inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+						     NETCONFA_IFINDEX_DEFAULT,
+						     net->ipv6.devconf_dflt);
+		else if (valp == &net->ipv6.devconf_all->proxy_ndp)
+			inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+						     NETCONFA_IFINDEX_ALL,
+						     net->ipv6.devconf_all);
+		else {
+			struct inet6_dev *idev = ctl->extra1;
+
+			inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+						     idev->dev->ifindex,
+						     &idev->cnf);
+		}
+		rtnl_unlock();
+	}
+
+	return ret;
+}
+
+
 static struct addrconf_sysctl_table
 {
 	struct ctl_table_header *sysctl_header;
-	ctl_table addrconf_vars[__NET_IPV6_MAX];
-	ctl_table addrconf_dev[2];
-	ctl_table addrconf_conf_dir[2];
-	ctl_table addrconf_proto_dir[2];
-	ctl_table addrconf_root_dir[2];
-} addrconf_sysctl = {
+	struct ctl_table addrconf_vars[DEVCONF_MAX+1];
+} addrconf_sysctl __read_mostly = {
 	.sysctl_header = NULL,
 	.addrconf_vars = {
-        	{
-			.ctl_name	=	NET_IPV6_FORWARDING,
-			.procname	=	"forwarding",
-         		.data		=	&ipv6_devconf.forwarding,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-         		.proc_handler	=	&addrconf_sysctl_forward,
-			.strategy	=	&addrconf_sysctl_forward_strategy,
+		{
+			.procname	= "forwarding",
+			.data		= &ipv6_devconf.forwarding,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= addrconf_sysctl_forward,
+		},
+		{
+			.procname	= "hop_limit",
+			.data		= &ipv6_devconf.hop_limit,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
+		{
+			.procname	= "mtu",
+			.data		= &ipv6_devconf.mtu6,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
+		{
+			.procname	= "accept_ra",
+			.data		= &ipv6_devconf.accept_ra,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.ctl_name	=	NET_IPV6_HOP_LIMIT,
-			.procname	=	"hop_limit",
-         		.data		=	&ipv6_devconf.hop_limit,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "accept_redirects",
+			.data		= &ipv6_devconf.accept_redirects,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.ctl_name	=	NET_IPV6_MTU,
-			.procname	=	"mtu",
-			.data		=	&ipv6_devconf.mtu6,
-         		.maxlen		=	sizeof(int),
-			.mode		=	0644,
-         		.proc_handler	=	&proc_dointvec,
+			.procname	= "autoconf",
+			.data		= &ipv6_devconf.autoconf,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.ctl_name	=	NET_IPV6_ACCEPT_RA,
-			.procname	=	"accept_ra",
-         		.data		=	&ipv6_devconf.accept_ra,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-         		.proc_handler	=	&proc_dointvec,
+			.procname	= "dad_transmits",
+			.data		= &ipv6_devconf.dad_transmits,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.ctl_name	=	NET_IPV6_ACCEPT_REDIRECTS,
-			.procname	=	"accept_redirects",
-         		.data		=	&ipv6_devconf.accept_redirects,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-         		.proc_handler	=	&proc_dointvec,
+			.procname	= "router_solicitations",
+			.data		= &ipv6_devconf.rtr_solicits,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.ctl_name	=	NET_IPV6_AUTOCONF,
-			.procname	=	"autoconf",
-         		.data		=	&ipv6_devconf.autoconf,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-         		.proc_handler	=	&proc_dointvec,
+			.procname	= "router_solicitation_interval",
+			.data		= &ipv6_devconf.rtr_solicit_interval,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_jiffies,
 		},
 		{
-			.ctl_name	=	NET_IPV6_DAD_TRANSMITS,
-			.procname	=	"dad_transmits",
-         		.data		=	&ipv6_devconf.dad_transmits,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-         		.proc_handler	=	&proc_dointvec,
+			.procname	= "router_solicitation_delay",
+			.data		= &ipv6_devconf.rtr_solicit_delay,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_jiffies,
 		},
 		{
-			.ctl_name	=	NET_IPV6_RTR_SOLICITS,
-			.procname	=	"router_solicitations",
-         		.data		=	&ipv6_devconf.rtr_solicits,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-         		.proc_handler	=	&proc_dointvec,
+			.procname	= "force_mld_version",
+			.data		= &ipv6_devconf.force_mld_version,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.ctl_name	=	NET_IPV6_RTR_SOLICIT_INTERVAL,
-			.procname	=	"router_solicitation_interval",
-         		.data		=	&ipv6_devconf.rtr_solicit_interval,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-         		.proc_handler	=	&proc_dointvec_jiffies,
-			.strategy	=	&sysctl_jiffies,
+			.procname	= "mldv1_unsolicited_report_interval",
+			.data		=
+				&ipv6_devconf.mldv1_unsolicited_report_interval,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_ms_jiffies,
 		},
 		{
-			.ctl_name	=	NET_IPV6_RTR_SOLICIT_DELAY,
-			.procname	=	"router_solicitation_delay",
-         		.data		=	&ipv6_devconf.rtr_solicit_delay,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-         		.proc_handler	=	&proc_dointvec_jiffies,
-			.strategy	=	&sysctl_jiffies,
+			.procname	= "mldv2_unsolicited_report_interval",
+			.data		=
+				&ipv6_devconf.mldv2_unsolicited_report_interval,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_ms_jiffies,
 		},
 		{
-			.ctl_name	=	NET_IPV6_FORCE_MLD_VERSION,
-			.procname	=	"force_mld_version",
-         		.data		=	&ipv6_devconf.force_mld_version,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-         		.proc_handler	=	&proc_dointvec,
+			.procname	= "use_tempaddr",
+			.data		= &ipv6_devconf.use_tempaddr,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
-#ifdef CONFIG_IPV6_PRIVACY
 		{
-			.ctl_name	=	NET_IPV6_USE_TEMPADDR,
-			.procname	=	"use_tempaddr",
-	 		.data		=	&ipv6_devconf.use_tempaddr,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-	 		.proc_handler	=	&proc_dointvec,
+			.procname	= "temp_valid_lft",
+			.data		= &ipv6_devconf.temp_valid_lft,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.ctl_name	=	NET_IPV6_TEMP_VALID_LFT,
-			.procname	=	"temp_valid_lft",
-	 		.data		=	&ipv6_devconf.temp_valid_lft,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-	 		.proc_handler	=	&proc_dointvec,
+			.procname	= "temp_prefered_lft",
+			.data		= &ipv6_devconf.temp_prefered_lft,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.ctl_name	=	NET_IPV6_TEMP_PREFERED_LFT,
-			.procname	=	"temp_prefered_lft",
-	 		.data		=	&ipv6_devconf.temp_prefered_lft,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-	 		.proc_handler	=	&proc_dointvec,
+			.procname	= "regen_max_retry",
+			.data		= &ipv6_devconf.regen_max_retry,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.ctl_name	=	NET_IPV6_REGEN_MAX_RETRY,
-			.procname	=	"regen_max_retry",
-	 		.data		=	&ipv6_devconf.regen_max_retry,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-	 		.proc_handler	=	&proc_dointvec,
+			.procname	= "max_desync_factor",
+			.data		= &ipv6_devconf.max_desync_factor,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.ctl_name	=	NET_IPV6_MAX_DESYNC_FACTOR,
-			.procname	=	"max_desync_factor",
-	 		.data		=	&ipv6_devconf.max_desync_factor,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-	 		.proc_handler	=	&proc_dointvec,
+			.procname	= "max_addresses",
+			.data		= &ipv6_devconf.max_addresses,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
+		{
+			.procname	= "accept_ra_defrtr",
+			.data		= &ipv6_devconf.accept_ra_defrtr,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
+		{
+			.procname	= "accept_ra_pinfo",
+			.data		= &ipv6_devconf.accept_ra_pinfo,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
+#ifdef CONFIG_IPV6_ROUTER_PREF
+		{
+			.procname	= "accept_ra_rtr_pref",
+			.data		= &ipv6_devconf.accept_ra_rtr_pref,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
+		{
+			.procname	= "router_probe_interval",
+			.data		= &ipv6_devconf.rtr_probe_interval,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_jiffies,
+		},
+#ifdef CONFIG_IPV6_ROUTE_INFO
+		{
+			.procname	= "accept_ra_rt_info_max_plen",
+			.data		= &ipv6_devconf.accept_ra_rt_info_max_plen,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
+#endif
 #endif
 		{
-			.ctl_name	=	NET_IPV6_MAX_ADDRESSES,
-			.procname	=	"max_addresses",
-			.data		=	&ipv6_devconf.max_addresses,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	&proc_dointvec,
+			.procname	= "proxy_ndp",
+			.data		= &ipv6_devconf.proxy_ndp,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= addrconf_sysctl_proxy_ndp,
 		},
 		{
-			.ctl_name	=	0,	/* sentinel */
-		}
-	},
-	.addrconf_dev = {
+			.procname	= "accept_source_route",
+			.data		= &ipv6_devconf.accept_source_route,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 		{
-			.ctl_name	=	NET_PROTO_CONF_ALL,
-			.procname	=	"all",
-			.mode		=	0555,
-			.child		=	addrconf_sysctl.addrconf_vars,
+			.procname       = "optimistic_dad",
+			.data           = &ipv6_devconf.optimistic_dad,
+			.maxlen         = sizeof(int),
+			.mode           = 0644,
+			.proc_handler   = proc_dointvec,
+
 		},
+#endif
+#ifdef CONFIG_IPV6_MROUTE
 		{
-			.ctl_name	=	0,	/* sentinel */
-		}
-	},
-	.addrconf_conf_dir = {
+			.procname	= "mc_forwarding",
+			.data		= &ipv6_devconf.mc_forwarding,
+			.maxlen		= sizeof(int),
+			.mode		= 0444,
+			.proc_handler	= proc_dointvec,
+		},
+#endif
 		{
-			.ctl_name	=	NET_IPV6_CONF,
-			.procname	=	"conf",
-			.mode		=	0555,
-			.child		=	addrconf_sysctl.addrconf_dev,
+			.procname	= "disable_ipv6",
+			.data		= &ipv6_devconf.disable_ipv6,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= addrconf_sysctl_disable,
 		},
 		{
-			.ctl_name	=	0,	/* sentinel */
-		}
-	},
-	.addrconf_proto_dir = {
+			.procname	= "accept_dad",
+			.data		= &ipv6_devconf.accept_dad,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
 		{
-			.ctl_name	=	NET_IPV6,
-			.procname	=	"ipv6",
-			.mode		=	0555,
-			.child		=	addrconf_sysctl.addrconf_conf_dir,
+			.procname       = "force_tllao",
+			.data           = &ipv6_devconf.force_tllao,
+			.maxlen         = sizeof(int),
+			.mode           = 0644,
+			.proc_handler   = proc_dointvec
 		},
 		{
-			.ctl_name	=	0,	/* sentinel */
-		}
-	},
-	.addrconf_root_dir = {
+			.procname       = "ndisc_notify",
+			.data           = &ipv6_devconf.ndisc_notify,
+			.maxlen         = sizeof(int),
+			.mode           = 0644,
+			.proc_handler   = proc_dointvec
+		},
 		{
-			.ctl_name	=	CTL_NET,
-			.procname	=	"net",
-			.mode		=	0555,
-			.child		=	addrconf_sysctl.addrconf_proto_dir,
+			.procname	= "suppress_frag_ndisc",
+			.data		= &ipv6_devconf.suppress_frag_ndisc,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec
 		},
 		{
-			.ctl_name	=	0,	/* sentinel */
+			/* sentinel */
 		}
 	},
 };
 
-static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p)
+static int __addrconf_sysctl_register(struct net *net, char *dev_name,
+		struct inet6_dev *idev, struct ipv6_devconf *p)
 {
 	int i;
-	struct net_device *dev = idev ? idev->dev : NULL;
 	struct addrconf_sysctl_table *t;
-	char *dev_name = NULL;
+	char path[sizeof("net/ipv6/conf/") + IFNAMSIZ];
 
-	t = kmalloc(sizeof(*t), GFP_KERNEL);
+	t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL);
 	if (t == NULL)
-		return;
-	memcpy(t, &addrconf_sysctl, sizeof(*t));
-	for (i=0; t->addrconf_vars[i].data; i++) {
-		t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
-		t->addrconf_vars[i].de = NULL;
+		goto out;
+
+	for (i = 0; t->addrconf_vars[i].data; i++) {
+		t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf;
 		t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
+		t->addrconf_vars[i].extra2 = net;
 	}
-	if (dev) {
-		dev_name = dev->name; 
-		t->addrconf_dev[0].ctl_name = dev->ifindex;
-	} else {
-		dev_name = "default";
-		t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
-	}
-
-	/* 
-	 * Make a copy of dev_name, because '.procname' is regarded as const 
-	 * by sysctl and we wouldn't want anyone to change it under our feet
-	 * (see SIOCSIFNAME).
-	 */	
-	dev_name = kstrdup(dev_name, GFP_KERNEL);
-	if (!dev_name)
-	    goto free;
-
-	t->addrconf_dev[0].procname = dev_name;
-
-	t->addrconf_dev[0].child = t->addrconf_vars;
-	t->addrconf_dev[0].de = NULL;
-	t->addrconf_conf_dir[0].child = t->addrconf_dev;
-	t->addrconf_conf_dir[0].de = NULL;
-	t->addrconf_proto_dir[0].child = t->addrconf_conf_dir;
-	t->addrconf_proto_dir[0].de = NULL;
-	t->addrconf_root_dir[0].child = t->addrconf_proto_dir;
-	t->addrconf_root_dir[0].de = NULL;
-
-	t->sysctl_header = register_sysctl_table(t->addrconf_root_dir, 0);
+
+	snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name);
+
+	t->sysctl_header = register_net_sysctl(net, path, t->addrconf_vars);
 	if (t->sysctl_header == NULL)
-		goto free_procname;
-	else
-		p->sysctl = t;
-	return;
+		goto free;
+
+	p->sysctl = t;
+	return 0;
 
-	/* error path */
- free_procname:
-	kfree(dev_name);
- free:
+free:
 	kfree(t);
+out:
+	return -ENOBUFS;
+}
 
-	return;
+static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
+{
+	struct addrconf_sysctl_table *t;
+
+	if (p->sysctl == NULL)
+		return;
+
+	t = p->sysctl;
+	p->sysctl = NULL;
+	unregister_net_sysctl_table(t->sysctl_header);
+	kfree(t);
 }
 
-static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
+static void addrconf_sysctl_register(struct inet6_dev *idev)
 {
-	if (p->sysctl) {
-		struct addrconf_sysctl_table *t = p->sysctl;
-		p->sysctl = NULL;
-		unregister_sysctl_table(t->sysctl_header);
-		kfree(t->addrconf_dev[0].procname);
-		kfree(t);
-	}
+	neigh_sysctl_register(idev->dev, idev->nd_parms,
+			      &ndisc_ifinfo_sysctl_change);
+	__addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
+					idev, &idev->cnf);
 }
 
+static void addrconf_sysctl_unregister(struct inet6_dev *idev)
+{
+	__addrconf_sysctl_unregister(&idev->cnf);
+	neigh_sysctl_unregister(idev->nd_parms);
+}
 
-#endif
 
-/*
- *      Device notifier
- */
+#endif
 
-int register_inet6addr_notifier(struct notifier_block *nb)
+static int __net_init addrconf_init_net(struct net *net)
 {
-        return notifier_chain_register(&inet6addr_chain, nb);
+	int err = -ENOMEM;
+	struct ipv6_devconf *all, *dflt;
+
+	all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL);
+	if (all == NULL)
+		goto err_alloc_all;
+
+	dflt = kmemdup(&ipv6_devconf_dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL);
+	if (dflt == NULL)
+		goto err_alloc_dflt;
+
+	/* these will be inherited by all namespaces */
+	dflt->autoconf = ipv6_defaults.autoconf;
+	dflt->disable_ipv6 = ipv6_defaults.disable_ipv6;
+
+	net->ipv6.devconf_all = all;
+	net->ipv6.devconf_dflt = dflt;
+
+#ifdef CONFIG_SYSCTL
+	err = __addrconf_sysctl_register(net, "all", NULL, all);
+	if (err < 0)
+		goto err_reg_all;
+
+	err = __addrconf_sysctl_register(net, "default", NULL, dflt);
+	if (err < 0)
+		goto err_reg_dflt;
+#endif
+	return 0;
+
+#ifdef CONFIG_SYSCTL
+err_reg_dflt:
+	__addrconf_sysctl_unregister(all);
+err_reg_all:
+	kfree(dflt);
+#endif
+err_alloc_dflt:
+	kfree(all);
+err_alloc_all:
+	return err;
 }
 
-int unregister_inet6addr_notifier(struct notifier_block *nb)
+static void __net_exit addrconf_exit_net(struct net *net)
 {
-        return notifier_chain_unregister(&inet6addr_chain,nb);
+#ifdef CONFIG_SYSCTL
+	__addrconf_sysctl_unregister(net->ipv6.devconf_dflt);
+	__addrconf_sysctl_unregister(net->ipv6.devconf_all);
+#endif
+	if (!net_eq(net, &init_net)) {
+		kfree(net->ipv6.devconf_dflt);
+		kfree(net->ipv6.devconf_all);
+	}
 }
 
+static struct pernet_operations addrconf_ops = {
+	.init = addrconf_init_net,
+	.exit = addrconf_exit_net,
+};
+
+static struct rtnl_af_ops inet6_ops = {
+	.family		  = AF_INET6,
+	.fill_link_af	  = inet6_fill_link_af,
+	.get_link_af_size = inet6_get_link_af_size,
+	.set_link_af	  = inet6_set_link_af,
+};
+
 /*
  *	Init / cleanup code
  */
 
 int __init addrconf_init(void)
 {
-	int err = 0;
+	int i, err;
+
+	err = ipv6_addr_label_init();
+	if (err < 0) {
+		pr_crit("%s: cannot initialize default policy table: %d\n",
+			__func__, err);
+		goto out;
+	}
+
+	err = register_pernet_subsys(&addrconf_ops);
+	if (err < 0)
+		goto out_addrlabel;
+
+	addrconf_wq = create_workqueue("ipv6_addrconf");
+	if (!addrconf_wq) {
+		err = -ENOMEM;
+		goto out_nowq;
+	}
 
 	/* The addrconf netdev notifier requires that loopback_dev
 	 * has it's ipv6 private information allocated and setup
@@ -3510,91 +5346,84 @@ int __init addrconf_init(void)
 	 * device and it being up should be removed.
 	 */
 	rtnl_lock();
-	if (!ipv6_add_dev(&loopback_dev))
+	if (!ipv6_add_dev(init_net.loopback_dev))
 		err = -ENOMEM;
 	rtnl_unlock();
 	if (err)
-		return err;
+		goto errlo;
 
-	ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev);
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&inet6_addr_lst[i]);
 
 	register_netdevice_notifier(&ipv6_dev_notf);
 
-#ifdef CONFIG_IPV6_PRIVACY
-	md5_tfm = crypto_alloc_tfm("md5", 0);
-	if (unlikely(md5_tfm == NULL))
-		printk(KERN_WARNING
-			"failed to load transform for md5\n");
-#endif
+	addrconf_verify();
 
-	addrconf_verify(0);
-	rtnetlink_links[PF_INET6] = inet6_rtnetlink_table;
-#ifdef CONFIG_SYSCTL
-	addrconf_sysctl.sysctl_header =
-		register_sysctl_table(addrconf_sysctl.addrconf_root_dir, 0);
-	addrconf_sysctl_register(NULL, &ipv6_devconf_dflt);
-#endif
+	rtnl_af_register(&inet6_ops);
+
+	err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo,
+			      NULL);
+	if (err < 0)
+		goto errout;
+
+	/* Only the first call to __rtnl_register can fail */
+	__rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, NULL);
+	__rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, NULL);
+	__rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
+			inet6_dump_ifaddr, NULL);
+	__rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
+			inet6_dump_ifmcaddr, NULL);
+	__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
+			inet6_dump_ifacaddr, NULL);
+	__rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
+			inet6_netconf_dump_devconf, NULL);
+
+	ipv6_addr_label_rtnl_register();
 
 	return 0;
+errout:
+	rtnl_af_unregister(&inet6_ops);
+	unregister_netdevice_notifier(&ipv6_dev_notf);
+errlo:
+	destroy_workqueue(addrconf_wq);
+out_nowq:
+	unregister_pernet_subsys(&addrconf_ops);
+out_addrlabel:
+	ipv6_addr_label_cleanup();
+out:
+	return err;
 }
 
-void __exit addrconf_cleanup(void)
+void addrconf_cleanup(void)
 {
- 	struct net_device *dev;
- 	struct inet6_dev *idev;
- 	struct inet6_ifaddr *ifa;
+	struct net_device *dev;
 	int i;
 
 	unregister_netdevice_notifier(&ipv6_dev_notf);
-
-	rtnetlink_links[PF_INET6] = NULL;
-#ifdef CONFIG_SYSCTL
-	addrconf_sysctl_unregister(&ipv6_devconf_dflt);
-	addrconf_sysctl_unregister(&ipv6_devconf);
-#endif
+	unregister_pernet_subsys(&addrconf_ops);
+	ipv6_addr_label_cleanup();
 
 	rtnl_lock();
 
-	/*
-	 *	clean dev list.
-	 */
+	__rtnl_af_unregister(&inet6_ops);
 
-	for (dev=dev_base; dev; dev=dev->next) {
-		if ((idev = __in6_dev_get(dev)) == NULL)
+	/* clean dev list */
+	for_each_netdev(&init_net, dev) {
+		if (__in6_dev_get(dev) == NULL)
 			continue;
 		addrconf_ifdown(dev, 1);
 	}
-	addrconf_ifdown(&loopback_dev, 2);
+	addrconf_ifdown(init_net.loopback_dev, 2);
 
 	/*
 	 *	Check hash table.
 	 */
-
-	write_lock_bh(&addrconf_hash_lock);
-	for (i=0; i < IN6_ADDR_HSIZE; i++) {
-		for (ifa=inet6_addr_lst[i]; ifa; ) {
-			struct inet6_ifaddr *bifa;
-
-			bifa = ifa;
-			ifa = ifa->lst_next;
-			printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa);
-			/* Do not free it; something is wrong.
-			   Now we can investigate it with debugger.
-			 */
-		}
-	}
-	write_unlock_bh(&addrconf_hash_lock);
-
-	del_timer(&addr_chk_timer);
-
+	spin_lock_bh(&addrconf_hash_lock);
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
+	spin_unlock_bh(&addrconf_hash_lock);
+	cancel_delayed_work(&addr_chk_work);
 	rtnl_unlock();
 
-#ifdef CONFIG_IPV6_PRIVACY
-	crypto_free_tfm(md5_tfm);
-	md5_tfm = NULL;
-#endif
-
-#ifdef CONFIG_PROC_FS
-	proc_net_remove("if_inet6");
-#endif
+	destroy_workqueue(addrconf_wq);
 }
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
new file mode 100644
index 00000000000..e6960457f62
--- /dev/null
+++ b/net/ipv6/addrconf_core.c
@@ -0,0 +1,150 @@
+/*
+ * IPv6 library code, needed by static components when full IPv6 support is
+ * not configured or static.
+ */
+
+#include <linux/export.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/ip.h>
+
+#define IPV6_ADDR_SCOPE_TYPE(scope)	((scope) << 16)
+
+static inline unsigned int ipv6_addr_scope2type(unsigned int scope)
+{
+	switch (scope) {
+	case IPV6_ADDR_SCOPE_NODELOCAL:
+		return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_NODELOCAL) |
+			IPV6_ADDR_LOOPBACK);
+	case IPV6_ADDR_SCOPE_LINKLOCAL:
+		return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL) |
+			IPV6_ADDR_LINKLOCAL);
+	case IPV6_ADDR_SCOPE_SITELOCAL:
+		return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL) |
+			IPV6_ADDR_SITELOCAL);
+	}
+	return IPV6_ADDR_SCOPE_TYPE(scope);
+}
+
+int __ipv6_addr_type(const struct in6_addr *addr)
+{
+	__be32 st;
+
+	st = addr->s6_addr32[0];
+
+	/* Consider all addresses with the first three bits different of
+	   000 and 111 as unicasts.
+	 */
+	if ((st & htonl(0xE0000000)) != htonl(0x00000000) &&
+	    (st & htonl(0xE0000000)) != htonl(0xE0000000))
+		return (IPV6_ADDR_UNICAST |
+			IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL));
+
+	if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) {
+		/* multicast */
+		/* addr-select 3.1 */
+		return (IPV6_ADDR_MULTICAST |
+			ipv6_addr_scope2type(IPV6_ADDR_MC_SCOPE(addr)));
+	}
+
+	if ((st & htonl(0xFFC00000)) == htonl(0xFE800000))
+		return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST |
+			IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL));		/* addr-select 3.1 */
+	if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000))
+		return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST |
+			IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL));		/* addr-select 3.1 */
+	if ((st & htonl(0xFE000000)) == htonl(0xFC000000))
+		return (IPV6_ADDR_UNICAST |
+			IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL));			/* RFC 4193 */
+
+	if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) {
+		if (addr->s6_addr32[2] == 0) {
+			if (addr->s6_addr32[3] == 0)
+				return IPV6_ADDR_ANY;
+
+			if (addr->s6_addr32[3] == htonl(0x00000001))
+				return (IPV6_ADDR_LOOPBACK | IPV6_ADDR_UNICAST |
+					IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL));	/* addr-select 3.4 */
+
+			return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST |
+				IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL));	/* addr-select 3.3 */
+		}
+
+		if (addr->s6_addr32[2] == htonl(0x0000ffff))
+			return (IPV6_ADDR_MAPPED |
+				IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL));	/* addr-select 3.3 */
+	}
+
+	return (IPV6_ADDR_UNICAST |
+		IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL));	/* addr-select 3.4 */
+}
+EXPORT_SYMBOL(__ipv6_addr_type);
+
+static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
+
+int register_inet6addr_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&inet6addr_chain, nb);
+}
+EXPORT_SYMBOL(register_inet6addr_notifier);
+
+int unregister_inet6addr_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&inet6addr_chain, nb);
+}
+EXPORT_SYMBOL(unregister_inet6addr_notifier);
+
+int inet6addr_notifier_call_chain(unsigned long val, void *v)
+{
+	return atomic_notifier_call_chain(&inet6addr_chain, val, v);
+}
+EXPORT_SYMBOL(inet6addr_notifier_call_chain);
+
+const struct ipv6_stub *ipv6_stub __read_mostly;
+EXPORT_SYMBOL_GPL(ipv6_stub);
+
+/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
+const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+EXPORT_SYMBOL(in6addr_loopback);
+const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
+EXPORT_SYMBOL(in6addr_any);
+const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
+EXPORT_SYMBOL(in6addr_linklocal_allnodes);
+const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+EXPORT_SYMBOL(in6addr_linklocal_allrouters);
+const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
+EXPORT_SYMBOL(in6addr_interfacelocal_allnodes);
+const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
+EXPORT_SYMBOL(in6addr_interfacelocal_allrouters);
+const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
+EXPORT_SYMBOL(in6addr_sitelocal_allrouters);
+
+static void snmp6_free_dev(struct inet6_dev *idev)
+{
+	kfree(idev->stats.icmpv6msgdev);
+	kfree(idev->stats.icmpv6dev);
+	free_percpu(idev->stats.ipv6);
+}
+
+/* Nobody refers to this device, we may destroy it. */
+
+void in6_dev_finish_destroy(struct inet6_dev *idev)
+{
+	struct net_device *dev = idev->dev;
+
+	WARN_ON(!list_empty(&idev->addr_list));
+	WARN_ON(idev->mc_list != NULL);
+	WARN_ON(timer_pending(&idev->rs_timer));
+
+#ifdef NET_REFCNT_DEBUG
+	pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL");
+#endif
+	dev_put(dev);
+	if (!idev->dead) {
+		pr_warn("Freeing alive inet6 device %p\n", idev);
+		return;
+	}
+	snmp6_free_dev(idev);
+	kfree_rcu(idev, rcu);
+}
+EXPORT_SYMBOL(in6_dev_finish_destroy);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
new file mode 100644
index 00000000000..731e1e1722d
--- /dev/null
+++ b/net/ipv6/addrlabel.c
@@ -0,0 +1,603 @@
+/*
+ * IPv6 Address Label subsystem
+ * for the IPv6 "Default" Source Address Selection
+ *
+ * Copyright (C)2007 USAGI/WIDE Project
+ */
+/*
+ * Author:
+ *	YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/in6.h>
+#include <linux/slab.h>
+#include <net/addrconf.h>
+#include <linux/if_addrlabel.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#if 0
+#define ADDRLABEL(x...) printk(x)
+#else
+#define ADDRLABEL(x...) do { ; } while (0)
+#endif
+
+/*
+ * Policy Table
+ */
+struct ip6addrlbl_entry {
+#ifdef CONFIG_NET_NS
+	struct net *lbl_net;
+#endif
+	struct in6_addr prefix;
+	int prefixlen;
+	int ifindex;
+	int addrtype;
+	u32 label;
+	struct hlist_node list;
+	atomic_t refcnt;
+	struct rcu_head rcu;
+};
+
+static struct ip6addrlbl_table
+{
+	struct hlist_head head;
+	spinlock_t lock;
+	u32 seq;
+} ip6addrlbl_table;
+
+static inline
+struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
+{
+	return read_pnet(&lbl->lbl_net);
+}
+
+/*
+ * Default policy table (RFC6724 + extensions)
+ *
+ * prefix		addr_type	label
+ * -------------------------------------------------------------------------
+ * ::1/128		LOOPBACK	0
+ * ::/0			N/A		1
+ * 2002::/16		N/A		2
+ * ::/96		COMPATv4	3
+ * ::ffff:0:0/96	V4MAPPED	4
+ * fc00::/7		N/A		5		ULA (RFC 4193)
+ * 2001::/32		N/A		6		Teredo (RFC 4380)
+ * 2001:10::/28		N/A		7		ORCHID (RFC 4843)
+ * fec0::/10		N/A		11		Site-local
+ *							(deprecated by RFC3879)
+ * 3ffe::/16		N/A		12		6bone
+ *
+ * Note: 0xffffffff is used if we do not have any policies.
+ * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724.
+ */
+
+#define IPV6_ADDR_LABEL_DEFAULT	0xffffffffUL
+
+static const __net_initconst struct ip6addrlbl_init_table
+{
+	const struct in6_addr *prefix;
+	int prefixlen;
+	u32 label;
+} ip6addrlbl_init_table[] = {
+	{	/* ::/0 */
+		.prefix = &in6addr_any,
+		.label = 1,
+	}, {	/* fc00::/7 */
+		.prefix = &(struct in6_addr){ { { 0xfc } } } ,
+		.prefixlen = 7,
+		.label = 5,
+	}, {	/* fec0::/10 */
+		.prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } },
+		.prefixlen = 10,
+		.label = 11,
+	}, {	/* 2002::/16 */
+		.prefix = &(struct in6_addr){ { { 0x20, 0x02 } } },
+		.prefixlen = 16,
+		.label = 2,
+	}, {	/* 3ffe::/16 */
+		.prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } },
+		.prefixlen = 16,
+		.label = 12,
+	}, {	/* 2001::/32 */
+		.prefix = &(struct in6_addr){ { { 0x20, 0x01 } } },
+		.prefixlen = 32,
+		.label = 6,
+	}, {	/* 2001:10::/28 */
+		.prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } },
+		.prefixlen = 28,
+		.label = 7,
+	}, {	/* ::ffff:0:0 */
+		.prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } },
+		.prefixlen = 96,
+		.label = 4,
+	}, {	/* ::/96 */
+		.prefix = &in6addr_any,
+		.prefixlen = 96,
+		.label = 3,
+	}, {	/* ::1/128 */
+		.prefix = &in6addr_loopback,
+		.prefixlen = 128,
+		.label = 0,
+	}
+};
+
+/* Object management */
+static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
+{
+#ifdef CONFIG_NET_NS
+	release_net(p->lbl_net);
+#endif
+	kfree(p);
+}
+
+static void ip6addrlbl_free_rcu(struct rcu_head *h)
+{
+	ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
+}
+
+static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p)
+{
+	return atomic_inc_not_zero(&p->refcnt);
+}
+
+static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
+{
+	if (atomic_dec_and_test(&p->refcnt))
+		call_rcu(&p->rcu, ip6addrlbl_free_rcu);
+}
+
+/* Find label */
+static bool __ip6addrlbl_match(struct net *net,
+			       const struct ip6addrlbl_entry *p,
+			       const struct in6_addr *addr,
+			       int addrtype, int ifindex)
+{
+	if (!net_eq(ip6addrlbl_net(p), net))
+		return false;
+	if (p->ifindex && p->ifindex != ifindex)
+		return false;
+	if (p->addrtype && p->addrtype != addrtype)
+		return false;
+	if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen))
+		return false;
+	return true;
+}
+
+static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
+						  const struct in6_addr *addr,
+						  int type, int ifindex)
+{
+	struct ip6addrlbl_entry *p;
+	hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
+		if (__ip6addrlbl_match(net, p, addr, type, ifindex))
+			return p;
+	}
+	return NULL;
+}
+
+u32 ipv6_addr_label(struct net *net,
+		    const struct in6_addr *addr, int type, int ifindex)
+{
+	u32 label;
+	struct ip6addrlbl_entry *p;
+
+	type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK;
+
+	rcu_read_lock();
+	p = __ipv6_addr_label(net, addr, type, ifindex);
+	label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT;
+	rcu_read_unlock();
+
+	ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n",
+		  __func__, addr, type, ifindex, label);
+
+	return label;
+}
+
+/* allocate one entry */
+static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
+						 const struct in6_addr *prefix,
+						 int prefixlen, int ifindex,
+						 u32 label)
+{
+	struct ip6addrlbl_entry *newp;
+	int addrtype;
+
+	ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n",
+		  __func__, prefix, prefixlen, ifindex, (unsigned int)label);
+
+	addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK);
+
+	switch (addrtype) {
+	case IPV6_ADDR_MAPPED:
+		if (prefixlen > 96)
+			return ERR_PTR(-EINVAL);
+		if (prefixlen < 96)
+			addrtype = 0;
+		break;
+	case IPV6_ADDR_COMPATv4:
+		if (prefixlen != 96)
+			addrtype = 0;
+		break;
+	case IPV6_ADDR_LOOPBACK:
+		if (prefixlen != 128)
+			addrtype = 0;
+		break;
+	}
+
+	newp = kmalloc(sizeof(*newp), GFP_KERNEL);
+	if (!newp)
+		return ERR_PTR(-ENOMEM);
+
+	ipv6_addr_prefix(&newp->prefix, prefix, prefixlen);
+	newp->prefixlen = prefixlen;
+	newp->ifindex = ifindex;
+	newp->addrtype = addrtype;
+	newp->label = label;
+	INIT_HLIST_NODE(&newp->list);
+#ifdef CONFIG_NET_NS
+	newp->lbl_net = hold_net(net);
+#endif
+	atomic_set(&newp->refcnt, 1);
+	return newp;
+}
+
+/* add a label */
+static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
+{
+	struct hlist_node *n;
+	struct ip6addrlbl_entry *last = NULL, *p = NULL;
+	int ret = 0;
+
+	ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
+		  replace);
+
+	hlist_for_each_entry_safe(p, n,	&ip6addrlbl_table.head, list) {
+		if (p->prefixlen == newp->prefixlen &&
+		    net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
+		    p->ifindex == newp->ifindex &&
+		    ipv6_addr_equal(&p->prefix, &newp->prefix)) {
+			if (!replace) {
+				ret = -EEXIST;
+				goto out;
+			}
+			hlist_replace_rcu(&p->list, &newp->list);
+			ip6addrlbl_put(p);
+			goto out;
+		} else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
+			   (p->prefixlen < newp->prefixlen)) {
+			hlist_add_before_rcu(&newp->list, &p->list);
+			goto out;
+		}
+		last = p;
+	}
+	if (last)
+		hlist_add_after_rcu(&last->list, &newp->list);
+	else
+		hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
+out:
+	if (!ret)
+		ip6addrlbl_table.seq++;
+	return ret;
+}
+
+/* add a label */
+static int ip6addrlbl_add(struct net *net,
+			  const struct in6_addr *prefix, int prefixlen,
+			  int ifindex, u32 label, int replace)
+{
+	struct ip6addrlbl_entry *newp;
+	int ret = 0;
+
+	ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
+		  __func__, prefix, prefixlen, ifindex, (unsigned int)label,
+		  replace);
+
+	newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label);
+	if (IS_ERR(newp))
+		return PTR_ERR(newp);
+	spin_lock(&ip6addrlbl_table.lock);
+	ret = __ip6addrlbl_add(newp, replace);
+	spin_unlock(&ip6addrlbl_table.lock);
+	if (ret)
+		ip6addrlbl_free(newp);
+	return ret;
+}
+
+/* remove a label */
+static int __ip6addrlbl_del(struct net *net,
+			    const struct in6_addr *prefix, int prefixlen,
+			    int ifindex)
+{
+	struct ip6addrlbl_entry *p = NULL;
+	struct hlist_node *n;
+	int ret = -ESRCH;
+
+	ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
+		  __func__, prefix, prefixlen, ifindex);
+
+	hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
+		if (p->prefixlen == prefixlen &&
+		    net_eq(ip6addrlbl_net(p), net) &&
+		    p->ifindex == ifindex &&
+		    ipv6_addr_equal(&p->prefix, prefix)) {
+			hlist_del_rcu(&p->list);
+			ip6addrlbl_put(p);
+			ret = 0;
+			break;
+		}
+	}
+	return ret;
+}
+
+static int ip6addrlbl_del(struct net *net,
+			  const struct in6_addr *prefix, int prefixlen,
+			  int ifindex)
+{
+	struct in6_addr prefix_buf;
+	int ret;
+
+	ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
+		  __func__, prefix, prefixlen, ifindex);
+
+	ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
+	spin_lock(&ip6addrlbl_table.lock);
+	ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex);
+	spin_unlock(&ip6addrlbl_table.lock);
+	return ret;
+}
+
+/* add default label */
+static int __net_init ip6addrlbl_net_init(struct net *net)
+{
+	int err = 0;
+	int i;
+
+	ADDRLABEL(KERN_DEBUG "%s\n", __func__);
+
+	for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
+		int ret = ip6addrlbl_add(net,
+					 ip6addrlbl_init_table[i].prefix,
+					 ip6addrlbl_init_table[i].prefixlen,
+					 0,
+					 ip6addrlbl_init_table[i].label, 0);
+		/* XXX: should we free all rules when we catch an error? */
+		if (ret && (!err || err != -ENOMEM))
+			err = ret;
+	}
+	return err;
+}
+
+static void __net_exit ip6addrlbl_net_exit(struct net *net)
+{
+	struct ip6addrlbl_entry *p = NULL;
+	struct hlist_node *n;
+
+	/* Remove all labels belonging to the exiting net */
+	spin_lock(&ip6addrlbl_table.lock);
+	hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
+		if (net_eq(ip6addrlbl_net(p), net)) {
+			hlist_del_rcu(&p->list);
+			ip6addrlbl_put(p);
+		}
+	}
+	spin_unlock(&ip6addrlbl_table.lock);
+}
+
+static struct pernet_operations ipv6_addr_label_ops = {
+	.init = ip6addrlbl_net_init,
+	.exit = ip6addrlbl_net_exit,
+};
+
+int __init ipv6_addr_label_init(void)
+{
+	spin_lock_init(&ip6addrlbl_table.lock);
+
+	return register_pernet_subsys(&ipv6_addr_label_ops);
+}
+
+void ipv6_addr_label_cleanup(void)
+{
+	unregister_pernet_subsys(&ipv6_addr_label_ops);
+}
+
+static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
+	[IFAL_ADDRESS]		= { .len = sizeof(struct in6_addr), },
+	[IFAL_LABEL]		= { .len = sizeof(u32), },
+};
+
+static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	struct net *net = sock_net(skb->sk);
+	struct ifaddrlblmsg *ifal;
+	struct nlattr *tb[IFAL_MAX+1];
+	struct in6_addr *pfx;
+	u32 label;
+	int err = 0;
+
+	err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
+	if (err < 0)
+		return err;
+
+	ifal = nlmsg_data(nlh);
+
+	if (ifal->ifal_family != AF_INET6 ||
+	    ifal->ifal_prefixlen > 128)
+		return -EINVAL;
+
+	if (!tb[IFAL_ADDRESS])
+		return -EINVAL;
+	pfx = nla_data(tb[IFAL_ADDRESS]);
+
+	if (!tb[IFAL_LABEL])
+		return -EINVAL;
+	label = nla_get_u32(tb[IFAL_LABEL]);
+	if (label == IPV6_ADDR_LABEL_DEFAULT)
+		return -EINVAL;
+
+	switch (nlh->nlmsg_type) {
+	case RTM_NEWADDRLABEL:
+		if (ifal->ifal_index &&
+		    !__dev_get_by_index(net, ifal->ifal_index))
+			return -EINVAL;
+
+		err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen,
+				     ifal->ifal_index, label,
+				     nlh->nlmsg_flags & NLM_F_REPLACE);
+		break;
+	case RTM_DELADDRLABEL:
+		err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen,
+				     ifal->ifal_index);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+	}
+	return err;
+}
+
+static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
+			      int prefixlen, int ifindex, u32 lseq)
+{
+	struct ifaddrlblmsg *ifal = nlmsg_data(nlh);
+	ifal->ifal_family = AF_INET6;
+	ifal->ifal_prefixlen = prefixlen;
+	ifal->ifal_flags = 0;
+	ifal->ifal_index = ifindex;
+	ifal->ifal_seq = lseq;
+};
+
+static int ip6addrlbl_fill(struct sk_buff *skb,
+			   struct ip6addrlbl_entry *p,
+			   u32 lseq,
+			   u32 portid, u32 seq, int event,
+			   unsigned int flags)
+{
+	struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event,
+					 sizeof(struct ifaddrlblmsg), flags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq);
+
+	if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 ||
+	    nla_put_u32(skb, IFAL_LABEL, p->label) < 0) {
+		nlmsg_cancel(skb, nlh);
+		return -EMSGSIZE;
+	}
+
+	return nlmsg_end(skb, nlh);
+}
+
+static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct ip6addrlbl_entry *p;
+	int idx = 0, s_idx = cb->args[0];
+	int err;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
+		if (idx >= s_idx &&
+		    net_eq(ip6addrlbl_net(p), net)) {
+			err = ip6addrlbl_fill(skb, p,
+					      ip6addrlbl_table.seq,
+					      NETLINK_CB(cb->skb).portid,
+					      cb->nlh->nlmsg_seq,
+					      RTM_NEWADDRLABEL,
+					      NLM_F_MULTI);
+			if (err <= 0)
+				break;
+		}
+		idx++;
+	}
+	rcu_read_unlock();
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+static inline int ip6addrlbl_msgsize(void)
+{
+	return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg))
+		+ nla_total_size(16)	/* IFAL_ADDRESS */
+		+ nla_total_size(4);	/* IFAL_LABEL */
+}
+
+static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct ifaddrlblmsg *ifal;
+	struct nlattr *tb[IFAL_MAX+1];
+	struct in6_addr *addr;
+	u32 lseq;
+	int err = 0;
+	struct ip6addrlbl_entry *p;
+	struct sk_buff *skb;
+
+	err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
+	if (err < 0)
+		return err;
+
+	ifal = nlmsg_data(nlh);
+
+	if (ifal->ifal_family != AF_INET6 ||
+	    ifal->ifal_prefixlen != 128)
+		return -EINVAL;
+
+	if (ifal->ifal_index &&
+	    !__dev_get_by_index(net, ifal->ifal_index))
+		return -EINVAL;
+
+	if (!tb[IFAL_ADDRESS])
+		return -EINVAL;
+	addr = nla_data(tb[IFAL_ADDRESS]);
+
+	rcu_read_lock();
+	p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
+	if (p && ip6addrlbl_hold(p))
+		p = NULL;
+	lseq = ip6addrlbl_table.seq;
+	rcu_read_unlock();
+
+	if (!p) {
+		err = -ESRCH;
+		goto out;
+	}
+
+	skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL);
+	if (!skb) {
+		ip6addrlbl_put(p);
+		return -ENOBUFS;
+	}
+
+	err = ip6addrlbl_fill(skb, p, lseq,
+			      NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
+			      RTM_NEWADDRLABEL, 0);
+
+	ip6addrlbl_put(p);
+
+	if (err < 0) {
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto out;
+	}
+
+	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+out:
+	return err;
+}
+
+void __init ipv6_addr_label_rtnl_register(void)
+{
+	__rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel,
+			NULL, NULL);
+	__rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel,
+			NULL, NULL);
+	__rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get,
+			ip6addrlbl_dump, NULL);
+}
+
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 4f8795af2ed..7cb4392690d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1,14 +1,12 @@
 /*
  *	PF_INET6 socket protocol family
- *	Linux INET6 implementation 
+ *	Linux INET6 implementation
  *
  *	Authors:
- *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *	Pedro Roque		<roque@di.fc.ul.pt>
  *
  *	Adapted from linux/net/ipv4/af_inet.c
  *
- *	$Id: af_inet6.c,v 1.66 2002/02/01 22:01:04 davem Exp $
- *
  * 	Fixes:
  *	piggy, Karl Knutson	:	Socket protocol table
  * 	Hideaki YOSHIFUJI	:	sin6_scope_id support
@@ -20,15 +18,15 @@
  *      2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt) "IPv6: " fmt
 
 #include <linux/module.h>
-#include <linux/config.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/kernel.h>
-#include <linux/sched.h>
 #include <linux/timer.h>
 #include <linux/string.h>
 #include <linux/sockios.h>
@@ -39,42 +37,59 @@
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 
 #include <linux/inet.h>
 #include <linux/netdevice.h>
 #include <linux/icmpv6.h>
-#include <linux/smp_lock.h>
 #include <linux/netfilter_ipv6.h>
 
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/udp.h>
+#include <net/udplite.h>
 #include <net/tcp.h>
-#include <net/ipip.h>
+#include <net/ping.h>
 #include <net/protocol.h>
 #include <net/inet_common.h>
+#include <net/route.h>
 #include <net/transp_v6.h>
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
+#include <net/ndisc.h>
 #ifdef CONFIG_IPV6_TUNNEL
 #include <net/ip6_tunnel.h>
 #endif
 
 #include <asm/uaccess.h>
-#include <asm/system.h>
+#include <linux/mroute6.h>
 
 MODULE_AUTHOR("Cast of dozens");
 MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
 MODULE_LICENSE("GPL");
 
-int sysctl_ipv6_bindv6only;
-
-/* The inetsw table contains everything that inet_create needs to
+/* The inetsw6 table contains everything that inet6_create needs to
  * build a new socket.
  */
 static struct list_head inetsw6[SOCK_MAX];
 static DEFINE_SPINLOCK(inetsw6_lock);
 
+struct ipv6_params ipv6_defaults = {
+	.disable_ipv6 = 0,
+	.autoconf = 1,
+};
+
+static int disable_ipv6_mod;
+
+module_param_named(disable, disable_ipv6_mod, int, 0444);
+MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional");
+
+module_param_named(disable_ipv6, ipv6_defaults.disable_ipv6, int, 0444);
+MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces");
+
+module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444);
+MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces");
+
 static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
 {
 	const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
@@ -82,24 +97,25 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
 }
 
-static int inet6_create(struct socket *sock, int protocol)
+static int inet6_create(struct net *net, struct socket *sock, int protocol,
+			int kern)
 {
 	struct inet_sock *inet;
 	struct ipv6_pinfo *np;
 	struct sock *sk;
-	struct list_head *p;
 	struct inet_protosw *answer;
 	struct proto *answer_prot;
 	unsigned char answer_flags;
-	char answer_no_check;
-	int rc;
+	int try_loading_module = 0;
+	int err;
 
 	/* Look for the requested type/protocol pair. */
-	answer = NULL;
+lookup_protocol:
+	err = -ESOCKTNOSUPPORT;
 	rcu_read_lock();
-	list_for_each_rcu(p, &inetsw6[sock->type]) {
-		answer = list_entry(p, struct inet_protosw, list);
+	list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) {
 
+		err = 0;
 		/* Check the non-wild match. */
 		if (protocol == answer->protocol) {
 			if (protocol != IPPROTO_IP)
@@ -113,44 +129,59 @@ static int inet6_create(struct socket *sock, int protocol)
 			if (IPPROTO_IP == answer->protocol)
 				break;
 		}
-		answer = NULL;
+		err = -EPROTONOSUPPORT;
 	}
 
-	rc = -ESOCKTNOSUPPORT;
-	if (!answer)
-		goto out_rcu_unlock;
-	rc = -EPERM;
-	if (answer->capability > 0 && !capable(answer->capability))
-		goto out_rcu_unlock;
-	rc = -EPROTONOSUPPORT;
-	if (!protocol)
+	if (err) {
+		if (try_loading_module < 2) {
+			rcu_read_unlock();
+			/*
+			 * Be more specific, e.g. net-pf-10-proto-132-type-1
+			 * (net-pf-PF_INET6-proto-IPPROTO_SCTP-type-SOCK_STREAM)
+			 */
+			if (++try_loading_module == 1)
+				request_module("net-pf-%d-proto-%d-type-%d",
+						PF_INET6, protocol, sock->type);
+			/*
+			 * Fall back to generic, e.g. net-pf-10-proto-132
+			 * (net-pf-PF_INET6-proto-IPPROTO_SCTP)
+			 */
+			else
+				request_module("net-pf-%d-proto-%d",
+						PF_INET6, protocol);
+			goto lookup_protocol;
+		} else
+			goto out_rcu_unlock;
+	}
+
+	err = -EPERM;
+	if (sock->type == SOCK_RAW && !kern &&
+	    !ns_capable(net->user_ns, CAP_NET_RAW))
 		goto out_rcu_unlock;
 
 	sock->ops = answer->ops;
-
 	answer_prot = answer->prot;
-	answer_no_check = answer->no_check;
 	answer_flags = answer->flags;
 	rcu_read_unlock();
 
-	BUG_TRAP(answer_prot->slab != NULL);
+	WARN_ON(answer_prot->slab == NULL);
 
-	rc = -ENOBUFS;
-	sk = sk_alloc(PF_INET6, GFP_KERNEL, answer_prot, 1);
+	err = -ENOBUFS;
+	sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot);
 	if (sk == NULL)
 		goto out;
 
 	sock_init_data(sock, sk);
 
-	rc = 0;
-	sk->sk_no_check = answer_no_check;
+	err = 0;
 	if (INET_PROTOSW_REUSE & answer_flags)
-		sk->sk_reuse = 1;
+		sk->sk_reuse = SK_CAN_REUSE;
 
 	inet = inet_sk(sk);
+	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
 
 	if (SOCK_RAW == sock->type) {
-		inet->num = protocol;
+		inet->inet_num = protocol;
 		if (IPPROTO_RAW == protocol)
 			inet->hdrincl = 1;
 	}
@@ -163,11 +194,11 @@ static int inet6_create(struct socket *sock, int protocol)
 
 	inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk);
 	np->hop_limit	= -1;
-	np->mcast_hops	= -1;
+	np->mcast_hops	= IPV6_DEFAULT_MCASTHOPS;
 	np->mc_loop	= 1;
 	np->pmtudisc	= IPV6_PMTUDISC_WANT;
-	np->ipv6only	= sysctl_ipv6_bindv6only;
-	
+	np->ipv6only	= net->ipv6.sysctl.bindv6only;
+
 	/* Init the ipv4 part of the socket since we can have sockets
 	 * using v6 API for ipv4.
 	 */
@@ -177,12 +208,13 @@ static int inet6_create(struct socket *sock, int protocol)
 	inet->mc_ttl	= 1;
 	inet->mc_index	= 0;
 	inet->mc_list	= NULL;
+	inet->rcv_tos	= 0;
 
-	if (ipv4_config.no_pmtu_disc)
+	if (net->ipv4.sysctl_ip_no_pmtu_disc)
 		inet->pmtudisc = IP_PMTUDISC_DONT;
 	else
 		inet->pmtudisc = IP_PMTUDISC_WANT;
-	/* 
+	/*
 	 * Increment only the relevant sk_prot->socks debug field, this changes
 	 * the previous behaviour of incrementing both the equivalent to
 	 * answer->prot->socks (inet6_sock_nr) and inet_sock_nr.
@@ -193,23 +225,23 @@ static int inet6_create(struct socket *sock, int protocol)
 	 */
 	sk_refcnt_debug_inc(sk);
 
-	if (inet->num) {
+	if (inet->inet_num) {
 		/* It assumes that any protocol which allows
 		 * the user to assign a number at socket
 		 * creation time automatically shares.
 		 */
-		inet->sport = ntohs(inet->num);
+		inet->inet_sport = htons(inet->inet_num);
 		sk->sk_prot->hash(sk);
 	}
 	if (sk->sk_prot->init) {
-		rc = sk->sk_prot->init(sk);
-		if (rc) {
+		err = sk->sk_prot->init(sk);
+		if (err) {
 			sk_common_release(sk);
 			goto out;
 		}
 	}
 out:
-	return rc;
+	return err;
 out_rcu_unlock:
 	rcu_read_unlock();
 	goto out;
@@ -219,11 +251,12 @@ out_rcu_unlock:
 /* bind for INET6 API */
 int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
-	struct sockaddr_in6 *addr=(struct sockaddr_in6 *)uaddr;
+	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
 	struct sock *sk = sock->sk;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	__u32 v4addr = 0;
+	struct net *net = sock_net(sk);
+	__be32 v4addr = 0;
 	unsigned short snum;
 	int addr_type = 0;
 	int err = 0;
@@ -234,26 +267,47 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
 	if (addr_len < SIN6_LEN_RFC2133)
 		return -EINVAL;
+
+	if (addr->sin6_family != AF_INET6)
+		return -EAFNOSUPPORT;
+
 	addr_type = ipv6_addr_type(&addr->sin6_addr);
 	if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
 		return -EINVAL;
 
 	snum = ntohs(addr->sin6_port);
-	if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+	if (snum && snum < PROT_SOCK && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
 		return -EACCES;
 
 	lock_sock(sk);
 
 	/* Check these errors (active socket, double bind). */
-	if (sk->sk_state != TCP_CLOSE || inet->num) {
+	if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
 		err = -EINVAL;
 		goto out;
 	}
 
 	/* Check if the address belongs to the host. */
 	if (addr_type == IPV6_ADDR_MAPPED) {
+		int chk_addr_ret;
+
+		/* Binding to v4-mapped address on a v6-only socket
+		 * makes no sense
+		 */
+		if (np->ipv6only) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		/* Reproduce AF_INET checks to make the bindings consistent */
 		v4addr = addr->sin6_addr.s6_addr32[3];
-		if (inet_addr_type(v4addr) != RTN_LOCAL) {
+		chk_addr_ret = inet_addr_type(net, v4addr);
+		if (!sysctl_ip_nonlocal_bind &&
+		    !(inet->freebind || inet->transparent) &&
+		    v4addr != htonl(INADDR_ANY) &&
+		    chk_addr_ret != RTN_LOCAL &&
+		    chk_addr_ret != RTN_MULTICAST &&
+		    chk_addr_ret != RTN_BROADCAST) {
 			err = -EADDRNOTAVAIL;
 			goto out;
 		}
@@ -261,7 +315,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		if (addr_type != IPV6_ADDR_ANY) {
 			struct net_device *dev = NULL;
 
-			if (addr_type & IPV6_ADDR_LINKLOCAL) {
+			rcu_read_lock();
+			if (__ipv6_addr_needs_scope_id(addr_type)) {
 				if (addr_len >= sizeof(struct sockaddr_in6) &&
 				    addr->sin6_scope_id) {
 					/* Override any existing binding, if another one
@@ -269,16 +324,16 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 					 */
 					sk->sk_bound_dev_if = addr->sin6_scope_id;
 				}
-				
+
 				/* Binding to link-local address requires an interface */
 				if (!sk->sk_bound_dev_if) {
 					err = -EINVAL;
-					goto out;
+					goto out_unlock;
 				}
-				dev = dev_get_by_index(sk->sk_bound_dev_if);
+				dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
 				if (!dev) {
 					err = -ENODEV;
-					goto out;
+					goto out_unlock;
 				}
 			}
 
@@ -287,25 +342,24 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 			 */
 			v4addr = LOOPBACK4_IPV6;
 			if (!(addr_type & IPV6_ADDR_MULTICAST))	{
-				if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) {
-					if (dev)
-						dev_put(dev);
+				if (!(inet->freebind || inet->transparent) &&
+				    !ipv6_chk_addr(net, &addr->sin6_addr,
+						   dev, 0)) {
 					err = -EADDRNOTAVAIL;
-					goto out;
+					goto out_unlock;
 				}
 			}
-			if (dev)
-				dev_put(dev);
+			rcu_read_unlock();
 		}
 	}
 
-	inet->rcv_saddr = v4addr;
-	inet->saddr = v4addr;
+	inet->inet_rcv_saddr = v4addr;
+	inet->inet_saddr = v4addr;
+
+	sk->sk_v6_rcv_saddr = addr->sin6_addr;
 
-	ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
-		
 	if (!(addr_type & IPV6_ADDR_MULTICAST))
-		ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
+		np->saddr = addr->sin6_addr;
 
 	/* Make sure we are allowed to bind here. */
 	if (sk->sk_prot->get_port(sk, snum)) {
@@ -314,17 +368,24 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		goto out;
 	}
 
-	if (addr_type != IPV6_ADDR_ANY)
+	if (addr_type != IPV6_ADDR_ANY) {
 		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
+		if (addr_type != IPV6_ADDR_MAPPED)
+			np->ipv6only = 1;
+	}
 	if (snum)
 		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
-	inet->sport = ntohs(inet->num);
-	inet->dport = 0;
-	inet->daddr = 0;
+	inet->inet_sport = htons(inet->inet_num);
+	inet->inet_dport = 0;
+	inet->inet_daddr = 0;
 out:
 	release_sock(sk);
 	return err;
+out_unlock:
+	rcu_read_unlock();
+	goto out;
 }
+EXPORT_SYMBOL(inet6_bind);
 
 int inet6_release(struct socket *sock)
 {
@@ -341,22 +402,22 @@ int inet6_release(struct socket *sock)
 
 	return inet_release(sock);
 }
+EXPORT_SYMBOL(inet6_release);
 
-int inet6_destroy_sock(struct sock *sk)
+void inet6_destroy_sock(struct sock *sk)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff *skb;
 	struct ipv6_txoptions *opt;
 
-	/*
-	 *	Release destination entry
-	 */
-
-	sk_dst_reset(sk);
-
 	/* Release rx options */
 
-	if ((skb = xchg(&np->pktoptions, NULL)) != NULL)
+	skb = xchg(&np->pktoptions, NULL);
+	if (skb != NULL)
+		kfree_skb(skb);
+
+	skb = xchg(&np->rxpmtu, NULL);
+	if (skb != NULL)
 		kfree_skb(skb);
 
 	/* Free flowlabels */
@@ -364,177 +425,159 @@ int inet6_destroy_sock(struct sock *sk)
 
 	/* Free tx options */
 
-	if ((opt = xchg(&np->opt, NULL)) != NULL)
+	opt = xchg(&np->opt, NULL);
+	if (opt != NULL)
 		sock_kfree_s(sk, opt, opt->tot_len);
-
-	return 0;
 }
+EXPORT_SYMBOL_GPL(inet6_destroy_sock);
 
 /*
  *	This does both peername and sockname.
  */
- 
+
 int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 		 int *uaddr_len, int peer)
 {
-	struct sockaddr_in6 *sin=(struct sockaddr_in6 *)uaddr;
+	struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
 	struct sock *sk = sock->sk;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
-  
+
 	sin->sin6_family = AF_INET6;
 	sin->sin6_flowinfo = 0;
 	sin->sin6_scope_id = 0;
 	if (peer) {
-		if (!inet->dport)
+		if (!inet->inet_dport)
 			return -ENOTCONN;
 		if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
 		    peer == 1)
 			return -ENOTCONN;
-		sin->sin6_port = inet->dport;
-		ipv6_addr_copy(&sin->sin6_addr, &np->daddr);
+		sin->sin6_port = inet->inet_dport;
+		sin->sin6_addr = sk->sk_v6_daddr;
 		if (np->sndflow)
 			sin->sin6_flowinfo = np->flow_label;
 	} else {
-		if (ipv6_addr_any(&np->rcv_saddr))
-			ipv6_addr_copy(&sin->sin6_addr, &np->saddr);
+		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+			sin->sin6_addr = np->saddr;
 		else
-			ipv6_addr_copy(&sin->sin6_addr, &np->rcv_saddr);
+			sin->sin6_addr = sk->sk_v6_rcv_saddr;
 
-		sin->sin6_port = inet->sport;
+		sin->sin6_port = inet->inet_sport;
 	}
-	if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-		sin->sin6_scope_id = sk->sk_bound_dev_if;
+	sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
+						 sk->sk_bound_dev_if);
 	*uaddr_len = sizeof(*sin);
-	return(0);
+	return 0;
 }
+EXPORT_SYMBOL(inet6_getname);
 
 int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
 	struct sock *sk = sock->sk;
-	int err = -EINVAL;
+	struct net *net = sock_net(sk);
 
-	switch(cmd) 
-	{
+	switch (cmd) {
 	case SIOCGSTAMP:
 		return sock_get_timestamp(sk, (struct timeval __user *)arg);
 
+	case SIOCGSTAMPNS:
+		return sock_get_timestampns(sk, (struct timespec __user *)arg);
+
 	case SIOCADDRT:
 	case SIOCDELRT:
-	  
-		return(ipv6_route_ioctl(cmd,(void __user *)arg));
+
+		return ipv6_route_ioctl(net, cmd, (void __user *)arg);
 
 	case SIOCSIFADDR:
-		return addrconf_add_ifaddr((void __user *) arg);
+		return addrconf_add_ifaddr(net, (void __user *) arg);
 	case SIOCDIFADDR:
-		return addrconf_del_ifaddr((void __user *) arg);
+		return addrconf_del_ifaddr(net, (void __user *) arg);
 	case SIOCSIFDSTADDR:
-		return addrconf_set_dstaddr((void __user *) arg);
+		return addrconf_set_dstaddr(net, (void __user *) arg);
 	default:
-		if (!sk->sk_prot->ioctl ||
-		    (err = sk->sk_prot->ioctl(sk, cmd, arg)) == -ENOIOCTLCMD)
-			return(dev_ioctl(cmd,(void __user *) arg));		
-		return err;
+		if (!sk->sk_prot->ioctl)
+			return -ENOIOCTLCMD;
+		return sk->sk_prot->ioctl(sk, cmd, arg);
 	}
 	/*NOTREACHED*/
-	return(0);
+	return 0;
 }
-
-struct proto_ops inet6_stream_ops = {
-	.family =	PF_INET6,
-	.owner =	THIS_MODULE,
-	.release =	inet6_release,
-	.bind =		inet6_bind,
-	.connect =	inet_stream_connect,		/* ok		*/
-	.socketpair =	sock_no_socketpair,		/* a do nothing	*/
-	.accept =	inet_accept,			/* ok		*/
-	.getname =	inet6_getname, 
-	.poll =		tcp_poll,			/* ok		*/
-	.ioctl =	inet6_ioctl,			/* must change  */
-	.listen =	inet_listen,			/* ok		*/
-	.shutdown =	inet_shutdown,			/* ok		*/
-	.setsockopt =	sock_common_setsockopt,		/* ok		*/
-	.getsockopt =	sock_common_getsockopt,		/* ok		*/
-	.sendmsg =	inet_sendmsg,			/* ok		*/
-	.recvmsg =	sock_common_recvmsg,		/* ok		*/
-	.mmap =		sock_no_mmap,
-	.sendpage =	tcp_sendpage
+EXPORT_SYMBOL(inet6_ioctl);
+
+const struct proto_ops inet6_stream_ops = {
+	.family		   = PF_INET6,
+	.owner		   = THIS_MODULE,
+	.release	   = inet6_release,
+	.bind		   = inet6_bind,
+	.connect	   = inet_stream_connect,	/* ok		*/
+	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
+	.accept		   = inet_accept,		/* ok		*/
+	.getname	   = inet6_getname,
+	.poll		   = tcp_poll,			/* ok		*/
+	.ioctl		   = inet6_ioctl,		/* must change  */
+	.listen		   = inet_listen,		/* ok		*/
+	.shutdown	   = inet_shutdown,		/* ok		*/
+	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
+	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
+	.sendmsg	   = inet_sendmsg,		/* ok		*/
+	.recvmsg	   = inet_recvmsg,		/* ok		*/
+	.mmap		   = sock_no_mmap,
+	.sendpage	   = inet_sendpage,
+	.splice_read	   = tcp_splice_read,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_sock_common_setsockopt,
+	.compat_getsockopt = compat_sock_common_getsockopt,
+#endif
 };
 
-struct proto_ops inet6_dgram_ops = {
-	.family =	PF_INET6,
-	.owner =	THIS_MODULE,
-	.release =	inet6_release,
-	.bind =		inet6_bind,
-	.connect =	inet_dgram_connect,		/* ok		*/
-	.socketpair =	sock_no_socketpair,		/* a do nothing	*/
-	.accept =	sock_no_accept,			/* a do nothing	*/
-	.getname =	inet6_getname, 
-	.poll =		udp_poll,			/* ok		*/
-	.ioctl =	inet6_ioctl,			/* must change  */
-	.listen =	sock_no_listen,			/* ok		*/
-	.shutdown =	inet_shutdown,			/* ok		*/
-	.setsockopt =	sock_common_setsockopt,		/* ok		*/
-	.getsockopt =	sock_common_getsockopt,		/* ok		*/
-	.sendmsg =	inet_sendmsg,			/* ok		*/
-	.recvmsg =	sock_common_recvmsg,		/* ok		*/
-	.mmap =		sock_no_mmap,
-	.sendpage =	sock_no_sendpage,
+const struct proto_ops inet6_dgram_ops = {
+	.family		   = PF_INET6,
+	.owner		   = THIS_MODULE,
+	.release	   = inet6_release,
+	.bind		   = inet6_bind,
+	.connect	   = inet_dgram_connect,	/* ok		*/
+	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
+	.accept		   = sock_no_accept,		/* a do nothing	*/
+	.getname	   = inet6_getname,
+	.poll		   = udp_poll,			/* ok		*/
+	.ioctl		   = inet6_ioctl,		/* must change  */
+	.listen		   = sock_no_listen,		/* ok		*/
+	.shutdown	   = inet_shutdown,		/* ok		*/
+	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
+	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
+	.sendmsg	   = inet_sendmsg,		/* ok		*/
+	.recvmsg	   = inet_recvmsg,		/* ok		*/
+	.mmap		   = sock_no_mmap,
+	.sendpage	   = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_sock_common_setsockopt,
+	.compat_getsockopt = compat_sock_common_getsockopt,
+#endif
 };
 
-static struct net_proto_family inet6_family_ops = {
+static const struct net_proto_family inet6_family_ops = {
 	.family = PF_INET6,
 	.create = inet6_create,
 	.owner	= THIS_MODULE,
 };
 
-/* Same as inet6_dgram_ops, sans udp_poll.  */
-static struct proto_ops inet6_sockraw_ops = {
-	.family =	PF_INET6,
-	.owner =	THIS_MODULE,
-	.release =	inet6_release,
-	.bind =		inet6_bind,
-	.connect =	inet_dgram_connect,		/* ok		*/
-	.socketpair =	sock_no_socketpair,		/* a do nothing	*/
-	.accept =	sock_no_accept,			/* a do nothing	*/
-	.getname =	inet6_getname, 
-	.poll =		datagram_poll,			/* ok		*/
-	.ioctl =	inet6_ioctl,			/* must change  */
-	.listen =	sock_no_listen,			/* ok		*/
-	.shutdown =	inet_shutdown,			/* ok		*/
-	.setsockopt =	sock_common_setsockopt,		/* ok		*/
-	.getsockopt =	sock_common_getsockopt,		/* ok		*/
-	.sendmsg =	inet_sendmsg,			/* ok		*/
-	.recvmsg =	sock_common_recvmsg,		/* ok		*/
-	.mmap =		sock_no_mmap,
-	.sendpage =	sock_no_sendpage,
-};
-
-static struct inet_protosw rawv6_protosw = {
-	.type		= SOCK_RAW,
-	.protocol	= IPPROTO_IP,	/* wild card */
-	.prot		= &rawv6_prot,
-	.ops		= &inet6_sockraw_ops,
-	.capability	= CAP_NET_RAW,
-	.no_check	= UDP_CSUM_DEFAULT,
-	.flags		= INET_PROTOSW_REUSE,
-};
-
-void
-inet6_register_protosw(struct inet_protosw *p)
+int inet6_register_protosw(struct inet_protosw *p)
 {
 	struct list_head *lh;
 	struct inet_protosw *answer;
-	int protocol = p->protocol;
 	struct list_head *last_perm;
+	int protocol = p->protocol;
+	int ret;
 
 	spin_lock_bh(&inetsw6_lock);
 
+	ret = -EINVAL;
 	if (p->type >= SOCK_MAX)
 		goto out_illegal;
 
 	/* If we are trying to override a permanent protocol, bail. */
 	answer = NULL;
+	ret = -EPERM;
 	last_perm = &inetsw6[p->type];
 	list_for_each(lh, &inetsw6[p->type]) {
 		answer = list_entry(lh, struct inet_protosw, list);
@@ -554,32 +597,31 @@ inet6_register_protosw(struct inet_protosw *p)
 	/* Add the new entry after the last permanent entry if any, so that
 	 * the new entry does not override a permanent entry when matched with
 	 * a wild-card protocol. But it is allowed to override any existing
-	 * non-permanent entry.  This means that when we remove this entry, the 
+	 * non-permanent entry.  This means that when we remove this entry, the
 	 * system automatically returns to the old behavior.
 	 */
 	list_add_rcu(&p->list, last_perm);
+	ret = 0;
 out:
 	spin_unlock_bh(&inetsw6_lock);
-	return;
+	return ret;
 
 out_permanent:
-	printk(KERN_ERR "Attempt to override permanent protocol %d.\n",
-	       protocol);
+	pr_err("Attempt to override permanent protocol %d\n", protocol);
 	goto out;
 
 out_illegal:
-	printk(KERN_ERR
-	       "Ignoring attempt to register invalid socket type %d.\n",
+	pr_err("Ignoring attempt to register invalid socket type %d\n",
 	       p->type);
 	goto out;
 }
+EXPORT_SYMBOL(inet6_register_protosw);
 
 void
 inet6_unregister_protosw(struct inet_protosw *p)
 {
 	if (INET_PROTOSW_PERMANENT & p->flags) {
-		printk(KERN_ERR
-		       "Attempt to unregister permanent protocol %d.\n",
+		pr_err("Attempt to unregister permanent protocol %d\n",
 		       p->protocol);
 	} else {
 		spin_lock_bh(&inetsw6_lock);
@@ -589,89 +631,207 @@ inet6_unregister_protosw(struct inet_protosw *p)
 		synchronize_net();
 	}
 }
+EXPORT_SYMBOL(inet6_unregister_protosw);
 
-int
-snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
+int inet6_sk_rebuild_header(struct sock *sk)
 {
-	if (ptr == NULL)
-		return -EINVAL;
-
-	ptr[0] = __alloc_percpu(mibsize, mibalign);
-	if (!ptr[0])
-		goto err0;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct dst_entry *dst;
+
+	dst = __sk_dst_check(sk, np->dst_cookie);
+
+	if (dst == NULL) {
+		struct inet_sock *inet = inet_sk(sk);
+		struct in6_addr *final_p, final;
+		struct flowi6 fl6;
+
+		memset(&fl6, 0, sizeof(fl6));
+		fl6.flowi6_proto = sk->sk_protocol;
+		fl6.daddr = sk->sk_v6_daddr;
+		fl6.saddr = np->saddr;
+		fl6.flowlabel = np->flow_label;
+		fl6.flowi6_oif = sk->sk_bound_dev_if;
+		fl6.flowi6_mark = sk->sk_mark;
+		fl6.fl6_dport = inet->inet_dport;
+		fl6.fl6_sport = inet->inet_sport;
+		security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+		final_p = fl6_update_dst(&fl6, np->opt, &final);
+
+		dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+		if (IS_ERR(dst)) {
+			sk->sk_route_caps = 0;
+			sk->sk_err_soft = -PTR_ERR(dst);
+			return PTR_ERR(dst);
+		}
 
-	ptr[1] = __alloc_percpu(mibsize, mibalign);
-	if (!ptr[1])
-		goto err1;
+		__ip6_dst_store(sk, dst, NULL, NULL);
+	}
 
 	return 0;
+}
+EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
 
-err1:
-	free_percpu(ptr[0]);
-	ptr[0] = NULL;
-err0:
-	return -ENOMEM;
+bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb)
+{
+	const struct ipv6_pinfo *np = inet6_sk(sk);
+	const struct inet6_skb_parm *opt = IP6CB(skb);
+
+	if (np->rxopt.all) {
+		if ((opt->hop && (np->rxopt.bits.hopopts ||
+				  np->rxopt.bits.ohopopts)) ||
+		    (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) &&
+		     np->rxopt.bits.rxflow) ||
+		    (opt->srcrt && (np->rxopt.bits.srcrt ||
+		     np->rxopt.bits.osrcrt)) ||
+		    ((opt->dst1 || opt->dst0) &&
+		     (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
+			return true;
+	}
+	return false;
 }
+EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
 
-void
-snmp6_mib_free(void *ptr[2])
+static struct packet_type ipv6_packet_type __read_mostly = {
+	.type = cpu_to_be16(ETH_P_IPV6),
+	.func = ipv6_rcv,
+};
+
+static int __init ipv6_packet_init(void)
+{
+	dev_add_pack(&ipv6_packet_type);
+	return 0;
+}
+
+static void ipv6_packet_cleanup(void)
 {
-	if (ptr == NULL)
-		return;
-	if (ptr[0])
-		free_percpu(ptr[0]);
-	if (ptr[1])
-		free_percpu(ptr[1]);
-	ptr[0] = ptr[1] = NULL;
+	dev_remove_pack(&ipv6_packet_type);
 }
 
-static int __init init_ipv6_mibs(void)
+static int __net_init ipv6_init_mibs(struct net *net)
 {
-	if (snmp6_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
-			   __alignof__(struct ipstats_mib)) < 0)
+	int i;
+
+	net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib);
+	if (!net->mib.udp_stats_in6)
+		return -ENOMEM;
+	net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib);
+	if (!net->mib.udplite_stats_in6)
+		goto err_udplite_mib;
+	net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib);
+	if (!net->mib.ipv6_statistics)
 		goto err_ip_mib;
-	if (snmp6_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
-			   __alignof__(struct icmpv6_mib)) < 0)
+
+	for_each_possible_cpu(i) {
+		struct ipstats_mib *af_inet6_stats;
+		af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i);
+		u64_stats_init(&af_inet6_stats->syncp);
+	}
+
+
+	net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib);
+	if (!net->mib.icmpv6_statistics)
 		goto err_icmp_mib;
-	if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
-			   __alignof__(struct udp_mib)) < 0)
-		goto err_udp_mib;
+	net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib),
+						GFP_KERNEL);
+	if (!net->mib.icmpv6msg_statistics)
+		goto err_icmpmsg_mib;
 	return 0;
 
-err_udp_mib:
-	snmp6_mib_free((void **)icmpv6_statistics);
+err_icmpmsg_mib:
+	free_percpu(net->mib.icmpv6_statistics);
 err_icmp_mib:
-	snmp6_mib_free((void **)ipv6_statistics);
+	free_percpu(net->mib.ipv6_statistics);
 err_ip_mib:
+	free_percpu(net->mib.udplite_stats_in6);
+err_udplite_mib:
+	free_percpu(net->mib.udp_stats_in6);
 	return -ENOMEM;
-	
 }
 
-static void cleanup_ipv6_mibs(void)
+static void ipv6_cleanup_mibs(struct net *net)
 {
-	snmp6_mib_free((void **)ipv6_statistics);
-	snmp6_mib_free((void **)icmpv6_statistics);
-	snmp6_mib_free((void **)udp_stats_in6);
+	free_percpu(net->mib.udp_stats_in6);
+	free_percpu(net->mib.udplite_stats_in6);
+	free_percpu(net->mib.ipv6_statistics);
+	free_percpu(net->mib.icmpv6_statistics);
+	kfree(net->mib.icmpv6msg_statistics);
 }
 
-static int __init inet6_init(void)
+static int __net_init inet6_net_init(struct net *net)
 {
-	struct sk_buff *dummy_skb;
-        struct list_head *r;
-	int err;
+	int err = 0;
+
+	net->ipv6.sysctl.bindv6only = 0;
+	net->ipv6.sysctl.icmpv6_time = 1*HZ;
+	net->ipv6.sysctl.flowlabel_consistency = 1;
+	atomic_set(&net->ipv6.rt_genid, 0);
 
-#ifdef MODULE
-#if 0 /* FIXME --RR */
-	if (!mod_member_present(&__this_module, can_unload))
-	  return -EINVAL;
+	err = ipv6_init_mibs(net);
+	if (err)
+		return err;
+#ifdef CONFIG_PROC_FS
+	err = udp6_proc_init(net);
+	if (err)
+		goto out;
+	err = tcp6_proc_init(net);
+	if (err)
+		goto proc_tcp6_fail;
+	err = ac6_proc_init(net);
+	if (err)
+		goto proc_ac6_fail;
+#endif
+	return err;
 
-	__this_module.can_unload = &ipv6_unload;
+#ifdef CONFIG_PROC_FS
+proc_ac6_fail:
+	tcp6_proc_exit(net);
+proc_tcp6_fail:
+	udp6_proc_exit(net);
+out:
+	ipv6_cleanup_mibs(net);
+	return err;
 #endif
+}
+
+static void __net_exit inet6_net_exit(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+	udp6_proc_exit(net);
+	tcp6_proc_exit(net);
+	ac6_proc_exit(net);
 #endif
+	ipv6_cleanup_mibs(net);
+}
 
-	if (sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)) {
-		printk(KERN_CRIT "inet6_proto_init: size fault\n");
-		return -EINVAL;
+static struct pernet_operations inet6_net_ops = {
+	.init = inet6_net_init,
+	.exit = inet6_net_exit,
+};
+
+static const struct ipv6_stub ipv6_stub_impl = {
+	.ipv6_sock_mc_join = ipv6_sock_mc_join,
+	.ipv6_sock_mc_drop = ipv6_sock_mc_drop,
+	.ipv6_dst_lookup = ip6_dst_lookup,
+	.udpv6_encap_enable = udpv6_encap_enable,
+	.ndisc_send_na = ndisc_send_na,
+	.nd_tbl	= &nd_tbl,
+};
+
+static int __init inet6_init(void)
+{
+	struct list_head *r;
+	int err = 0;
+
+	BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
+
+	/* Register the socket-side information for inet6_create.  */
+	for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
+		INIT_LIST_HEAD(r);
+
+	if (disable_ipv6_mod) {
+		pr_info("Loaded, but administratively disabled, reboot required to enable\n");
+		goto out;
 	}
 
 	err = proto_register(&tcpv6_prot, 1);
@@ -682,49 +842,57 @@ static int __init inet6_init(void)
 	if (err)
 		goto out_unregister_tcp_proto;
 
-	err = proto_register(&rawv6_prot, 1);
+	err = proto_register(&udplitev6_prot, 1);
 	if (err)
 		goto out_unregister_udp_proto;
 
+	err = proto_register(&rawv6_prot, 1);
+	if (err)
+		goto out_unregister_udplite_proto;
 
-	/* Register the socket-side information for inet6_create.  */
-	for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
-		INIT_LIST_HEAD(r);
+	err = proto_register(&pingv6_prot, 1);
+	if (err)
+		goto out_unregister_ping_proto;
 
 	/* We MUST register RAW sockets before we create the ICMP6,
 	 * IGMP6, or NDISC control sockets.
 	 */
-	inet6_register_protosw(&rawv6_protosw);
+	err = rawv6_init();
+	if (err)
+		goto out_unregister_raw_proto;
 
 	/* Register the family here so that the init calls below will
 	 * be able to create sockets. (?? is this dangerous ??)
 	 */
-	(void) sock_register(&inet6_family_ops);
-
-	/* Initialise ipv6 mibs */
-	err = init_ipv6_mibs();
+	err = sock_register(&inet6_family_ops);
 	if (err)
-		goto out_unregister_raw_proto;
-	
+		goto out_sock_register_fail;
+
 	/*
 	 *	ipngwg API draft makes clear that the correct semantics
 	 *	for TCP and UDP is to consider one TCP and UDP instance
-	 *	in a host availiable by both INET and INET6 APIs and
+	 *	in a host available by both INET and INET6 APIs and
 	 *	able to communicate via both network protocols.
 	 */
 
-#ifdef CONFIG_SYSCTL
-	ipv6_sysctl_register();
-#endif
-	err = icmpv6_init(&inet6_family_ops);
+	err = register_pernet_subsys(&inet6_net_ops);
+	if (err)
+		goto register_pernet_fail;
+	err = icmpv6_init();
 	if (err)
 		goto icmp_fail;
-	err = ndisc_init(&inet6_family_ops);
+	err = ip6_mr_init();
+	if (err)
+		goto ipmr_fail;
+	err = ndisc_init();
 	if (err)
 		goto ndisc_fail;
-	err = igmp6_init(&inet6_family_ops);
+	err = igmp6_init();
 	if (err)
 		goto igmp_fail;
+
+	ipv6_stub = &ipv6_stub_impl;
+
 	err = ipv6_netfilter_init();
 	if (err)
 		goto netfilter_fail;
@@ -733,54 +901,96 @@ static int __init inet6_init(void)
 	err = -ENOMEM;
 	if (raw6_proc_init())
 		goto proc_raw6_fail;
-	if (tcp6_proc_init())
-		goto proc_tcp6_fail;
-	if (udp6_proc_init())
-		goto proc_udp6_fail;
+	if (udplite6_proc_init())
+		goto proc_udplite6_fail;
 	if (ipv6_misc_proc_init())
 		goto proc_misc6_fail;
-
-	if (ac6_proc_init())
-		goto proc_anycast6_fail;
 	if (if6_proc_init())
 		goto proc_if6_fail;
 #endif
-	ip6_route_init();
-	ip6_flowlabel_init();
+	err = ip6_route_init();
+	if (err)
+		goto ip6_route_fail;
+	err = ndisc_late_init();
+	if (err)
+		goto ndisc_late_fail;
+	err = ip6_flowlabel_init();
+	if (err)
+		goto ip6_flowlabel_fail;
 	err = addrconf_init();
 	if (err)
 		goto addrconf_fail;
-	sit_init();
 
 	/* Init v6 extension headers. */
-	ipv6_rthdr_init();
-	ipv6_frag_init();
-	ipv6_nodata_init();
-	ipv6_destopt_init();
+	err = ipv6_exthdrs_init();
+	if (err)
+		goto ipv6_exthdrs_fail;
+
+	err = ipv6_frag_init();
+	if (err)
+		goto ipv6_frag_fail;
 
 	/* Init v6 transport protocols. */
-	udpv6_init();
-	tcpv6_init();
+	err = udpv6_init();
+	if (err)
+		goto udpv6_fail;
 
-	ipv6_packet_init();
-	err = 0;
+	err = udplitev6_init();
+	if (err)
+		goto udplitev6_fail;
+
+	err = tcpv6_init();
+	if (err)
+		goto tcpv6_fail;
+
+	err = ipv6_packet_init();
+	if (err)
+		goto ipv6_packet_fail;
+
+	err = pingv6_init();
+	if (err)
+		goto pingv6_fail;
+
+#ifdef CONFIG_SYSCTL
+	err = ipv6_sysctl_register();
+	if (err)
+		goto sysctl_fail;
+#endif
 out:
 	return err;
 
+#ifdef CONFIG_SYSCTL
+sysctl_fail:
+	pingv6_exit();
+#endif
+pingv6_fail:
+	ipv6_packet_cleanup();
+ipv6_packet_fail:
+	tcpv6_exit();
+tcpv6_fail:
+	udplitev6_exit();
+udplitev6_fail:
+	udpv6_exit();
+udpv6_fail:
+	ipv6_frag_exit();
+ipv6_frag_fail:
+	ipv6_exthdrs_exit();
+ipv6_exthdrs_fail:
+	addrconf_cleanup();
 addrconf_fail:
 	ip6_flowlabel_cleanup();
+ip6_flowlabel_fail:
+	ndisc_late_cleanup();
+ndisc_late_fail:
 	ip6_route_cleanup();
+ip6_route_fail:
 #ifdef CONFIG_PROC_FS
 	if6_proc_exit();
 proc_if6_fail:
-	ac6_proc_exit();
-proc_anycast6_fail:
 	ipv6_misc_proc_exit();
 proc_misc6_fail:
-	udp6_proc_exit();
-proc_udp6_fail:
-	tcp6_proc_exit();
-proc_tcp6_fail:
+	udplite6_proc_exit();
+proc_udplite6_fail:
 	raw6_proc_exit();
 proc_raw6_fail:
 #endif
@@ -790,14 +1000,22 @@ netfilter_fail:
 igmp_fail:
 	ndisc_cleanup();
 ndisc_fail:
+	ip6_mr_cleanup();
+ipmr_fail:
 	icmpv6_cleanup();
 icmp_fail:
-#ifdef CONFIG_SYSCTL
-	ipv6_sysctl_unregister();
-#endif
-	cleanup_ipv6_mibs();
+	unregister_pernet_subsys(&inet6_net_ops);
+register_pernet_fail:
+	sock_unregister(PF_INET6);
+	rtnl_unregister_all(PF_INET6);
+out_sock_register_fail:
+	rawv6_exit();
+out_unregister_ping_proto:
+	proto_unregister(&pingv6_prot);
 out_unregister_raw_proto:
 	proto_unregister(&rawv6_prot);
+out_unregister_udplite_proto:
+	proto_unregister(&udplitev6_prot);
 out_unregister_udp_proto:
 	proto_unregister(&udpv6_prot);
 out_unregister_tcp_proto:
@@ -806,36 +1024,4 @@ out_unregister_tcp_proto:
 }
 module_init(inet6_init);
 
-static void __exit inet6_exit(void)
-{
-	/* First of all disallow new sockets creation. */
-	sock_unregister(PF_INET6);
-#ifdef CONFIG_PROC_FS
-	if6_proc_exit();
-	ac6_proc_exit();
- 	ipv6_misc_proc_exit();
- 	udp6_proc_exit();
- 	tcp6_proc_exit();
- 	raw6_proc_exit();
-#endif
-	/* Cleanup code parts. */
-	sit_cleanup();
-	ip6_flowlabel_cleanup();
-	addrconf_cleanup();
-	ip6_route_cleanup();
-	ipv6_packet_cleanup();
-	igmp6_cleanup();
-	ipv6_netfilter_fini();
-	ndisc_cleanup();
-	icmpv6_cleanup();
-#ifdef CONFIG_SYSCTL
-	ipv6_sysctl_unregister();	
-#endif
-	cleanup_ipv6_mibs();
-	proto_unregister(&rawv6_prot);
-	proto_unregister(&udpv6_prot);
-	proto_unregister(&tcpv6_prot);
-}
-module_exit(inet6_exit);
-
 MODULE_ALIAS_NETPROTO(PF_INET6);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index f3629730eb1..72a4930bdc0 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -1,42 +1,119 @@
 /*
  * Copyright (C)2002 USAGI/WIDE Project
- * 
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
- * 
+ *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
  *
  * Authors
  *
- *	Mitsuru KANDA @USAGI       : IPv6 Support 
+ *	Mitsuru KANDA @USAGI       : IPv6 Support
  * 	Kazunori MIYAZAWA @USAGI   :
  * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
- * 	
+ *
  * 	This file is derived from net/ipv4/ah.c.
  */
 
-#include <linux/config.h>
+#define pr_fmt(fmt) "IPv6: " fmt
+
+#include <crypto/hash.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <net/ip.h>
 #include <net/ah.h>
 #include <linux/crypto.h>
 #include <linux/pfkeyv2.h>
 #include <linux/string.h>
+#include <linux/scatterlist.h>
+#include <net/ip6_route.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
+#include <net/protocol.h>
 #include <net/xfrm.h>
-#include <asm/scatterlist.h>
 
-static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
+#define IPV6HDR_BASELEN 8
+
+struct tmp_ext {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+		struct in6_addr saddr;
+#endif
+		struct in6_addr daddr;
+		char hdrs[0];
+};
+
+struct ah_skb_cb {
+	struct xfrm_skb_cb xfrm;
+	void *tmp;
+};
+
+#define AH_SKB_CB(__skb) ((struct ah_skb_cb *)&((__skb)->cb[0]))
+
+static void *ah_alloc_tmp(struct crypto_ahash *ahash, int nfrags,
+			  unsigned int size)
+{
+	unsigned int len;
+
+	len = size + crypto_ahash_digestsize(ahash) +
+	      (crypto_ahash_alignmask(ahash) &
+	       ~(crypto_tfm_ctx_alignment() - 1));
+
+	len = ALIGN(len, crypto_tfm_ctx_alignment());
+
+	len += sizeof(struct ahash_request) + crypto_ahash_reqsize(ahash);
+	len = ALIGN(len, __alignof__(struct scatterlist));
+
+	len += sizeof(struct scatterlist) * nfrags;
+
+	return kmalloc(len, GFP_ATOMIC);
+}
+
+static inline struct tmp_ext *ah_tmp_ext(void *base)
+{
+	return base + IPV6HDR_BASELEN;
+}
+
+static inline u8 *ah_tmp_auth(u8 *tmp, unsigned int offset)
+{
+	return tmp + offset;
+}
+
+static inline u8 *ah_tmp_icv(struct crypto_ahash *ahash, void *tmp,
+			     unsigned int offset)
+{
+	return PTR_ALIGN((u8 *)tmp + offset, crypto_ahash_alignmask(ahash) + 1);
+}
+
+static inline struct ahash_request *ah_tmp_req(struct crypto_ahash *ahash,
+					       u8 *icv)
+{
+	struct ahash_request *req;
+
+	req = (void *)PTR_ALIGN(icv + crypto_ahash_digestsize(ahash),
+				crypto_tfm_ctx_alignment());
+
+	ahash_request_set_tfm(req, ahash);
+
+	return req;
+}
+
+static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash,
+					     struct ahash_request *req)
+{
+	return (void *)ALIGN((unsigned long)(req + 1) +
+			     crypto_ahash_reqsize(ahash),
+			     __alignof__(struct scatterlist));
+}
+
+static bool zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
 {
 	u8 *opt = (u8 *)opthdr;
 	int len = ipv6_optlen(opthdr);
@@ -50,11 +127,11 @@ static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
 
 		switch (opt[off]) {
 
-		case IPV6_TLV_PAD0:
+		case IPV6_TLV_PAD1:
 			optlen = 1;
 			break;
 		default:
-			if (len < 2) 
+			if (len < 2)
 				goto bad;
 			optlen = opt[off+1]+2;
 			if (len < optlen)
@@ -68,11 +145,73 @@ static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
 		len -= optlen;
 	}
 	if (len == 0)
-		return 1;
+		return true;
 
 bad:
-	return 0;
+	return false;
+}
+
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+/**
+ *	ipv6_rearrange_destopt - rearrange IPv6 destination options header
+ *	@iph: IPv6 header
+ *	@destopt: destionation options header
+ */
+static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt)
+{
+	u8 *opt = (u8 *)destopt;
+	int len = ipv6_optlen(destopt);
+	int off = 0;
+	int optlen = 0;
+
+	off += 2;
+	len -= 2;
+
+	while (len > 0) {
+
+		switch (opt[off]) {
+
+		case IPV6_TLV_PAD1:
+			optlen = 1;
+			break;
+		default:
+			if (len < 2)
+				goto bad;
+			optlen = opt[off+1]+2;
+			if (len < optlen)
+				goto bad;
+
+			/* Rearrange the source address in @iph and the
+			 * addresses in home address option for final source.
+			 * See 11.3.2 of RFC 3775 for details.
+			 */
+			if (opt[off] == IPV6_TLV_HAO) {
+				struct in6_addr final_addr;
+				struct ipv6_destopt_hao *hao;
+
+				hao = (struct ipv6_destopt_hao *)&opt[off];
+				if (hao->length != sizeof(hao->addr)) {
+					net_warn_ratelimited("destopt hao: invalid header length: %u\n",
+							     hao->length);
+					goto bad;
+				}
+				final_addr = hao->addr;
+				hao->addr = iph->saddr;
+				iph->saddr = final_addr;
+			}
+			break;
+		}
+
+		off += optlen;
+		len -= optlen;
+	}
+	/* Note: ok if len == 0 */
+bad:
+	return;
 }
+#else
+static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt) {}
+#endif
 
 /**
  *	ipv6_rearrange_rthdr - rearrange IPv6 routing header
@@ -92,7 +231,7 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr)
 	segments_left = rthdr->segments_left;
 	if (segments_left == 0)
 		return;
-	rthdr->segments_left = 0; 
+	rthdr->segments_left = 0;
 
 	/* The value of rthdr->hdrlen has been verified either by the system
 	 * call if it is locally generated, or by ipv6_rthdr_rcv() for incoming
@@ -104,16 +243,16 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr)
 	segments = rthdr->hdrlen >> 1;
 
 	addrs = ((struct rt0_hdr *)rthdr)->addr;
-	ipv6_addr_copy(&final_addr, addrs + segments - 1);
+	final_addr = addrs[segments - 1];
 
 	addrs += segments - segments_left;
 	memmove(addrs + 1, addrs, (segments_left - 1) * sizeof(*addrs));
 
-	ipv6_addr_copy(addrs, &iph->daddr);
-	ipv6_addr_copy(&iph->daddr, &final_addr);
+	addrs[0] = iph->daddr;
+	iph->daddr = final_addr;
 }
 
-static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
+static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
 {
 	union {
 		struct ipv6hdr *iph;
@@ -128,8 +267,10 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
 
 	while (exthdr.raw < end) {
 		switch (nexthdr) {
-		case NEXTHDR_HOP:
 		case NEXTHDR_DEST:
+			if (dir == XFRM_POLICY_OUT)
+				ipv6_rearrange_destopt(iph, exthdr.opth);
+		case NEXTHDR_HOP:
 			if (!zero_out_mutable_opts(exthdr.opth)) {
 				LIMIT_NETDEBUG(
 					KERN_WARNING "overrun %sopts\n",
@@ -154,49 +295,119 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
 	return 0;
 }
 
+static void ah6_output_done(struct crypto_async_request *base, int err)
+{
+	int extlen;
+	u8 *iph_base;
+	u8 *icv;
+	struct sk_buff *skb = base->data;
+	struct xfrm_state *x = skb_dst(skb)->xfrm;
+	struct ah_data *ahp = x->data;
+	struct ipv6hdr *top_iph = ipv6_hdr(skb);
+	struct ip_auth_hdr *ah = ip_auth_hdr(skb);
+	struct tmp_ext *iph_ext;
+
+	extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr);
+	if (extlen)
+		extlen += sizeof(*iph_ext);
+
+	iph_base = AH_SKB_CB(skb)->tmp;
+	iph_ext = ah_tmp_ext(iph_base);
+	icv = ah_tmp_icv(ahp->ahash, iph_ext, extlen);
+
+	memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
+	memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
+
+	if (extlen) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+		memcpy(&top_iph->saddr, iph_ext, extlen);
+#else
+		memcpy(&top_iph->daddr, iph_ext, extlen);
+#endif
+	}
+
+	kfree(AH_SKB_CB(skb)->tmp);
+	xfrm_output_resume(skb, err);
+}
+
 static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
+	int nfrags;
 	int extlen;
+	u8 *iph_base;
+	u8 *icv;
+	u8 nexthdr;
+	struct sk_buff *trailer;
+	struct crypto_ahash *ahash;
+	struct ahash_request *req;
+	struct scatterlist *sg;
 	struct ipv6hdr *top_iph;
 	struct ip_auth_hdr *ah;
 	struct ah_data *ahp;
-	u8 nexthdr;
-	char tmp_base[8];
-	struct {
-		struct in6_addr daddr;
-		char hdrs[0];
-	} *tmp_ext;
+	struct tmp_ext *iph_ext;
+	int seqhi_len = 0;
+	__be32 *seqhi;
+	int sglists = 0;
+	struct scatterlist *seqhisg;
 
-	top_iph = (struct ipv6hdr *)skb->data;
+	ahp = x->data;
+	ahash = ahp->ahash;
+
+	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+		goto out;
+	nfrags = err;
+
+	skb_push(skb, -skb_network_offset(skb));
+	extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr);
+	if (extlen)
+		extlen += sizeof(*iph_ext);
+
+	if (x->props.flags & XFRM_STATE_ESN) {
+		sglists = 1;
+		seqhi_len = sizeof(*seqhi);
+	}
+	err = -ENOMEM;
+	iph_base = ah_alloc_tmp(ahash, nfrags + sglists, IPV6HDR_BASELEN +
+				extlen + seqhi_len);
+	if (!iph_base)
+		goto out;
+
+	iph_ext = ah_tmp_ext(iph_base);
+	seqhi = (__be32 *)((char *)iph_ext + extlen);
+	icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
+	req = ah_tmp_req(ahash, icv);
+	sg = ah_req_sg(ahash, req);
+	seqhisg = sg + nfrags;
+
+	ah = ip_auth_hdr(skb);
+	memset(ah->auth_data, 0, ahp->icv_trunc_len);
+
+	top_iph = ipv6_hdr(skb);
 	top_iph->payload_len = htons(skb->len - sizeof(*top_iph));
 
-	nexthdr = *skb->nh.raw;
-	*skb->nh.raw = IPPROTO_AH;
+	nexthdr = *skb_mac_header(skb);
+	*skb_mac_header(skb) = IPPROTO_AH;
 
 	/* When there are no extension headers, we only need to save the first
 	 * 8 bytes of the base IP header.
 	 */
-	memcpy(tmp_base, top_iph, sizeof(tmp_base));
+	memcpy(iph_base, top_iph, IPV6HDR_BASELEN);
 
-	tmp_ext = NULL;
-	extlen = skb->h.raw - (unsigned char *)(top_iph + 1);
 	if (extlen) {
-		extlen += sizeof(*tmp_ext);
-		tmp_ext = kmalloc(extlen, GFP_ATOMIC);
-		if (!tmp_ext) {
-			err = -ENOMEM;
-			goto error;
-		}
-		memcpy(tmp_ext, &top_iph->daddr, extlen);
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+		memcpy(iph_ext, &top_iph->saddr, extlen);
+#else
+		memcpy(iph_ext, &top_iph->daddr, extlen);
+#endif
 		err = ipv6_clear_mutable_options(top_iph,
-						 extlen - sizeof(*tmp_ext) +
-						 sizeof(*top_iph));
+						 extlen - sizeof(*iph_ext) +
+						 sizeof(*top_iph),
+						 XFRM_POLICY_OUT);
 		if (err)
-			goto error_free_iph;
+			goto out_free;
 	}
 
-	ah = (struct ip_auth_hdr *)skb->h.raw;
 	ah->nexthdr = nexthdr;
 
 	top_iph->priority    = 0;
@@ -205,29 +416,89 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph->flow_lbl[2] = 0;
 	top_iph->hop_limit   = 0;
 
-	ahp = x->data;
-	ah->hdrlen  = (XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + 
-				   ahp->icv_trunc_len) >> 2) - 2;
+	ah->hdrlen  = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
 
 	ah->reserved = 0;
 	ah->spi = x->id.spi;
-	ah->seq_no = htonl(++x->replay.oseq);
-	ahp->icv(ahp, skb, ah->auth_data);
+	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
 
-	err = 0;
+	sg_init_table(sg, nfrags + sglists);
+	skb_to_sgvec_nomark(skb, sg, 0, skb->len);
 
-	memcpy(top_iph, tmp_base, sizeof(tmp_base));
-	if (tmp_ext) {
-		memcpy(&top_iph->daddr, tmp_ext, extlen);
-error_free_iph:
-		kfree(tmp_ext);
+	if (x->props.flags & XFRM_STATE_ESN) {
+		/* Attach seqhi sg right after packet payload */
+		*seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+		sg_set_buf(seqhisg, seqhi, seqhi_len);
 	}
+	ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
+	ahash_request_set_callback(req, 0, ah6_output_done, skb);
 
-error:
+	AH_SKB_CB(skb)->tmp = iph_base;
+
+	err = crypto_ahash_digest(req);
+	if (err) {
+		if (err == -EINPROGRESS)
+			goto out;
+
+		if (err == -EBUSY)
+			err = NET_XMIT_DROP;
+		goto out_free;
+	}
+
+	memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
+	memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
+
+	if (extlen) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+		memcpy(&top_iph->saddr, iph_ext, extlen);
+#else
+		memcpy(&top_iph->daddr, iph_ext, extlen);
+#endif
+	}
+
+out_free:
+	kfree(iph_base);
+out:
 	return err;
 }
 
-static int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+static void ah6_input_done(struct crypto_async_request *base, int err)
+{
+	u8 *auth_data;
+	u8 *icv;
+	u8 *work_iph;
+	struct sk_buff *skb = base->data;
+	struct xfrm_state *x = xfrm_input_state(skb);
+	struct ah_data *ahp = x->data;
+	struct ip_auth_hdr *ah = ip_auth_hdr(skb);
+	int hdr_len = skb_network_header_len(skb);
+	int ah_hlen = (ah->hdrlen + 2) << 2;
+
+	work_iph = AH_SKB_CB(skb)->tmp;
+	auth_data = ah_tmp_auth(work_iph, hdr_len);
+	icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len);
+
+	err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0;
+	if (err)
+		goto out;
+
+	err = ah->nexthdr;
+
+	skb->network_header += ah_hlen;
+	memcpy(skb_network_header(skb), work_iph, hdr_len);
+	__skb_pull(skb, ah_hlen + hdr_len);
+	if (x->props.mode == XFRM_MODE_TUNNEL)
+		skb_reset_transport_header(skb);
+	else
+		skb_set_transport_header(skb, -hdr_len);
+out:
+	kfree(AH_SKB_CB(skb)->tmp);
+	xfrm_input_resume(skb, err);
+}
+
+
+
+static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	/*
 	 * Before process AH
@@ -236,173 +507,233 @@ static int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struc
 	 *
 	 * To erase AH:
 	 * Keeping copy of cleared headers. After AH processing,
-	 * Moving the pointer of skb->nh.raw by using skb_pull as long as AH
-	 * header length. Then copy back the copy as long as hdr_len
+	 * Moving the pointer of skb->network_header by using skb_pull as long
+	 * as AH header length. Then copy back the copy as long as hdr_len
 	 * If destination header following AH exists, copy it into after [Ext2].
-	 * 
+	 *
 	 * |<>|[IPv6][Ext1][Ext2][Dest][Payload]
 	 * There is offset of AH before IPv6 header after the process.
 	 */
 
-	struct ipv6_auth_hdr *ah;
+	u8 *auth_data;
+	u8 *icv;
+	u8 *work_iph;
+	struct sk_buff *trailer;
+	struct crypto_ahash *ahash;
+	struct ahash_request *req;
+	struct scatterlist *sg;
+	struct ip_auth_hdr *ah;
+	struct ipv6hdr *ip6h;
 	struct ah_data *ahp;
-	unsigned char *tmp_hdr = NULL;
 	u16 hdr_len;
 	u16 ah_hlen;
 	int nexthdr;
+	int nfrags;
+	int err = -ENOMEM;
+	int seqhi_len = 0;
+	__be32 *seqhi;
+	int sglists = 0;
+	struct scatterlist *seqhisg;
 
 	if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
 		goto out;
 
 	/* We are going to _remove_ AH header to keep sockets happy,
 	 * so... Later this can change. */
-	if (skb_cloned(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		goto out;
 
-	hdr_len = skb->data - skb->nh.raw;
-	ah = (struct ipv6_auth_hdr*)skb->data;
+	skb->ip_summed = CHECKSUM_NONE;
+
+	hdr_len = skb_network_header_len(skb);
+	ah = (struct ip_auth_hdr *)skb->data;
 	ahp = x->data;
+	ahash = ahp->ahash;
+
 	nexthdr = ah->nexthdr;
 	ah_hlen = (ah->hdrlen + 2) << 2;
 
-        if (ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_full_len) &&
-            ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len))
-                goto out;
+	if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) &&
+	    ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len))
+		goto out;
 
 	if (!pskb_may_pull(skb, ah_hlen))
 		goto out;
 
-	tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
-	if (!tmp_hdr)
+
+	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
 		goto out;
-	memcpy(tmp_hdr, skb->nh.raw, hdr_len);
-	if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len))
+	nfrags = err;
+
+	ah = (struct ip_auth_hdr *)skb->data;
+	ip6h = ipv6_hdr(skb);
+
+	skb_push(skb, hdr_len);
+
+	if (x->props.flags & XFRM_STATE_ESN) {
+		sglists = 1;
+		seqhi_len = sizeof(*seqhi);
+	}
+
+	work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len +
+				ahp->icv_trunc_len + seqhi_len);
+	if (!work_iph)
 		goto out;
-	skb->nh.ipv6h->priority    = 0;
-	skb->nh.ipv6h->flow_lbl[0] = 0;
-	skb->nh.ipv6h->flow_lbl[1] = 0;
-	skb->nh.ipv6h->flow_lbl[2] = 0;
-	skb->nh.ipv6h->hop_limit   = 0;
-
-        {
-		u8 auth_data[MAX_AH_AUTH_LEN];
-
-		memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
-		memset(ah->auth_data, 0, ahp->icv_trunc_len);
-		skb_push(skb, skb->data - skb->nh.raw);
-		ahp->icv(ahp, skb, ah->auth_data);
-		if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
-			LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n");
-			x->stats.integrity_failed++;
-			goto free_out;
-		}
+
+	auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len);
+	seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len);
+	icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
+	req = ah_tmp_req(ahash, icv);
+	sg = ah_req_sg(ahash, req);
+	seqhisg = sg + nfrags;
+
+	memcpy(work_iph, ip6h, hdr_len);
+	memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
+	memset(ah->auth_data, 0, ahp->icv_trunc_len);
+
+	if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN))
+		goto out_free;
+
+	ip6h->priority    = 0;
+	ip6h->flow_lbl[0] = 0;
+	ip6h->flow_lbl[1] = 0;
+	ip6h->flow_lbl[2] = 0;
+	ip6h->hop_limit   = 0;
+
+	sg_init_table(sg, nfrags + sglists);
+	skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+
+	if (x->props.flags & XFRM_STATE_ESN) {
+		/* Attach seqhi sg right after packet payload */
+		*seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+		sg_set_buf(seqhisg, seqhi, seqhi_len);
+	}
+
+	ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
+	ahash_request_set_callback(req, 0, ah6_input_done, skb);
+
+	AH_SKB_CB(skb)->tmp = work_iph;
+
+	err = crypto_ahash_digest(req);
+	if (err) {
+		if (err == -EINPROGRESS)
+			goto out;
+
+		goto out_free;
 	}
 
-	skb->nh.raw = skb_pull(skb, ah_hlen);
-	memcpy(skb->nh.raw, tmp_hdr, hdr_len);
-	skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
-	skb_pull(skb, hdr_len);
-	skb->h.raw = skb->data;
+	err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0;
+	if (err)
+		goto out_free;
 
+	skb->network_header += ah_hlen;
+	memcpy(skb_network_header(skb), work_iph, hdr_len);
+	__skb_pull(skb, ah_hlen + hdr_len);
 
-	kfree(tmp_hdr);
+	if (x->props.mode == XFRM_MODE_TUNNEL)
+		skb_reset_transport_header(skb);
+	else
+		skb_set_transport_header(skb, -hdr_len);
 
-	return nexthdr;
+	err = nexthdr;
 
-free_out:
-	kfree(tmp_hdr);
+out_free:
+	kfree(work_iph);
 out:
-	return -EINVAL;
+	return err;
 }
 
-static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 
-                    int type, int code, int offset, __u32 info)
+static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		   u8 type, u8 code, int offset, __be32 info)
 {
+	struct net *net = dev_net(skb->dev);
 	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
 	struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset);
 	struct xfrm_state *x;
 
-	if (type != ICMPV6_DEST_UNREACH &&
-	    type != ICMPV6_PKT_TOOBIG)
-		return;
+	if (type != ICMPV6_PKT_TOOBIG &&
+	    type != NDISC_REDIRECT)
+		return 0;
 
-	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
+	x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
 	if (!x)
-		return;
-
-	NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/"
-		 "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
-		 ntohl(ah->spi), NIP6(iph->daddr));
+		return 0;
 
+	if (type == NDISC_REDIRECT)
+		ip6_redirect(skb, net, skb->dev->ifindex, 0);
+	else
+		ip6_update_pmtu(skb, net, info, 0, 0);
 	xfrm_state_put(x);
+
+	return 0;
 }
 
 static int ah6_init_state(struct xfrm_state *x)
 {
 	struct ah_data *ahp = NULL;
 	struct xfrm_algo_desc *aalg_desc;
+	struct crypto_ahash *ahash;
 
 	if (!x->aalg)
 		goto error;
 
-	/* null auth can use a zero length key */
-	if (x->aalg->alg_key_len > 512)
-		goto error;
-
 	if (x->encap)
 		goto error;
 
-	ahp = kmalloc(sizeof(*ahp), GFP_KERNEL);
+	ahp = kzalloc(sizeof(*ahp), GFP_KERNEL);
 	if (ahp == NULL)
 		return -ENOMEM;
 
-	memset(ahp, 0, sizeof(*ahp));
+	ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0);
+	if (IS_ERR(ahash))
+		goto error;
 
-	ahp->key = x->aalg->alg_key;
-	ahp->key_len = (x->aalg->alg_key_len+7)/8;
-	ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
-	if (!ahp->tfm)
+	ahp->ahash = ahash;
+	if (crypto_ahash_setkey(ahash, x->aalg->alg_key,
+			       (x->aalg->alg_key_len + 7) / 8))
 		goto error;
-	ahp->icv = ah_hmac_digest;
-	
+
 	/*
 	 * Lookup the algorithm description maintained by xfrm_algo,
 	 * verify crypto transform properties, and store information
 	 * we need for AH processing.  This lookup cannot fail here
-	 * after a successful crypto_alloc_tfm().
+	 * after a successful crypto_alloc_hash().
 	 */
 	aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
 	BUG_ON(!aalg_desc);
 
 	if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
-	    crypto_tfm_alg_digestsize(ahp->tfm)) {
-		printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
-		       x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm),
-		       aalg_desc->uinfo.auth.icv_fullbits/8);
+	    crypto_ahash_digestsize(ahash)) {
+		pr_info("AH: %s digestsize %u != %hu\n",
+			x->aalg->alg_name, crypto_ahash_digestsize(ahash),
+			aalg_desc->uinfo.auth.icv_fullbits/8);
 		goto error;
 	}
-	
+
 	ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
-	ahp->icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
-	
+	ahp->icv_trunc_len = x->aalg->alg_trunc_len/8;
+
 	BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
-	
-	ahp->work_icv = kmalloc(ahp->icv_full_len, GFP_KERNEL);
-	if (!ahp->work_icv)
-		goto error;
-	
-	x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len);
-	if (x->props.mode)
+
+	x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
+					  ahp->icv_trunc_len);
+	switch (x->props.mode) {
+	case XFRM_MODE_BEET:
+	case XFRM_MODE_TRANSPORT:
+		break;
+	case XFRM_MODE_TUNNEL:
 		x->props.header_len += sizeof(struct ipv6hdr);
+		break;
+	default:
+		goto error;
+	}
 	x->data = ahp;
 
 	return 0;
 
 error:
 	if (ahp) {
-		kfree(ahp->work_icv);
-		crypto_free_tfm(ahp->tfm);
+		crypto_free_ahash(ahp->ahash);
 		kfree(ahp);
 	}
 	return -EINVAL;
@@ -415,39 +746,44 @@ static void ah6_destroy(struct xfrm_state *x)
 	if (!ahp)
 		return;
 
-	kfree(ahp->work_icv);
-	ahp->work_icv = NULL;
-	crypto_free_tfm(ahp->tfm);
-	ahp->tfm = NULL;
+	crypto_free_ahash(ahp->ahash);
 	kfree(ahp);
 }
 
-static struct xfrm_type ah6_type =
+static int ah6_rcv_cb(struct sk_buff *skb, int err)
+{
+	return 0;
+}
+
+static const struct xfrm_type ah6_type =
 {
 	.description	= "AH6",
 	.owner		= THIS_MODULE,
 	.proto	     	= IPPROTO_AH,
+	.flags		= XFRM_TYPE_REPLAY_PROT,
 	.init_state	= ah6_init_state,
 	.destructor	= ah6_destroy,
 	.input		= ah6_input,
-	.output		= ah6_output
+	.output		= ah6_output,
+	.hdr_offset	= xfrm6_find_1stfragopt,
 };
 
-static struct inet6_protocol ah6_protocol = {
+static struct xfrm6_protocol ah6_protocol = {
 	.handler	=	xfrm6_rcv,
+	.cb_handler	=	ah6_rcv_cb,
 	.err_handler	=	ah6_err,
-	.flags		=	INET6_PROTO_NOPOLICY,
+	.priority	=	0,
 };
 
 static int __init ah6_init(void)
 {
 	if (xfrm_register_type(&ah6_type, AF_INET6) < 0) {
-		printk(KERN_INFO "ipv6 ah init: can't add xfrm type\n");
+		pr_info("%s: can't add xfrm type\n", __func__);
 		return -EAGAIN;
 	}
 
-	if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) {
-		printk(KERN_INFO "ipv6 ah init: can't add protocol\n");
+	if (xfrm6_protocol_register(&ah6_protocol, IPPROTO_AH) < 0) {
+		pr_info("%s: can't add protocol\n", __func__);
 		xfrm_unregister_type(&ah6_type, AF_INET6);
 		return -EAGAIN;
 	}
@@ -457,11 +793,11 @@ static int __init ah6_init(void)
 
 static void __exit ah6_fini(void)
 {
-	if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0)
-		printk(KERN_INFO "ipv6 ah close: can't remove protocol\n");
+	if (xfrm6_protocol_deregister(&ah6_protocol, IPPROTO_AH) < 0)
+		pr_info("%s: can't remove protocol\n", __func__);
 
 	if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
-		printk(KERN_INFO "ipv6 ah close: can't remove xfrm type\n");
+		pr_info("%s: can't remove xfrm type\n", __func__);
 
 }
 
@@ -469,3 +805,4 @@ module_init(ah6_init);
 module_exit(ah6_fini);
 
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_AH);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 6b729404723..21018324468 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -1,6 +1,6 @@
 /*
  *	Anycast support for IPv6
- *	Linux INET6 implementation 
+ *	Linux INET6 implementation
  *
  *	Authors:
  *	David L Stevens (dlstevens@us.ibm.com)
@@ -13,7 +13,7 @@
  *      2 of the License, or (at your option) any later version.
  */
 
-#include <linux/config.h>
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/types.h>
@@ -21,7 +21,6 @@
 #include <linux/string.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
-#include <linux/sched.h>
 #include <linux/net.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
@@ -30,7 +29,9 @@
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
 
+#include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/snmp.h>
 
@@ -43,96 +44,73 @@
 
 #include <net/checksum.h>
 
-static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr);
+static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
 
 /* Big ac list lock for all the sockets */
-static DEFINE_RWLOCK(ipv6_sk_ac_lock);
+static DEFINE_SPINLOCK(ipv6_sk_ac_lock);
 
-static int
-ip6_onlink(struct in6_addr *addr, struct net_device *dev)
-{
-	struct inet6_dev	*idev;
-	struct inet6_ifaddr	*ifa;
-	int	onlink;
-
-	onlink = 0;
-	read_lock(&addrconf_lock);
-	idev = __in6_dev_get(dev);
-	if (idev) {
-		read_lock_bh(&idev->lock);
-		for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
-			onlink = ipv6_prefix_equal(addr, &ifa->addr,
-						   ifa->prefix_len);
-			if (onlink)
-				break;
-		}
-		read_unlock_bh(&idev->lock);
-	}
-	read_unlock(&addrconf_lock);
-	return onlink;
-}
 
 /*
  *	socket join an anycast group
  */
 
-int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
+int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev;
 	struct ipv6_ac_socklist *pac;
-	int	ishost = !ipv6_devconf.forwarding;
+	struct net *net = sock_net(sk);
+	int	ishost = !net->ipv6.devconf_all->forwarding;
 	int	err = 0;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 	if (ipv6_addr_is_multicast(addr))
 		return -EINVAL;
-	if (ipv6_chk_addr(addr, NULL, 0))
+	if (ipv6_chk_addr(net, addr, NULL, 0))
 		return -EINVAL;
 
 	pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
 	if (pac == NULL)
 		return -ENOMEM;
 	pac->acl_next = NULL;
-	ipv6_addr_copy(&pac->acl_addr, addr);
+	pac->acl_addr = *addr;
 
+	rcu_read_lock();
 	if (ifindex == 0) {
 		struct rt6_info *rt;
 
-		rt = rt6_lookup(addr, NULL, 0, 0);
+		rt = rt6_lookup(net, addr, NULL, 0, 0);
 		if (rt) {
-			dev = rt->rt6i_dev;
-			dev_hold(dev);
-			dst_release(&rt->u.dst);
+			dev = rt->dst.dev;
+			ip6_rt_put(rt);
 		} else if (ishost) {
 			err = -EADDRNOTAVAIL;
-			goto out_free_pac;
+			goto error;
 		} else {
 			/* router, no matching interface: just pick one */
-
-			dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK);
+			dev = dev_get_by_flags_rcu(net, IFF_UP,
+						   IFF_UP | IFF_LOOPBACK);
 		}
 	} else
-		dev = dev_get_by_index(ifindex);
+		dev = dev_get_by_index_rcu(net, ifindex);
 
 	if (dev == NULL) {
 		err = -ENODEV;
-		goto out_free_pac;
+		goto error;
 	}
 
-	idev = in6_dev_get(dev);
+	idev = __in6_dev_get(dev);
 	if (!idev) {
 		if (ifindex)
 			err = -ENODEV;
 		else
 			err = -EADDRNOTAVAIL;
-		goto out_dev_put;
+		goto error;
 	}
 	/* reset ishost, now that we have a specific device */
 	ishost = !idev->cnf.forwarding;
-	in6_dev_put(idev);
 
 	pac->acl_ifindex = dev->ifindex;
 
@@ -141,43 +119,40 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 	 * This obviates the need for propagating anycast routes while
 	 * still allowing some non-router anycast participation.
 	 */
-	if (!ip6_onlink(addr, dev)) {
+	if (!ipv6_chk_prefix(addr, dev)) {
 		if (ishost)
 			err = -EADDRNOTAVAIL;
 		if (err)
-			goto out_dev_put;
+			goto error;
 	}
 
 	err = ipv6_dev_ac_inc(dev, addr);
-	if (err)
-		goto out_dev_put;
-
-	write_lock_bh(&ipv6_sk_ac_lock);
-	pac->acl_next = np->ipv6_ac_list;
-	np->ipv6_ac_list = pac;
-	write_unlock_bh(&ipv6_sk_ac_lock);
-
-	dev_put(dev);
-
-	return 0;
+	if (!err) {
+		spin_lock_bh(&ipv6_sk_ac_lock);
+		pac->acl_next = np->ipv6_ac_list;
+		np->ipv6_ac_list = pac;
+		spin_unlock_bh(&ipv6_sk_ac_lock);
+		pac = NULL;
+	}
 
-out_dev_put:
-	dev_put(dev);
-out_free_pac:
-	sock_kfree_s(sk, pac, sizeof(*pac));
+error:
+	rcu_read_unlock();
+	if (pac)
+		sock_kfree_s(sk, pac, sizeof(*pac));
 	return err;
 }
 
 /*
  *	socket leave an anycast group
  */
-int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
+int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct net_device *dev;
 	struct ipv6_ac_socklist *pac, *prev_pac;
+	struct net *net = sock_net(sk);
 
-	write_lock_bh(&ipv6_sk_ac_lock);
+	spin_lock_bh(&ipv6_sk_ac_lock);
 	prev_pac = NULL;
 	for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
 		if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
@@ -186,7 +161,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
 		prev_pac = pac;
 	}
 	if (!pac) {
-		write_unlock_bh(&ipv6_sk_ac_lock);
+		spin_unlock_bh(&ipv6_sk_ac_lock);
 		return -ENOENT;
 	}
 	if (prev_pac)
@@ -194,13 +169,14 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
 	else
 		np->ipv6_ac_list = pac->acl_next;
 
-	write_unlock_bh(&ipv6_sk_ac_lock);
+	spin_unlock_bh(&ipv6_sk_ac_lock);
 
-	dev = dev_get_by_index(pac->acl_ifindex);
-	if (dev) {
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
+	if (dev)
 		ipv6_dev_ac_dec(dev, &pac->acl_addr);
-		dev_put(dev);
-	}
+	rcu_read_unlock();
+
 	sock_kfree_s(sk, pac, sizeof(*pac));
 	return 0;
 }
@@ -210,21 +186,24 @@ void ipv6_sock_ac_close(struct sock *sk)
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct net_device *dev = NULL;
 	struct ipv6_ac_socklist *pac;
+	struct net *net = sock_net(sk);
 	int	prev_index;
 
-	write_lock_bh(&ipv6_sk_ac_lock);
+	if (!np->ipv6_ac_list)
+		return;
+
+	spin_lock_bh(&ipv6_sk_ac_lock);
 	pac = np->ipv6_ac_list;
 	np->ipv6_ac_list = NULL;
-	write_unlock_bh(&ipv6_sk_ac_lock);
+	spin_unlock_bh(&ipv6_sk_ac_lock);
 
 	prev_index = 0;
+	rcu_read_lock();
 	while (pac) {
 		struct ipv6_ac_socklist *next = pac->acl_next;
 
 		if (pac->acl_ifindex != prev_index) {
-			if (dev)
-				dev_put(dev);
-			dev = dev_get_by_index(pac->acl_ifindex);
+			dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
 			prev_index = pac->acl_ifindex;
 		}
 		if (dev)
@@ -232,44 +211,14 @@ void ipv6_sock_ac_close(struct sock *sk)
 		sock_kfree_s(sk, pac, sizeof(*pac));
 		pac = next;
 	}
-	if (dev)
-		dev_put(dev);
+	rcu_read_unlock();
 }
 
-#if 0
-/* The function is not used, which is funny. Apparently, author
- * supposed to use it to filter out datagrams inside udp/raw but forgot.
- *
- * It is OK, anycasts are not special comparing to delivery to unicasts.
- */
-
-int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex)
-{
-	struct ipv6_ac_socklist *pac;
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	int	found;
-
-	found = 0;
-	read_lock(&ipv6_sk_ac_lock);
-	for (pac=np->ipv6_ac_list; pac; pac=pac->acl_next) {
-		if (ifindex && pac->acl_ifindex != ifindex)
-			continue;
-		found = ipv6_addr_equal(&pac->acl_addr, addr);
-		if (found)
-			break;
-	}
-	read_unlock(&ipv6_sk_ac_lock);
-
-	return found;
-}
-
-#endif
-
 static void aca_put(struct ifacaddr6 *ac)
 {
 	if (atomic_dec_and_test(&ac->aca_refcnt)) {
 		in6_dev_put(ac->aca_idev);
-		dst_release(&ac->aca_rt->u.dst);
+		dst_release(&ac->aca_rt->dst);
 		kfree(ac);
 	}
 }
@@ -277,7 +226,7 @@ static void aca_put(struct ifacaddr6 *ac)
 /*
  *	device anycast group inc (add if not found)
  */
-int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
+int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
 {
 	struct ifacaddr6 *aca;
 	struct inet6_dev *idev;
@@ -307,23 +256,21 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
 	 *	not found: create a new one.
 	 */
 
-	aca = kmalloc(sizeof(struct ifacaddr6), GFP_ATOMIC);
+	aca = kzalloc(sizeof(struct ifacaddr6), GFP_ATOMIC);
 
 	if (aca == NULL) {
 		err = -ENOMEM;
 		goto out;
 	}
 
-	rt = addrconf_dst_alloc(idev, addr, 1);
+	rt = addrconf_dst_alloc(idev, addr, true);
 	if (IS_ERR(rt)) {
 		kfree(aca);
 		err = PTR_ERR(rt);
 		goto out;
 	}
 
-	memset(aca, 0, sizeof(struct ifacaddr6));
-
-	ipv6_addr_copy(&aca->aca_addr, addr);
+	aca->aca_addr = *addr;
 	aca->aca_idev = idev;
 	aca->aca_rt = rt;
 	aca->aca_users = 1;
@@ -336,9 +283,7 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
 	idev->ac_list = aca;
 	write_unlock_bh(&idev->lock);
 
-	dst_hold(&rt->u.dst);
-	if (ip6_ins_rt(rt, NULL, NULL, NULL))
-		dst_release(&rt->u.dst);
+	ip6_ins_rt(rt);
 
 	addrconf_join_solict(dev, &aca->aca_addr);
 
@@ -353,7 +298,7 @@ out:
 /*
  *	device anycast group decrement
  */
-int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
+int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 {
 	struct ifacaddr6 *aca, *prev_aca;
 
@@ -379,66 +324,80 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
 	write_unlock_bh(&idev->lock);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
-	dst_hold(&aca->aca_rt->u.dst);
-	if (ip6_del_rt(aca->aca_rt, NULL, NULL, NULL))
-		dst_free(&aca->aca_rt->u.dst);
-	else
-		dst_release(&aca->aca_rt->u.dst);
+	dst_hold(&aca->aca_rt->dst);
+	ip6_del_rt(aca->aca_rt);
 
 	aca_put(aca);
 	return 0;
 }
 
-static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr)
+/* called with rcu_read_lock() */
+static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
 {
-	int ret;
-	struct inet6_dev *idev = in6_dev_get(dev);
+	struct inet6_dev *idev = __in6_dev_get(dev);
+
 	if (idev == NULL)
 		return -ENODEV;
-	ret = __ipv6_dev_ac_dec(idev, addr);
-	in6_dev_put(idev);
-	return ret;
+	return __ipv6_dev_ac_dec(idev, addr);
 }
-	
+
 /*
  *	check if the interface has this anycast address
+ *	called with rcu_read_lock()
  */
-static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
+static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr)
 {
 	struct inet6_dev *idev;
 	struct ifacaddr6 *aca;
 
-	idev = in6_dev_get(dev);
+	idev = __in6_dev_get(dev);
 	if (idev) {
 		read_lock_bh(&idev->lock);
 		for (aca = idev->ac_list; aca; aca = aca->aca_next)
 			if (ipv6_addr_equal(&aca->aca_addr, addr))
 				break;
 		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
-		return aca != 0;
+		return aca != NULL;
 	}
-	return 0;
+	return false;
 }
 
 /*
  *	check if given interface (or any, if dev==0) has this anycast address
  */
-int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr)
+bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
+			 const struct in6_addr *addr)
 {
+	bool found = false;
+
+	rcu_read_lock();
 	if (dev)
-		return ipv6_chk_acast_dev(dev, addr);
-	read_lock(&dev_base_lock);
-	for (dev=dev_base; dev; dev=dev->next)
-		if (ipv6_chk_acast_dev(dev, addr))
-			break;
-	read_unlock(&dev_base_lock);
-	return dev != 0;
+		found = ipv6_chk_acast_dev(dev, addr);
+	else
+		for_each_netdev_rcu(net, dev)
+			if (ipv6_chk_acast_dev(dev, addr)) {
+				found = true;
+				break;
+			}
+	rcu_read_unlock();
+	return found;
 }
 
+/*	check if this anycast address is link-local on given interface or
+ *	is global
+ */
+bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
+			     const struct in6_addr *addr)
+{
+	return ipv6_chk_acast_addr(net,
+				   (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL ?
+				    dev : NULL),
+				   addr);
+}
 
 #ifdef CONFIG_PROC_FS
 struct ac6_iter_state {
+	struct seq_net_private p;
 	struct net_device *dev;
 	struct inet6_dev *idev;
 };
@@ -449,12 +408,12 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
 {
 	struct ifacaddr6 *im = NULL;
 	struct ac6_iter_state *state = ac6_seq_private(seq);
+	struct net *net = seq_file_net(seq);
 
-	for (state->dev = dev_base, state->idev = NULL;
-	     state->dev;
-	     state->dev = state->dev->next) {
+	state->idev = NULL;
+	for_each_netdev_rcu(net, state->dev) {
 		struct inet6_dev *idev;
-		idev = in6_dev_get(state->dev);
+		idev = __in6_dev_get(state->dev);
 		if (!idev)
 			continue;
 		read_lock_bh(&idev->lock);
@@ -474,16 +433,15 @@ static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im
 
 	im = im->aca_next;
 	while (!im) {
-		if (likely(state->idev != NULL)) {
+		if (likely(state->idev != NULL))
 			read_unlock_bh(&state->idev->lock);
-			in6_dev_put(state->idev);
-		}
-		state->dev = state->dev->next;
+
+		state->dev = next_net_device_rcu(state->dev);
 		if (!state->dev) {
 			state->idev = NULL;
 			break;
 		}
-		state->idev = in6_dev_get(state->dev);
+		state->idev = __in6_dev_get(state->dev);
 		if (!state->idev)
 			continue;
 		read_lock_bh(&state->idev->lock);
@@ -502,27 +460,30 @@ static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
 {
-	read_lock(&dev_base_lock);
+	rcu_read_lock();
 	return ac6_get_idx(seq, *pos);
 }
 
 static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct ifacaddr6 *im;
-	im = ac6_get_next(seq, v);
+	struct ifacaddr6 *im = ac6_get_next(seq, v);
+
 	++*pos;
 	return im;
 }
 
 static void ac6_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
 {
 	struct ac6_iter_state *state = ac6_seq_private(seq);
+
 	if (likely(state->idev != NULL)) {
 		read_unlock_bh(&state->idev->lock);
-		in6_dev_put(state->idev);
+		state->idev = NULL;
 	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static int ac6_seq_show(struct seq_file *seq, void *v)
@@ -530,17 +491,13 @@ static int ac6_seq_show(struct seq_file *seq, void *v)
 	struct ifacaddr6 *im = (struct ifacaddr6 *)v;
 	struct ac6_iter_state *state = ac6_seq_private(seq);
 
-	seq_printf(seq,
-		   "%-4d %-15s "
-		   "%04x%04x%04x%04x%04x%04x%04x%04x "
-		   "%5d\n",
+	seq_printf(seq, "%-4d %-15s %pi6 %5d\n",
 		   state->dev->ifindex, state->dev->name,
-		   NIP6(im->aca_addr),
-		   im->aca_users);
+		   &im->aca_addr, im->aca_users);
 	return 0;
 }
 
-static struct seq_operations ac6_seq_ops = {
+static const struct seq_operations ac6_seq_ops = {
 	.start	=	ac6_seq_start,
 	.next	=	ac6_seq_next,
 	.stop	=	ac6_seq_stop,
@@ -549,46 +506,29 @@ static struct seq_operations ac6_seq_ops = {
 
 static int ac6_seq_open(struct inode *inode, struct file *file)
 {
-	struct seq_file *seq;
-	int rc = -ENOMEM;
-	struct ac6_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
-
-	if (!s)
-		goto out;
-
-	rc = seq_open(file, &ac6_seq_ops);
-	if (rc)
-		goto out_kfree;
-
-	seq = file->private_data;
-	seq->private = s;
-	memset(s, 0, sizeof(*s));
-out:
-	return rc;
-out_kfree:
-	kfree(s);
-	goto out;
+	return seq_open_net(inode, file, &ac6_seq_ops,
+			    sizeof(struct ac6_iter_state));
 }
 
-static struct file_operations ac6_seq_fops = {
+static const struct file_operations ac6_seq_fops = {
 	.owner		=	THIS_MODULE,
 	.open		=	ac6_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
-	.release	=	seq_release_private,
+	.release	=	seq_release_net,
 };
 
-int __init ac6_proc_init(void)
+int __net_init ac6_proc_init(struct net *net)
 {
-	if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops))
+	if (!proc_create("anycast6", S_IRUGO, net->proc_net, &ac6_seq_fops))
 		return -ENOMEM;
 
 	return 0;
 }
 
-void ac6_proc_exit(void)
+void ac6_proc_exit(struct net *net)
 {
-	proc_net_remove("anycast6");
+	remove_proc_entry("anycast6", net->proc_net);
 }
 #endif
 
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index cc518405b3e..c3bf2d2e519 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -1,11 +1,9 @@
 /*
  *	common UDP/RAW code
- *	Linux INET6 implementation 
+ *	Linux INET6 implementation
  *
  *	Authors:
- *	Pedro Roque		<roque@di.fc.ul.pt>	
- *
- *	$Id: datagram.c,v 1.24 2002/02/01 22:01:04 davem Exp $
+ *	Pedro Roque		<roque@di.fc.ul.pt>
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -13,16 +11,18 @@
  *      2 of the License, or (at your option) any later version.
  */
 
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
 #include <linux/in6.h>
 #include <linux/ipv6.h>
 #include <linux/route.h>
+#include <linux/slab.h>
+#include <linux/export.h>
 
 #include <net/ipv6.h>
 #include <net/ndisc.h>
@@ -30,19 +30,26 @@
 #include <net/transp_v6.h>
 #include <net/ip6_route.h>
 #include <net/tcp_states.h>
+#include <net/dsfield.h>
 
 #include <linux/errqueue.h>
 #include <asm/uaccess.h>
 
+static bool ipv6_mapped_addr_any(const struct in6_addr *a)
+{
+	return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0);
+}
+
 int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
 	struct sockaddr_in6	*usin = (struct sockaddr_in6 *) uaddr;
 	struct inet_sock      	*inet = inet_sk(sk);
 	struct ipv6_pinfo      	*np = inet6_sk(sk);
-	struct in6_addr		*daddr, *final_p = NULL, final;
+	struct in6_addr		*daddr, *final_p, final;
 	struct dst_entry	*dst;
-	struct flowi		fl;
+	struct flowi6		fl6;
 	struct ip6_flowlabel	*flowlabel = NULL;
+	struct ipv6_txoptions   *opt;
 	int			addr_type;
 	int			err;
 
@@ -54,19 +61,18 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	}
 
 	if (addr_len < SIN6_LEN_RFC2133)
-	  	return -EINVAL;
+		return -EINVAL;
 
-	if (usin->sin6_family != AF_INET6) 
-	  	return -EAFNOSUPPORT;
+	if (usin->sin6_family != AF_INET6)
+		return -EAFNOSUPPORT;
 
-	memset(&fl, 0, sizeof(fl));
+	memset(&fl6, 0, sizeof(fl6));
 	if (np->sndflow) {
-		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
-		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
-			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
+			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
 			if (flowlabel == NULL)
 				return -EINVAL;
-			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
 		}
 	}
 
@@ -92,29 +98,32 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		sin.sin_addr.s_addr = daddr->s6_addr32[3];
 		sin.sin_port = usin->sin6_port;
 
-		err = ip4_datagram_connect(sk, 
-					   (struct sockaddr*) &sin, 
+		err = ip4_datagram_connect(sk,
+					   (struct sockaddr *) &sin,
 					   sizeof(sin));
 
 ipv4_connected:
 		if (err)
 			goto out;
-		
-		ipv6_addr_set(&np->daddr, 0, 0, htonl(0x0000ffff), inet->daddr);
 
-		if (ipv6_addr_any(&np->saddr)) {
-			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000ffff),
-				      inet->saddr);
-		}
+		ipv6_addr_set_v4mapped(inet->inet_daddr, &sk->sk_v6_daddr);
 
-		if (ipv6_addr_any(&np->rcv_saddr)) {
-			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000ffff),
-				      inet->rcv_saddr);
+		if (ipv6_addr_any(&np->saddr) ||
+		    ipv6_mapped_addr_any(&np->saddr))
+			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
+
+		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) ||
+		    ipv6_mapped_addr_any(&sk->sk_v6_rcv_saddr)) {
+			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
+					       &sk->sk_v6_rcv_saddr);
+			if (sk->sk_prot->rehash)
+				sk->sk_prot->rehash(sk);
 		}
+
 		goto out;
 	}
 
-	if (addr_type&IPV6_ADDR_LINKLOCAL) {
+	if (__ipv6_addr_needs_scope_id(addr_type)) {
 		if (addr_len >= sizeof(struct sockaddr_in6) &&
 		    usin->sin6_scope_id) {
 			if (sk->sk_bound_dev_if &&
@@ -123,11 +132,11 @@ ipv4_connected:
 				goto out;
 			}
 			sk->sk_bound_dev_if = usin->sin6_scope_id;
-			if (!sk->sk_bound_dev_if &&
-			    (addr_type & IPV6_ADDR_MULTICAST))
-				fl.oif = np->mcast_oif;
 		}
 
+		if (!sk->sk_bound_dev_if && (addr_type & IPV6_ADDR_MULTICAST))
+			sk->sk_bound_dev_if = np->mcast_oif;
+
 		/* Connect to link-local address requires an interface */
 		if (!sk->sk_bound_dev_if) {
 			err = -EINVAL;
@@ -135,74 +144,82 @@ ipv4_connected:
 		}
 	}
 
-	ipv6_addr_copy(&np->daddr, daddr);
-	np->flow_label = fl.fl6_flowlabel;
+	sk->sk_v6_daddr = *daddr;
+	np->flow_label = fl6.flowlabel;
 
-	inet->dport = usin->sin6_port;
+	inet->inet_dport = usin->sin6_port;
 
 	/*
 	 *	Check for a route to destination an obtain the
 	 *	destination cache for it.
 	 */
 
-	fl.proto = sk->sk_protocol;
-	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-	fl.oif = sk->sk_bound_dev_if;
-	fl.fl_ip_dport = inet->dport;
-	fl.fl_ip_sport = inet->sport;
-
-	if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
-		fl.oif = np->mcast_oif;
-
-	if (flowlabel) {
-		if (flowlabel->opt && flowlabel->opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
-	} else if (np->opt && np->opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	fl6.flowi6_proto = sk->sk_protocol;
+	fl6.daddr = sk->sk_v6_daddr;
+	fl6.saddr = np->saddr;
+	fl6.flowi6_oif = sk->sk_bound_dev_if;
+	fl6.flowi6_mark = sk->sk_mark;
+	fl6.fl6_dport = inet->inet_dport;
+	fl6.fl6_sport = inet->inet_sport;
 
-	err = ip6_dst_lookup(sk, &dst, &fl);
-	if (err)
-		goto out;
-	if (final_p)
-		ipv6_addr_copy(&fl.fl6_dst, final_p);
+	if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
+		fl6.flowi6_oif = np->mcast_oif;
+
+	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+	opt = flowlabel ? flowlabel->opt : np->opt;
+	final_p = fl6_update_dst(&fl6, opt, &final);
 
-	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+	err = 0;
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
 		goto out;
+	}
 
 	/* source address lookup done in ip6_dst_lookup */
 
 	if (ipv6_addr_any(&np->saddr))
-		ipv6_addr_copy(&np->saddr, &fl.fl6_src);
+		np->saddr = fl6.saddr;
 
-	if (ipv6_addr_any(&np->rcv_saddr)) {
-		ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
-		inet->rcv_saddr = LOOPBACK4_IPV6;
+	if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+		sk->sk_v6_rcv_saddr = fl6.saddr;
+		inet->inet_rcv_saddr = LOOPBACK4_IPV6;
+		if (sk->sk_prot->rehash)
+			sk->sk_prot->rehash(sk);
 	}
 
 	ip6_dst_store(sk, dst,
-		      ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
-		      &np->daddr : NULL);
+		      ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
+		      &sk->sk_v6_daddr : NULL,
+#ifdef CONFIG_IPV6_SUBTREES
+		      ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
+		      &np->saddr :
+#endif
+		      NULL);
 
 	sk->sk_state = TCP_ESTABLISHED;
 out:
 	fl6_sock_release(flowlabel);
 	return err;
 }
+EXPORT_SYMBOL_GPL(ip6_datagram_connect);
+
+int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr,
+				 int addr_len)
+{
+	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, uaddr);
+	if (sin6->sin6_family != AF_INET6)
+		return -EAFNOSUPPORT;
+	return ip6_datagram_connect(sk, uaddr, addr_len);
+}
+EXPORT_SYMBOL_GPL(ip6_datagram_connect_v6_only);
 
-void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, 
-		     u16 port, u32 info, u8 *payload)
+void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
+		     __be16 port, u32 info, u8 *payload)
 {
 	struct ipv6_pinfo *np  = inet6_sk(sk);
-	struct icmp6hdr *icmph = (struct icmp6hdr *)skb->h.raw;
+	struct icmp6hdr *icmph = icmp6_hdr(skb);
 	struct sock_exterr_skb *serr;
 
 	if (!np->recverr)
@@ -212,25 +229,28 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 	if (!skb)
 		return;
 
+	skb->protocol = htons(ETH_P_IPV6);
+
 	serr = SKB_EXT_ERR(skb);
 	serr->ee.ee_errno = err;
 	serr->ee.ee_origin = SO_EE_ORIGIN_ICMP6;
-	serr->ee.ee_type = icmph->icmp6_type; 
+	serr->ee.ee_type = icmph->icmp6_type;
 	serr->ee.ee_code = icmph->icmp6_code;
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8*)&(((struct ipv6hdr*)(icmph+1))->daddr) - skb->nh.raw;
+	serr->addr_offset = (u8 *)&(((struct ipv6hdr *)(icmph + 1))->daddr) -
+				  skb_network_header(skb);
 	serr->port = port;
 
-	skb->h.raw = payload;
 	__skb_pull(skb, payload - skb->data);
+	skb_reset_transport_header(skb);
 
 	if (sock_queue_err_skb(sk, skb))
 		kfree_skb(skb);
 }
 
-void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
+void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sock_exterr_skb *serr;
@@ -244,37 +264,75 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 	if (!skb)
 		return;
 
-	iph = (struct ipv6hdr*)skb_put(skb, sizeof(struct ipv6hdr));
-	skb->nh.ipv6h = iph;
-	ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
+	skb->protocol = htons(ETH_P_IPV6);
+
+	skb_put(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	iph = ipv6_hdr(skb);
+	iph->daddr = fl6->daddr;
 
 	serr = SKB_EXT_ERR(skb);
 	serr->ee.ee_errno = err;
 	serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
-	serr->ee.ee_type = 0; 
+	serr->ee.ee_type = 0;
 	serr->ee.ee_code = 0;
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
-	serr->port = fl->fl_ip_dport;
+	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
+	serr->port = fl6->fl6_dport;
 
-	skb->h.raw = skb->tail;
-	__skb_pull(skb, skb->tail - skb->data);
+	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
+	skb_reset_transport_header(skb);
 
 	if (sock_queue_err_skb(sk, skb))
 		kfree_skb(skb);
 }
 
-/* 
+void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6hdr *iph;
+	struct sk_buff *skb;
+	struct ip6_mtuinfo *mtu_info;
+
+	if (!np->rxopt.bits.rxpmtu)
+		return;
+
+	skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	skb_put(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	iph = ipv6_hdr(skb);
+	iph->daddr = fl6->daddr;
+
+	mtu_info = IP6CBMTU(skb);
+
+	mtu_info->ip6m_mtu = mtu;
+	mtu_info->ip6m_addr.sin6_family = AF_INET6;
+	mtu_info->ip6m_addr.sin6_port = 0;
+	mtu_info->ip6m_addr.sin6_flowinfo = 0;
+	mtu_info->ip6m_addr.sin6_scope_id = fl6->flowi6_oif;
+	mtu_info->ip6m_addr.sin6_addr = ipv6_hdr(skb)->daddr;
+
+	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
+	skb_reset_transport_header(skb);
+
+	skb = xchg(&np->rxpmtu, skb);
+	kfree_skb(skb);
+}
+
+/*
  *	Handle MSG_ERRQUEUE
  */
-int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
+int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sock_exterr_skb *serr;
 	struct sk_buff *skb, *skb2;
-	struct sockaddr_in6 *sin;
+	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin, msg->msg_name);
 	struct {
 		struct sock_extended_err ee;
 		struct sockaddr_in6	 offender;
@@ -300,24 +358,26 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 
 	serr = SKB_EXT_ERR(skb);
 
-	sin = (struct sockaddr_in6 *)msg->msg_name;
 	if (sin) {
+		const unsigned char *nh = skb_network_header(skb);
 		sin->sin6_family = AF_INET6;
 		sin->sin6_flowinfo = 0;
-		sin->sin6_port = serr->port; 
-		sin->sin6_scope_id = 0;
-		if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
-			ipv6_addr_copy(&sin->sin6_addr,
-			  (struct in6_addr *)(skb->nh.raw + serr->addr_offset));
+		sin->sin6_port = serr->port;
+		if (skb->protocol == htons(ETH_P_IPV6)) {
+			const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset),
+								  struct ipv6hdr, daddr);
+			sin->sin6_addr = ip6h->daddr;
 			if (np->sndflow)
-				sin->sin6_flowinfo = *(u32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK;
-			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-				sin->sin6_scope_id = IP6CB(skb)->iif;
+				sin->sin6_flowinfo = ip6_flowinfo(ip6h);
+			sin->sin6_scope_id =
+				ipv6_iface_scope_id(&sin->sin6_addr,
+						    IP6CB(skb)->iif);
 		} else {
-			ipv6_addr_set(&sin->sin6_addr, 0, 0,
-				      htonl(0xffff),
-				      *(u32*)(skb->nh.raw + serr->addr_offset));
+			ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset),
+					       &sin->sin6_addr);
+			sin->sin6_scope_id = 0;
 		}
+		*addr_len = sizeof(*sin);
 	}
 
 	memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
@@ -326,19 +386,22 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 	if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
 		sin->sin6_family = AF_INET6;
 		sin->sin6_flowinfo = 0;
-		sin->sin6_scope_id = 0;
-		if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
-			ipv6_addr_copy(&sin->sin6_addr, &skb->nh.ipv6h->saddr);
+		sin->sin6_port = 0;
+		if (np->rxopt.all)
+			ip6_datagram_recv_common_ctl(sk, msg, skb);
+		if (skb->protocol == htons(ETH_P_IPV6)) {
+			sin->sin6_addr = ipv6_hdr(skb)->saddr;
 			if (np->rxopt.all)
-				datagram_recv_ctl(sk, msg, skb);
-			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-				sin->sin6_scope_id = IP6CB(skb)->iif;
+				ip6_datagram_recv_specific_ctl(sk, msg, skb);
+			sin->sin6_scope_id =
+				ipv6_iface_scope_id(&sin->sin6_addr,
+						    IP6CB(skb)->iif);
 		} else {
 			struct inet_sock *inet = inet_sk(sk);
 
-			ipv6_addr_set(&sin->sin6_addr, 0, 0,
-				      htonl(0xffff),
-				      skb->nh.iph->saddr);
+			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
+					       &sin->sin6_addr);
+			sin->sin6_scope_id = 0;
 			if (inet->cmsg_flags)
 				ip_cmsg_recv(msg, skb);
 		}
@@ -362,45 +425,112 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 		spin_unlock_bh(&sk->sk_error_queue.lock);
 	}
 
-out_free_skb:	
+out_free_skb:
 	kfree_skb(skb);
 out:
 	return err;
 }
+EXPORT_SYMBOL_GPL(ipv6_recv_error);
+
+/*
+ *	Handle IPV6_RECVPATHMTU
+ */
+int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len,
+		     int *addr_len)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sk_buff *skb;
+	struct ip6_mtuinfo mtu_info;
+	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin, msg->msg_name);
+	int err;
+	int copied;
+
+	err = -EAGAIN;
+	skb = xchg(&np->rxpmtu, NULL);
+	if (skb == NULL)
+		goto out;
+
+	copied = skb->len;
+	if (copied > len) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto out_free_skb;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	memcpy(&mtu_info, IP6CBMTU(skb), sizeof(mtu_info));
+
+	if (sin) {
+		sin->sin6_family = AF_INET6;
+		sin->sin6_flowinfo = 0;
+		sin->sin6_port = 0;
+		sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id;
+		sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr;
+		*addr_len = sizeof(*sin);
+	}
+
+	put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info);
 
+	err = copied;
 
+out_free_skb:
+	kfree_skb(skb);
+out:
+	return err;
+}
 
-int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
+
+void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg,
+				 struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct inet6_skb_parm *opt = IP6CB(skb);
+	bool is_ipv6 = skb->protocol == htons(ETH_P_IPV6);
 
 	if (np->rxopt.bits.rxinfo) {
 		struct in6_pktinfo src_info;
 
-		src_info.ipi6_ifindex = opt->iif;
-		ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
+		if (is_ipv6) {
+			src_info.ipi6_ifindex = IP6CB(skb)->iif;
+			src_info.ipi6_addr = ipv6_hdr(skb)->daddr;
+		} else {
+			src_info.ipi6_ifindex =
+				PKTINFO_SKB_CB(skb)->ipi_ifindex;
+			ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
+					       &src_info.ipi6_addr);
+		}
 		put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
 	}
+}
+
+void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
+				    struct sk_buff *skb)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct inet6_skb_parm *opt = IP6CB(skb);
+	unsigned char *nh = skb_network_header(skb);
 
 	if (np->rxopt.bits.rxhlim) {
-		int hlim = skb->nh.ipv6h->hop_limit;
+		int hlim = ipv6_hdr(skb)->hop_limit;
 		put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
 	}
 
 	if (np->rxopt.bits.rxtclass) {
-		int tclass = (ntohl(*(u32 *)skb->nh.ipv6h) >> 20) & 0xff;
+		int tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 		put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
 	}
 
-	if (np->rxopt.bits.rxflow && (*(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) {
-		u32 flowinfo = *(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK;
-		put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
+	if (np->rxopt.bits.rxflow) {
+		__be32 flowinfo = ip6_flowinfo((struct ipv6hdr *)nh);
+		if (flowinfo)
+			put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
 	}
 
 	/* HbH is allowed only once */
 	if (np->rxopt.bits.hopopts && opt->hop) {
-		u8 *ptr = skb->nh.raw + opt->hop;
+		u8 *ptr = nh + opt->hop;
 		put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
 	}
 
@@ -411,18 +541,18 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		 * report extension headers (except for HbH)
 		 * in order.
 		 *
-		 * Also note that IPV6_RECVRTHDRDSTOPTS is NOT 
+		 * Also note that IPV6_RECVRTHDRDSTOPTS is NOT
 		 * (and WILL NOT be) defined because
 		 * IPV6_RECVDSTOPTS is more generic. --yoshfuji
 		 */
 		unsigned int off = sizeof(struct ipv6hdr);
-		u8 nexthdr = skb->nh.ipv6h->nexthdr;
+		u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 
 		while (off <= opt->lastopt) {
-			unsigned len;
-			u8 *ptr = skb->nh.raw + off;
+			unsigned int len;
+			u8 *ptr = nh + off;
 
-			switch(nexthdr) {
+			switch (nexthdr) {
 			case IPPROTO_DSTOPTS:
 				nexthdr = ptr[0];
 				len = (ptr[1] + 1) << 3;
@@ -437,7 +567,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 				break;
 			case IPPROTO_AH:
 				nexthdr = ptr[0];
-				len = (ptr[1] + 1) << 2;
+				len = (ptr[1] + 2) << 2;
 				break;
 			default:
 				nexthdr = ptr[0];
@@ -454,35 +584,64 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		struct in6_pktinfo src_info;
 
 		src_info.ipi6_ifindex = opt->iif;
-		ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
+		src_info.ipi6_addr = ipv6_hdr(skb)->daddr;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
 	}
 	if (np->rxopt.bits.rxohlim) {
-		int hlim = skb->nh.ipv6h->hop_limit;
+		int hlim = ipv6_hdr(skb)->hop_limit;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
 	}
 	if (np->rxopt.bits.ohopopts && opt->hop) {
-		u8 *ptr = skb->nh.raw + opt->hop;
+		u8 *ptr = nh + opt->hop;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr);
 	}
 	if (np->rxopt.bits.odstopts && opt->dst0) {
-		u8 *ptr = skb->nh.raw + opt->dst0;
+		u8 *ptr = nh + opt->dst0;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
 	}
 	if (np->rxopt.bits.osrcrt && opt->srcrt) {
-		struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt);
+		struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(nh + opt->srcrt);
 		put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr);
 	}
 	if (np->rxopt.bits.odstopts && opt->dst1) {
-		u8 *ptr = skb->nh.raw + opt->dst1;
+		u8 *ptr = nh + opt->dst1;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
 	}
-	return 0;
+	if (np->rxopt.bits.rxorigdstaddr) {
+		struct sockaddr_in6 sin6;
+		__be16 *ports = (__be16 *) skb_transport_header(skb);
+
+		if (skb_transport_offset(skb) + 4 <= skb->len) {
+			/* All current transport protocols have the port numbers in the
+			 * first four bytes of the transport header and this function is
+			 * written with this assumption in mind.
+			 */
+
+			sin6.sin6_family = AF_INET6;
+			sin6.sin6_addr = ipv6_hdr(skb)->daddr;
+			sin6.sin6_port = ports[1];
+			sin6.sin6_flowinfo = 0;
+			sin6.sin6_scope_id =
+				ipv6_iface_scope_id(&ipv6_hdr(skb)->daddr,
+						    opt->iif);
+
+			put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
+		}
+	}
+}
+
+void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
+			  struct sk_buff *skb)
+{
+	ip6_datagram_recv_common_ctl(sk, msg, skb);
+	ip6_datagram_recv_specific_ctl(sk, msg, skb);
 }
+EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
 
-int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
-		      struct ipv6_txoptions *opt,
-		      int *hlimit, int *tclass)
+int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
+			  struct msghdr *msg, struct flowi6 *fl6,
+			  struct ipv6_txoptions *opt,
+			  int *hlimit, int *tclass, int *dontfrag)
 {
 	struct in6_pktinfo *src_info;
 	struct cmsghdr *cmsg;
@@ -493,7 +652,6 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 
 	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
 		int addr_type;
-		struct net_device *dev = NULL;
 
 		if (!CMSG_OK(msg, cmsg)) {
 			err = -EINVAL;
@@ -504,65 +662,77 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 			continue;
 
 		switch (cmsg->cmsg_type) {
- 		case IPV6_PKTINFO:
- 		case IPV6_2292PKTINFO:
- 			if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) {
+		case IPV6_PKTINFO:
+		case IPV6_2292PKTINFO:
+		    {
+			struct net_device *dev = NULL;
+
+			if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) {
 				err = -EINVAL;
 				goto exit_f;
 			}
 
 			src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
-			
+
 			if (src_info->ipi6_ifindex) {
-				if (fl->oif && src_info->ipi6_ifindex != fl->oif)
+				if (fl6->flowi6_oif &&
+				    src_info->ipi6_ifindex != fl6->flowi6_oif)
 					return -EINVAL;
-				fl->oif = src_info->ipi6_ifindex;
+				fl6->flowi6_oif = src_info->ipi6_ifindex;
 			}
 
-			addr_type = ipv6_addr_type(&src_info->ipi6_addr);
+			addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
 
-			if (addr_type == IPV6_ADDR_ANY)
-				break;
-			
-			if (addr_type & IPV6_ADDR_LINKLOCAL) {
-				if (!src_info->ipi6_ifindex)
-					return -EINVAL;
-				else {
-					dev = dev_get_by_index(src_info->ipi6_ifindex);
-					if (!dev)
-						return -ENODEV;
+			rcu_read_lock();
+			if (fl6->flowi6_oif) {
+				dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
+				if (!dev) {
+					rcu_read_unlock();
+					return -ENODEV;
 				}
+			} else if (addr_type & IPV6_ADDR_LINKLOCAL) {
+				rcu_read_unlock();
+				return -EINVAL;
 			}
-			if (!ipv6_chk_addr(&src_info->ipi6_addr, dev, 0)) {
-				if (dev)
-					dev_put(dev);
-				err = -EINVAL;
-				goto exit_f;
+
+			if (addr_type != IPV6_ADDR_ANY) {
+				int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
+				if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
+				    !ipv6_chk_addr(net, &src_info->ipi6_addr,
+						   strict ? dev : NULL, 0) &&
+				    !ipv6_chk_acast_addr_src(net, dev,
+							     &src_info->ipi6_addr))
+					err = -EINVAL;
+				else
+					fl6->saddr = src_info->ipi6_addr;
 			}
-			if (dev)
-				dev_put(dev);
 
-			ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr);
+			rcu_read_unlock();
+
+			if (err)
+				goto exit_f;
+
 			break;
+		    }
 
 		case IPV6_FLOWINFO:
-                        if (cmsg->cmsg_len < CMSG_LEN(4)) {
+			if (cmsg->cmsg_len < CMSG_LEN(4)) {
 				err = -EINVAL;
 				goto exit_f;
 			}
 
-			if (fl->fl6_flowlabel&IPV6_FLOWINFO_MASK) {
-				if ((fl->fl6_flowlabel^*(u32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
+			if (fl6->flowlabel&IPV6_FLOWINFO_MASK) {
+				if ((fl6->flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
 					err = -EINVAL;
 					goto exit_f;
 				}
 			}
-			fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(u32 *)CMSG_DATA(cmsg);
+			fl6->flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg);
 			break;
 
 		case IPV6_2292HOPOPTS:
 		case IPV6_HOPOPTS:
-                        if (opt->hopopt || cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
+			if (opt->hopopt || cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
 				err = -EINVAL;
 				goto exit_f;
 			}
@@ -573,7 +743,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 				err = -EINVAL;
 				goto exit_f;
 			}
-			if (!capable(CAP_NET_RAW)) {
+			if (!ns_capable(net->user_ns, CAP_NET_RAW)) {
 				err = -EPERM;
 				goto exit_f;
 			}
@@ -582,7 +752,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 			break;
 
 		case IPV6_2292DSTOPTS:
-                        if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
+			if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
 				err = -EINVAL;
 				goto exit_f;
 			}
@@ -593,7 +763,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 				err = -EINVAL;
 				goto exit_f;
 			}
-			if (!capable(CAP_NET_RAW)) {
+			if (!ns_capable(net->user_ns, CAP_NET_RAW)) {
 				err = -EPERM;
 				goto exit_f;
 			}
@@ -618,7 +788,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 				err = -EINVAL;
 				goto exit_f;
 			}
-			if (!capable(CAP_NET_RAW)) {
+			if (!ns_capable(net->user_ns, CAP_NET_RAW)) {
 				err = -EPERM;
 				goto exit_f;
 			}
@@ -633,24 +803,31 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 
 		case IPV6_2292RTHDR:
 		case IPV6_RTHDR:
-                        if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) {
+			if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) {
 				err = -EINVAL;
 				goto exit_f;
 			}
 
 			rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
 
-			/*
-			 *	TYPE 0
-			 */
-			if (rthdr->type) {
+			switch (rthdr->type) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+			case IPV6_SRCRT_TYPE_2:
+				if (rthdr->hdrlen != 2 ||
+				    rthdr->segments_left != 1) {
+					err = -EINVAL;
+					goto exit_f;
+				}
+				break;
+#endif
+			default:
 				err = -EINVAL;
 				goto exit_f;
 			}
 
 			len = ((rthdr->hdrlen + 1) << 3);
 
-                        if (cmsg->cmsg_len < CMSG_LEN(len)) {
+			if (cmsg->cmsg_len < CMSG_LEN(len)) {
 				err = -EINVAL;
 				goto exit_f;
 			}
@@ -683,6 +860,11 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 			}
 
 			*hlimit = *(int *)CMSG_DATA(cmsg);
+			if (*hlimit < -1 || *hlimit > 0xff) {
+				err = -EINVAL;
+				goto exit_f;
+			}
+
 			break;
 
 		case IPV6_TCLASS:
@@ -690,12 +872,11 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 			int tc;
 
 			err = -EINVAL;
-			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
 				goto exit_f;
-			}
 
 			tc = *(int *)CMSG_DATA(cmsg);
-			if (tc < 0 || tc > 0xff)
+			if (tc < -1 || tc > 0xff)
 				goto exit_f;
 
 			err = 0;
@@ -703,14 +884,59 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 
 			break;
 		    }
+
+		case IPV6_DONTFRAG:
+		    {
+			int df;
+
+			err = -EINVAL;
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+				goto exit_f;
+
+			df = *(int *)CMSG_DATA(cmsg);
+			if (df < 0 || df > 1)
+				goto exit_f;
+
+			err = 0;
+			*dontfrag = df;
+
+			break;
+		    }
 		default:
 			LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n",
-			               cmsg->cmsg_type);
+				       cmsg->cmsg_type);
 			err = -EINVAL;
-			break;
-		};
+			goto exit_f;
+		}
 	}
 
 exit_f:
 	return err;
 }
+EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl);
+
+void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
+			     __u16 srcp, __u16 destp, int bucket)
+{
+	const struct in6_addr *dest, *src;
+
+	dest  = &sp->sk_v6_daddr;
+	src   = &sp->sk_v6_rcv_saddr;
+	seq_printf(seq,
+		   "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n",
+		   bucket,
+		   src->s6_addr32[0], src->s6_addr32[1],
+		   src->s6_addr32[2], src->s6_addr32[3], srcp,
+		   dest->s6_addr32[0], dest->s6_addr32[1],
+		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
+		   sp->sk_state,
+		   sk_wmem_alloc_get(sp),
+		   sk_rmem_alloc_get(sp),
+		   0, 0L, 0,
+		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
+		   0,
+		   sock_i_ino(sp),
+		   atomic_read(&sp->sk_refcnt), sp,
+		   atomic_read(&sp->sk_drops));
+}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 40d9a1935ab..d15da137714 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -1,401 +1,655 @@
 /*
  * Copyright (C)2002 USAGI/WIDE Project
- * 
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
- * 
+ *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
  *
  * Authors
  *
- *	Mitsuru KANDA @USAGI       : IPv6 Support 
+ *	Mitsuru KANDA @USAGI       : IPv6 Support
  * 	Kazunori MIYAZAWA @USAGI   :
  * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
- * 	
+ *
  * 	This file is derived from net/ipv4/esp.c
  */
 
-#include <linux/config.h>
+#define pr_fmt(fmt) "IPv6: " fmt
+
+#include <crypto/aead.h>
+#include <crypto/authenc.h>
+#include <linux/err.h>
 #include <linux/module.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 #include <net/esp.h>
-#include <asm/scatterlist.h>
-#include <linux/crypto.h>
+#include <linux/scatterlist.h>
 #include <linux/kernel.h>
 #include <linux/pfkeyv2.h>
 #include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <net/ip6_route.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
+#include <net/protocol.h>
 #include <linux/icmpv6.h>
 
+struct esp_skb_cb {
+	struct xfrm_skb_cb xfrm;
+	void *tmp;
+};
+
+#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
+
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu);
+
+/*
+ * Allocate an AEAD request structure with extra space for SG and IV.
+ *
+ * For alignment considerations the upper 32 bits of the sequence number are
+ * placed at the front, if present. Followed by the IV, the request and finally
+ * the SG list.
+ *
+ * TODO: Use spare space in skb for this where possible.
+ */
+static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen)
+{
+	unsigned int len;
+
+	len = seqihlen;
+
+	len += crypto_aead_ivsize(aead);
+
+	if (len) {
+		len += crypto_aead_alignmask(aead) &
+		       ~(crypto_tfm_ctx_alignment() - 1);
+		len = ALIGN(len, crypto_tfm_ctx_alignment());
+	}
+
+	len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead);
+	len = ALIGN(len, __alignof__(struct scatterlist));
+
+	len += sizeof(struct scatterlist) * nfrags;
+
+	return kmalloc(len, GFP_ATOMIC);
+}
+
+static inline __be32 *esp_tmp_seqhi(void *tmp)
+{
+	return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32));
+}
+
+static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
+{
+	return crypto_aead_ivsize(aead) ?
+	       PTR_ALIGN((u8 *)tmp + seqhilen,
+			 crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
+}
+
+static inline struct aead_givcrypt_request *esp_tmp_givreq(
+	struct crypto_aead *aead, u8 *iv)
+{
+	struct aead_givcrypt_request *req;
+
+	req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
+				crypto_tfm_ctx_alignment());
+	aead_givcrypt_set_tfm(req, aead);
+	return req;
+}
+
+static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
+{
+	struct aead_request *req;
+
+	req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
+				crypto_tfm_ctx_alignment());
+	aead_request_set_tfm(req, aead);
+	return req;
+}
+
+static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
+					     struct aead_request *req)
+{
+	return (void *)ALIGN((unsigned long)(req + 1) +
+			     crypto_aead_reqsize(aead),
+			     __alignof__(struct scatterlist));
+}
+
+static inline struct scatterlist *esp_givreq_sg(
+	struct crypto_aead *aead, struct aead_givcrypt_request *req)
+{
+	return (void *)ALIGN((unsigned long)(req + 1) +
+			     crypto_aead_reqsize(aead),
+			     __alignof__(struct scatterlist));
+}
+
+static void esp_output_done(struct crypto_async_request *base, int err)
+{
+	struct sk_buff *skb = base->data;
+
+	kfree(ESP_SKB_CB(skb)->tmp);
+	xfrm_output_resume(skb, err);
+}
+
 static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
-	int hdr_len;
-	struct ipv6hdr *top_iph;
-	struct ipv6_esp_hdr *esph;
-	struct crypto_tfm *tfm;
-	struct esp_data *esp;
+	struct ip_esp_hdr *esph;
+	struct crypto_aead *aead;
+	struct aead_givcrypt_request *req;
+	struct scatterlist *sg;
+	struct scatterlist *asg;
 	struct sk_buff *trailer;
+	void *tmp;
 	int blksize;
 	int clen;
 	int alen;
+	int plen;
+	int tfclen;
 	int nfrags;
+	int assoclen;
+	int sglists;
+	int seqhilen;
+	u8 *iv;
+	u8 *tail;
+	__be32 *seqhi;
+
+	/* skb is pure payload to encrypt */
+	aead = x->data;
+	alen = crypto_aead_authsize(aead);
+
+	tfclen = 0;
+	if (x->tfcpad) {
+		struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
+		u32 padto;
+
+		padto = min(x->tfcpad, esp6_get_mtu(x, dst->child_mtu_cached));
+		if (skb->len < padto)
+			tfclen = padto - skb->len;
+	}
+	blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+	clen = ALIGN(skb->len + 2 + tfclen, blksize);
+	plen = clen - skb->len - tfclen;
 
-	esp = x->data;
-	hdr_len = skb->h.raw - skb->data +
-		  sizeof(*esph) + esp->conf.ivlen;
-
-	/* Strip IP+ESP header. */
-	__skb_pull(skb, hdr_len);
-
-	/* Now skb is pure payload to encrypt */
-	err = -ENOMEM;
+	err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
+	if (err < 0)
+		goto error;
+	nfrags = err;
 
-	/* Round to block size */
-	clen = skb->len;
+	assoclen = sizeof(*esph);
+	sglists = 1;
+	seqhilen = 0;
 
-	alen = esp->auth.icv_trunc_len;
-	tfm = esp->conf.tfm;
-	blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4);
-	clen = ALIGN(clen + 2, blksize);
-	if (esp->conf.padlen)
-		clen = ALIGN(clen, esp->conf.padlen);
+	if (x->props.flags & XFRM_STATE_ESN) {
+		sglists += 2;
+		seqhilen += sizeof(__be32);
+		assoclen += seqhilen;
+	}
 
-	if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) {
+	tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
+	if (!tmp) {
+		err = -ENOMEM;
 		goto error;
 	}
 
+	seqhi = esp_tmp_seqhi(tmp);
+	iv = esp_tmp_iv(aead, tmp, seqhilen);
+	req = esp_tmp_givreq(aead, iv);
+	asg = esp_givreq_sg(aead, req);
+	sg = asg + sglists;
+
 	/* Fill padding... */
+	tail = skb_tail_pointer(trailer);
+	if (tfclen) {
+		memset(tail, 0, tfclen);
+		tail += tfclen;
+	}
 	do {
 		int i;
-		for (i=0; i<clen-skb->len - 2; i++)
-			*(u8*)(trailer->tail + i) = i+1;
+		for (i = 0; i < plen - 2; i++)
+			tail[i] = i + 1;
 	} while (0);
-	*(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
-	pskb_put(skb, trailer, clen - skb->len);
+	tail[plen - 2] = plen - 2;
+	tail[plen - 1] = *skb_mac_header(skb);
+	pskb_put(skb, trailer, clen - skb->len + alen);
 
-	top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len);
-	esph = (struct ipv6_esp_hdr *)skb->h.raw;
-	top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph));
-	*(u8*)(trailer->tail - 1) = *skb->nh.raw;
-	*skb->nh.raw = IPPROTO_ESP;
+	skb_push(skb, -skb_network_offset(skb));
+	esph = ip_esp_hdr(skb);
+	*skb_mac_header(skb) = IPPROTO_ESP;
 
 	esph->spi = x->id.spi;
-	esph->seq_no = htonl(++x->replay.oseq);
+	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+
+	sg_init_table(sg, nfrags);
+	skb_to_sgvec(skb, sg,
+		     esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
+		     clen + alen);
+
+	if ((x->props.flags & XFRM_STATE_ESN)) {
+		sg_init_table(asg, 3);
+		sg_set_buf(asg, &esph->spi, sizeof(__be32));
+		*seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+		sg_set_buf(asg + 1, seqhi, seqhilen);
+		sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
+	} else
+		sg_init_one(asg, esph, sizeof(*esph));
+
+	aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
+	aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
+	aead_givcrypt_set_assoc(req, asg, assoclen);
+	aead_givcrypt_set_giv(req, esph->enc_data,
+			      XFRM_SKB_CB(skb)->seq.output.low);
+
+	ESP_SKB_CB(skb)->tmp = tmp;
+	err = crypto_aead_givencrypt(req);
+	if (err == -EINPROGRESS)
+		goto error;
 
-	if (esp->conf.ivlen)
-		crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+	if (err == -EBUSY)
+		err = NET_XMIT_DROP;
 
-	do {
-		struct scatterlist *sg = &esp->sgbuf[0];
+	kfree(tmp);
 
-		if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
-			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
-			if (!sg)
-				goto error;
-		}
-		skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);
-		crypto_cipher_encrypt(tfm, sg, sg, clen);
-		if (unlikely(sg != &esp->sgbuf[0]))
-			kfree(sg);
-	} while (0);
+error:
+	return err;
+}
 
-	if (esp->conf.ivlen) {
-		memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
-		crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
-	}
+static int esp_input_done2(struct sk_buff *skb, int err)
+{
+	struct xfrm_state *x = xfrm_input_state(skb);
+	struct crypto_aead *aead = x->data;
+	int alen = crypto_aead_authsize(aead);
+	int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+	int elen = skb->len - hlen;
+	int hdr_len = skb_network_header_len(skb);
+	int padlen;
+	u8 nexthdr[2];
+
+	kfree(ESP_SKB_CB(skb)->tmp);
+
+	if (unlikely(err))
+		goto out;
+
+	if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
+		BUG();
 
-	if (esp->auth.icv_full_len) {
-		esp->auth.icv(esp, skb, (u8*)esph-skb->data,
-			sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);
-		pskb_put(skb, trailer, alen);
+	err = -EINVAL;
+	padlen = nexthdr[0];
+	if (padlen + 2 + alen >= elen) {
+		LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage "
+			       "padlen=%d, elen=%d\n", padlen + 2, elen - alen);
+		goto out;
 	}
 
-	err = 0;
+	/* ... check padding bits here. Silly. :-) */
 
-error:
+	pskb_trim(skb, skb->len - alen - padlen - 2);
+	__skb_pull(skb, hlen);
+	if (x->props.mode == XFRM_MODE_TUNNEL)
+		skb_reset_transport_header(skb);
+	else
+		skb_set_transport_header(skb, -hdr_len);
+
+	err = nexthdr[1];
+
+	/* RFC4303: Drop dummy packets without any error */
+	if (err == IPPROTO_NONE)
+		err = -EINVAL;
+
+out:
 	return err;
 }
 
-static int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+static void esp_input_done(struct crypto_async_request *base, int err)
 {
-	struct ipv6hdr *iph;
-	struct ipv6_esp_hdr *esph;
-	struct esp_data *esp = x->data;
-	struct sk_buff *trailer;
-	int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
-	int alen = esp->auth.icv_trunc_len;
-	int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen;
+	struct sk_buff *skb = base->data;
 
-	int hdr_len = skb->h.raw - skb->nh.raw;
+	xfrm_input_resume(skb, esp_input_done2(skb, err));
+}
+
+static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ip_esp_hdr *esph;
+	struct crypto_aead *aead = x->data;
+	struct aead_request *req;
+	struct sk_buff *trailer;
+	int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
 	int nfrags;
-	unsigned char *tmp_hdr = NULL;
+	int assoclen;
+	int sglists;
+	int seqhilen;
 	int ret = 0;
+	void *tmp;
+	__be32 *seqhi;
+	u8 *iv;
+	struct scatterlist *sg;
+	struct scatterlist *asg;
 
-	if (!pskb_may_pull(skb, sizeof(struct ipv6_esp_hdr))) {
+	if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) {
 		ret = -EINVAL;
-		goto out_nofree;
+		goto out;
 	}
 
-	if (elen <= 0 || (elen & (blksize-1))) {
+	if (elen <= 0) {
 		ret = -EINVAL;
-		goto out_nofree;
+		goto out;
 	}
 
-	tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
-	if (!tmp_hdr) {
-		ret = -ENOMEM;
-		goto out_nofree;
+	if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) {
+		ret = -EINVAL;
+		goto out;
 	}
-	memcpy(tmp_hdr, skb->nh.raw, hdr_len);
-
-	/* If integrity check is required, do this. */
-        if (esp->auth.icv_full_len) {
-		u8 sum[esp->auth.icv_full_len];
-		u8 sum1[alen];
 
-		esp->auth.icv(esp, skb, 0, skb->len-alen, sum);
+	ret = -ENOMEM;
 
-		if (skb_copy_bits(skb, skb->len-alen, sum1, alen))
-			BUG();
+	assoclen = sizeof(*esph);
+	sglists = 1;
+	seqhilen = 0;
 
-		if (unlikely(memcmp(sum, sum1, alen))) {
-			x->stats.integrity_failed++;
-			ret = -EINVAL;
-			goto out;
-		}
+	if (x->props.flags & XFRM_STATE_ESN) {
+		sglists += 2;
+		seqhilen += sizeof(__be32);
+		assoclen += seqhilen;
 	}
 
-	if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) {
-		ret = -EINVAL;
+	tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
+	if (!tmp)
 		goto out;
-	}
+
+	ESP_SKB_CB(skb)->tmp = tmp;
+	seqhi = esp_tmp_seqhi(tmp);
+	iv = esp_tmp_iv(aead, tmp, seqhilen);
+	req = esp_tmp_req(aead, iv);
+	asg = esp_req_sg(aead, req);
+	sg = asg + sglists;
 
 	skb->ip_summed = CHECKSUM_NONE;
 
-	esph = (struct ipv6_esp_hdr*)skb->data;
-	iph = skb->nh.ipv6h;
+	esph = (struct ip_esp_hdr *)skb->data;
 
 	/* Get ivec. This can be wrong, check against another impls. */
-	if (esp->conf.ivlen)
-		crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm));
-
-        {
-		u8 nexthdr[2];
-		struct scatterlist *sg = &esp->sgbuf[0];
-		u8 padlen;
-
-		if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
-			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
-			if (!sg) {
-				ret = -ENOMEM;
-				goto out;
-			}
-		}
-		skb_to_sgvec(skb, sg, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen, elen);
-		crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen);
-		if (unlikely(sg != &esp->sgbuf[0]))
-			kfree(sg);
-
-		if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
-			BUG();
-
-		padlen = nexthdr[0];
-		if (padlen+2 >= elen) {
-			LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen);
-			ret = -EINVAL;
-			goto out;
-		}
-		/* ... check padding bits here. Silly. :-) */ 
-
-		pskb_trim(skb, skb->len - alen - padlen - 2);
-		skb->h.raw = skb_pull(skb, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen);
-		skb->nh.raw += sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
-		memcpy(skb->nh.raw, tmp_hdr, hdr_len);
-		skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
-		ret = nexthdr[1];
-	}
+	iv = esph->enc_data;
+
+	sg_init_table(sg, nfrags);
+	skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
+
+	if ((x->props.flags & XFRM_STATE_ESN)) {
+		sg_init_table(asg, 3);
+		sg_set_buf(asg, &esph->spi, sizeof(__be32));
+		*seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+		sg_set_buf(asg + 1, seqhi, seqhilen);
+		sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
+	} else
+		sg_init_one(asg, esph, sizeof(*esph));
+
+	aead_request_set_callback(req, 0, esp_input_done, skb);
+	aead_request_set_crypt(req, sg, sg, elen, iv);
+	aead_request_set_assoc(req, asg, assoclen);
+
+	ret = crypto_aead_decrypt(req);
+	if (ret == -EINPROGRESS)
+		goto out;
+
+	ret = esp_input_done2(skb, ret);
 
 out:
-	kfree(tmp_hdr);
-out_nofree:
 	return ret;
 }
 
-static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
 {
-	struct esp_data *esp = x->data;
-	u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+	struct crypto_aead *aead = x->data;
+	u32 blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+	unsigned int net_adj;
 
-	if (x->props.mode) {
-		mtu = ALIGN(mtu + 2, blksize);
-	} else {
-		/* The worst case. */
-		u32 padsize = ((blksize - 1) & 7) + 1;
-		mtu = ALIGN(mtu + 2, padsize) + blksize - padsize;
-	}
-	if (esp->conf.padlen)
-		mtu = ALIGN(mtu, esp->conf.padlen);
+	if (x->props.mode != XFRM_MODE_TUNNEL)
+		net_adj = sizeof(struct ipv6hdr);
+	else
+		net_adj = 0;
 
-	return mtu + x->props.header_len + esp->auth.icv_full_len;
+	return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
+		 net_adj) & ~(blksize - 1)) + net_adj - 2;
 }
 
-static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-                     int type, int code, int offset, __u32 info)
+static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		    u8 type, u8 code, int offset, __be32 info)
 {
-	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
-	struct ipv6_esp_hdr *esph = (struct ipv6_esp_hdr*)(skb->data+offset);
+	struct net *net = dev_net(skb->dev);
+	const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+	struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset);
 	struct xfrm_state *x;
 
-	if (type != ICMPV6_DEST_UNREACH && 
-	    type != ICMPV6_PKT_TOOBIG)
-		return;
+	if (type != ICMPV6_PKT_TOOBIG &&
+	    type != NDISC_REDIRECT)
+		return 0;
 
-	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6);
+	x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+			      esph->spi, IPPROTO_ESP, AF_INET6);
 	if (!x)
-		return;
-	printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/"
-			"%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", 
-			ntohl(esph->spi), NIP6(iph->daddr));
+		return 0;
+
+	if (type == NDISC_REDIRECT)
+		ip6_redirect(skb, net, skb->dev->ifindex, 0);
+	else
+		ip6_update_pmtu(skb, net, info, 0, 0);
 	xfrm_state_put(x);
+
+	return 0;
 }
 
 static void esp6_destroy(struct xfrm_state *x)
 {
-	struct esp_data *esp = x->data;
+	struct crypto_aead *aead = x->data;
 
-	if (!esp)
+	if (!aead)
 		return;
 
-	crypto_free_tfm(esp->conf.tfm);
-	esp->conf.tfm = NULL;
-	kfree(esp->conf.ivec);
-	esp->conf.ivec = NULL;
-	crypto_free_tfm(esp->auth.tfm);
-	esp->auth.tfm = NULL;
-	kfree(esp->auth.work_icv);
-	esp->auth.work_icv = NULL;
-	kfree(esp);
+	crypto_free_aead(aead);
 }
 
-static int esp6_init_state(struct xfrm_state *x)
+static int esp_init_aead(struct xfrm_state *x)
+{
+	struct crypto_aead *aead;
+	int err;
+
+	aead = crypto_alloc_aead(x->aead->alg_name, 0, 0);
+	err = PTR_ERR(aead);
+	if (IS_ERR(aead))
+		goto error;
+
+	x->data = aead;
+
+	err = crypto_aead_setkey(aead, x->aead->alg_key,
+				 (x->aead->alg_key_len + 7) / 8);
+	if (err)
+		goto error;
+
+	err = crypto_aead_setauthsize(aead, x->aead->alg_icv_len / 8);
+	if (err)
+		goto error;
+
+error:
+	return err;
+}
+
+static int esp_init_authenc(struct xfrm_state *x)
 {
-	struct esp_data *esp = NULL;
+	struct crypto_aead *aead;
+	struct crypto_authenc_key_param *param;
+	struct rtattr *rta;
+	char *key;
+	char *p;
+	char authenc_name[CRYPTO_MAX_ALG_NAME];
+	unsigned int keylen;
+	int err;
 
-	/* null auth and encryption can have zero length keys */
-	if (x->aalg) {
-		if (x->aalg->alg_key_len > 512)
-			goto error;
-	}
+	err = -EINVAL;
 	if (x->ealg == NULL)
 		goto error;
 
-	if (x->encap)
+	err = -ENAMETOOLONG;
+
+	if ((x->props.flags & XFRM_STATE_ESN)) {
+		if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
+			     "authencesn(%s,%s)",
+			     x->aalg ? x->aalg->alg_name : "digest_null",
+			     x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+			goto error;
+	} else {
+		if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
+			     "authenc(%s,%s)",
+			     x->aalg ? x->aalg->alg_name : "digest_null",
+			     x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+			goto error;
+	}
+
+	aead = crypto_alloc_aead(authenc_name, 0, 0);
+	err = PTR_ERR(aead);
+	if (IS_ERR(aead))
 		goto error;
 
-	esp = kmalloc(sizeof(*esp), GFP_KERNEL);
-	if (esp == NULL)
-		return -ENOMEM;
+	x->data = aead;
+
+	keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) +
+		 (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param));
+	err = -ENOMEM;
+	key = kmalloc(keylen, GFP_KERNEL);
+	if (!key)
+		goto error;
 
-	memset(esp, 0, sizeof(*esp));
+	p = key;
+	rta = (void *)p;
+	rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM;
+	rta->rta_len = RTA_LENGTH(sizeof(*param));
+	param = RTA_DATA(rta);
+	p += RTA_SPACE(sizeof(*param));
 
 	if (x->aalg) {
 		struct xfrm_algo_desc *aalg_desc;
 
-		esp->auth.key = x->aalg->alg_key;
-		esp->auth.key_len = (x->aalg->alg_key_len+7)/8;
-		esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
-		if (esp->auth.tfm == NULL)
-			goto error;
-		esp->auth.icv = esp_hmac_digest;
- 
+		memcpy(p, x->aalg->alg_key, (x->aalg->alg_key_len + 7) / 8);
+		p += (x->aalg->alg_key_len + 7) / 8;
+
 		aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
 		BUG_ON(!aalg_desc);
- 
+
+		err = -EINVAL;
 		if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
-			crypto_tfm_alg_digestsize(esp->auth.tfm)) {
-				printk(KERN_INFO "ESP: %s digestsize %u != %hu\n",
-					x->aalg->alg_name,
-					crypto_tfm_alg_digestsize(esp->auth.tfm),
-					aalg_desc->uinfo.auth.icv_fullbits/8);
-				goto error;
+		    crypto_aead_authsize(aead)) {
+			NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
+				 x->aalg->alg_name,
+				 crypto_aead_authsize(aead),
+				 aalg_desc->uinfo.auth.icv_fullbits/8);
+			goto free_key;
 		}
- 
-		esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
-		esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
- 
-		esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL);
-		if (!esp->auth.work_icv)
-			goto error;
+
+		err = crypto_aead_setauthsize(
+			aead, x->aalg->alg_trunc_len / 8);
+		if (err)
+			goto free_key;
 	}
-	esp->conf.key = x->ealg->alg_key;
-	esp->conf.key_len = (x->ealg->alg_key_len+7)/8;
-	if (x->props.ealgo == SADB_EALG_NULL)
-		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB);
+
+	param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8);
+	memcpy(p, x->ealg->alg_key, (x->ealg->alg_key_len + 7) / 8);
+
+	err = crypto_aead_setkey(aead, key, keylen);
+
+free_key:
+	kfree(key);
+
+error:
+	return err;
+}
+
+static int esp6_init_state(struct xfrm_state *x)
+{
+	struct crypto_aead *aead;
+	u32 align;
+	int err;
+
+	if (x->encap)
+		return -EINVAL;
+
+	x->data = NULL;
+
+	if (x->aead)
+		err = esp_init_aead(x);
 	else
-		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC);
-	if (esp->conf.tfm == NULL)
-		goto error;
-	esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm);
-	esp->conf.padlen = 0;
-	if (esp->conf.ivlen) {
-		esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
-		if (unlikely(esp->conf.ivec == NULL))
-			goto error;
-		get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
-	}
-	if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len))
+		err = esp_init_authenc(x);
+
+	if (err)
 		goto error;
-	x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
-	if (x->props.mode)
+
+	aead = x->data;
+
+	x->props.header_len = sizeof(struct ip_esp_hdr) +
+			      crypto_aead_ivsize(aead);
+	switch (x->props.mode) {
+	case XFRM_MODE_BEET:
+		if (x->sel.family != AF_INET6)
+			x->props.header_len += IPV4_BEET_PHMAXLEN +
+				               (sizeof(struct ipv6hdr) - sizeof(struct iphdr));
+		break;
+	case XFRM_MODE_TRANSPORT:
+		break;
+	case XFRM_MODE_TUNNEL:
 		x->props.header_len += sizeof(struct ipv6hdr);
-	x->data = esp;
-	return 0;
+		break;
+	default:
+		goto error;
+	}
+
+	align = ALIGN(crypto_aead_blocksize(aead), 4);
+	x->props.trailer_len = align + 1 + crypto_aead_authsize(aead);
 
 error:
-	x->data = esp;
-	esp6_destroy(x);
-	x->data = NULL;
-	return -EINVAL;
+	return err;
+}
+
+static int esp6_rcv_cb(struct sk_buff *skb, int err)
+{
+	return 0;
 }
 
-static struct xfrm_type esp6_type =
+static const struct xfrm_type esp6_type =
 {
 	.description	= "ESP6",
 	.owner	     	= THIS_MODULE,
 	.proto	     	= IPPROTO_ESP,
+	.flags		= XFRM_TYPE_REPLAY_PROT,
 	.init_state	= esp6_init_state,
 	.destructor	= esp6_destroy,
-	.get_max_size	= esp6_get_max_size,
+	.get_mtu	= esp6_get_mtu,
 	.input		= esp6_input,
-	.output		= esp6_output
+	.output		= esp6_output,
+	.hdr_offset	= xfrm6_find_1stfragopt,
 };
 
-static struct inet6_protocol esp6_protocol = {
-	.handler 	=	xfrm6_rcv,
+static struct xfrm6_protocol esp6_protocol = {
+	.handler	=	xfrm6_rcv,
+	.cb_handler	=	esp6_rcv_cb,
 	.err_handler	=	esp6_err,
-	.flags		=	INET6_PROTO_NOPOLICY,
+	.priority	=	0,
 };
 
 static int __init esp6_init(void)
 {
 	if (xfrm_register_type(&esp6_type, AF_INET6) < 0) {
-		printk(KERN_INFO "ipv6 esp init: can't add xfrm type\n");
+		pr_info("%s: can't add xfrm type\n", __func__);
 		return -EAGAIN;
 	}
-	if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) {
-		printk(KERN_INFO "ipv6 esp init: can't add protocol\n");
+	if (xfrm6_protocol_register(&esp6_protocol, IPPROTO_ESP) < 0) {
+		pr_info("%s: can't add protocol\n", __func__);
 		xfrm_unregister_type(&esp6_type, AF_INET6);
 		return -EAGAIN;
 	}
@@ -405,13 +659,14 @@ static int __init esp6_init(void)
 
 static void __exit esp6_fini(void)
 {
-	if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0)
-		printk(KERN_INFO "ipv6 esp close: can't remove protocol\n");
+	if (xfrm6_protocol_deregister(&esp6_protocol, IPPROTO_ESP) < 0)
+		pr_info("%s: can't remove protocol\n", __func__);
 	if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)
-		printk(KERN_INFO "ipv6 esp close: can't remove xfrm type\n");
+		pr_info("%s: can't remove xfrm type\n", __func__);
 }
 
 module_init(esp6_init);
 module_exit(esp6_fini);
 
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ESP);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 922549581ab..8d67900aa00 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -7,8 +7,6 @@
  *	Andi Kleen		<ak@muc.de>
  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
  *
- *	$Id: exthdrs.c,v 1.13 2001/06/19 15:58:56 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
  *      as published by the Free Software Foundation; either version
@@ -16,7 +14,7 @@
  */
 
 /* Changes:
- *	yoshfuji		: ensure not to overrun while parsing 
+ *	yoshfuji		: ensure not to overrun while parsing
  *				  tlv options.
  *	Mitsuru KANDA @USAGI and: Remove ipv6_parse_exthdrs().
  *	YOSHIFUJI Hideaki @USAGI  Register inbound extension header
@@ -27,12 +25,14 @@
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
-#include <linux/sched.h>
 #include <linux/net.h>
 #include <linux/netdevice.h>
 #include <linux/in6.h>
 #include <linux/icmpv6.h>
+#include <linux/slab.h>
+#include <linux/export.h>
 
+#include <net/dst.h>
 #include <net/sock.h>
 #include <net/snmp.h>
 
@@ -43,20 +43,23 @@
 #include <net/ndisc.h>
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+#include <net/xfrm.h>
+#endif
 
 #include <asm/uaccess.h>
 
 /*
  *	Parsing tlv encoded headers.
  *
- *	Parsing function "func" returns 1, if parsing succeed
- *	and 0, if it failed.
+ *	Parsing function "func" returns true, if parsing succeed
+ *	and false, if it failed.
  *	It MUST NOT touch skb->h.
  */
 
 struct tlvtype_proc {
 	int	type;
-	int	(*func)(struct sk_buff *skb, int offset);
+	bool	(*func)(struct sk_buff *skb, int offset);
 };
 
 /*********************
@@ -65,11 +68,11 @@ struct tlvtype_proc {
 
 /* An unknown option is detected, decide what to do */
 
-static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
+static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
 {
-	switch ((skb->nh.raw[optoff] & 0xC0) >> 6) {
+	switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
 	case 0: /* ignore */
-		return 1;
+		return true;
 
 	case 1: /* drop packet */
 		break;
@@ -78,191 +81,294 @@ static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
 		/* Actually, it is redundant check. icmp_send
 		   will recheck in any case.
 		 */
-		if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
+		if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr))
 			break;
 	case 2: /* send ICMP PARM PROB regardless and drop packet */
 		icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
-		return 0;
-	};
+		return false;
+	}
 
 	kfree_skb(skb);
-	return 0;
+	return false;
 }
 
 /* Parse tlv encoded option header (hop-by-hop or destination) */
 
-static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
+static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
 {
-	struct tlvtype_proc *curr;
-	int off = skb->h.raw - skb->nh.raw;
-	int len = ((skb->h.raw[1]+1)<<3);
+	const struct tlvtype_proc *curr;
+	const unsigned char *nh = skb_network_header(skb);
+	int off = skb_network_header_len(skb);
+	int len = (skb_transport_header(skb)[1] + 1) << 3;
+	int padlen = 0;
 
-	if ((skb->h.raw + len) - skb->data > skb_headlen(skb))
+	if (skb_transport_offset(skb) + len > skb_headlen(skb))
 		goto bad;
 
 	off += 2;
 	len -= 2;
 
 	while (len > 0) {
-		int optlen = skb->nh.raw[off+1]+2;
+		int optlen = nh[off + 1] + 2;
+		int i;
 
-		switch (skb->nh.raw[off]) {
-		case IPV6_TLV_PAD0:
+		switch (nh[off]) {
+		case IPV6_TLV_PAD1:
 			optlen = 1;
+			padlen++;
+			if (padlen > 7)
+				goto bad;
 			break;
 
 		case IPV6_TLV_PADN:
+			/* RFC 2460 states that the purpose of PadN is
+			 * to align the containing header to multiples
+			 * of 8. 7 is therefore the highest valid value.
+			 * See also RFC 4942, Section 2.1.9.5.
+			 */
+			padlen += optlen;
+			if (padlen > 7)
+				goto bad;
+			/* RFC 4942 recommends receiving hosts to
+			 * actively check PadN payload to contain
+			 * only zeroes.
+			 */
+			for (i = 2; i < optlen; i++) {
+				if (nh[off + i] != 0)
+					goto bad;
+			}
 			break;
 
 		default: /* Other TLV code so scan list */
 			if (optlen > len)
 				goto bad;
 			for (curr=procs; curr->type >= 0; curr++) {
-				if (curr->type == skb->nh.raw[off]) {
-					/* type specific length/alignment 
-					   checks will be performed in the 
+				if (curr->type == nh[off]) {
+					/* type specific length/alignment
+					   checks will be performed in the
 					   func(). */
-					if (curr->func(skb, off) == 0)
-						return 0;
+					if (curr->func(skb, off) == false)
+						return false;
 					break;
 				}
 			}
 			if (curr->type < 0) {
 				if (ip6_tlvopt_unknown(skb, off) == 0)
-					return 0;
+					return false;
 			}
+			padlen = 0;
 			break;
 		}
 		off += optlen;
 		len -= optlen;
 	}
+
 	if (len == 0)
-		return 1;
+		return true;
 bad:
 	kfree_skb(skb);
-	return 0;
+	return false;
 }
 
 /*****************************
   Destination options header.
  *****************************/
 
-static struct tlvtype_proc tlvprocdestopt_lst[] = {
-	/* No destination options are defined now */
-	{-1,			NULL}
-};
-
-static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
 {
-	struct sk_buff *skb = *skbp;
+	struct ipv6_destopt_hao *hao;
 	struct inet6_skb_parm *opt = IP6CB(skb);
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	struct in6_addr tmp_addr;
+	int ret;
 
-	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
-	    !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
-		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
-		kfree_skb(skb);
-		return -1;
+	if (opt->dsthao) {
+		LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n");
+		goto discard;
 	}
+	opt->dsthao = opt->dst1;
+	opt->dst1 = 0;
 
-	opt->lastopt = skb->h.raw - skb->nh.raw;
-	opt->dst1 = skb->h.raw - skb->nh.raw;
+	hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff);
 
-	if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
-		skb->h.raw += ((skb->h.raw[1]+1)<<3);
-		*nhoffp = opt->dst1;
-		return 1;
+	if (hao->length != 16) {
+		LIMIT_NETDEBUG(
+			KERN_DEBUG "hao invalid option length = %d\n", hao->length);
+		goto discard;
 	}
 
-	IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
-	return -1;
-}
+	if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) {
+		LIMIT_NETDEBUG(
+			KERN_DEBUG "hao is not an unicast addr: %pI6\n", &hao->addr);
+		goto discard;
+	}
 
-static struct inet6_protocol destopt_protocol = {
-	.handler	=	ipv6_destopt_rcv,
-	.flags		=	INET6_PROTO_NOPOLICY,
-};
+	ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr,
+			       (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS);
+	if (unlikely(ret < 0))
+		goto discard;
 
-void __init ipv6_destopt_init(void)
-{
-	if (inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS) < 0)
-		printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n");
-}
+	if (skb_cloned(skb)) {
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+			goto discard;
 
-/********************************
-  NONE header. No data in packet.
- ********************************/
+		/* update all variable using below by copied skbuff */
+		hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) +
+						  optoff);
+		ipv6h = ipv6_hdr(skb);
+	}
 
-static int ipv6_nodata_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
-{
-	struct sk_buff *skb = *skbp;
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->ip_summed = CHECKSUM_NONE;
 
+	tmp_addr = ipv6h->saddr;
+	ipv6h->saddr = hao->addr;
+	hao->addr = tmp_addr;
+
+	if (skb->tstamp.tv64 == 0)
+		__net_timestamp(skb);
+
+	return true;
+
+ discard:
 	kfree_skb(skb);
-	return 0;
+	return false;
 }
+#endif
 
-static struct inet6_protocol nodata_protocol = {
-	.handler	=	ipv6_nodata_rcv,
-	.flags		=	INET6_PROTO_NOPOLICY,
+static const struct tlvtype_proc tlvprocdestopt_lst[] = {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+	{
+		.type	= IPV6_TLV_HAO,
+		.func	= ipv6_dest_hao,
+	},
+#endif
+	{-1,			NULL}
 };
 
-void __init ipv6_nodata_init(void)
+static int ipv6_destopt_rcv(struct sk_buff *skb)
 {
-	if (inet6_add_protocol(&nodata_protocol, IPPROTO_NONE) < 0)
-		printk(KERN_ERR "ipv6_nodata_init: Could not register protocol\n");
+	struct inet6_skb_parm *opt = IP6CB(skb);
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+	__u16 dstbuf;
+#endif
+	struct dst_entry *dst = skb_dst(skb);
+
+	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
+	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
+				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
+		IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst),
+				 IPSTATS_MIB_INHDRERRORS);
+		kfree_skb(skb);
+		return -1;
+	}
+
+	opt->lastopt = opt->dst1 = skb_network_header_len(skb);
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+	dstbuf = opt->dst1;
+#endif
+
+	if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
+		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
+		opt = IP6CB(skb);
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+		opt->nhoff = dstbuf;
+#else
+		opt->nhoff = opt->dst1;
+#endif
+		return 1;
+	}
+
+	IP6_INC_STATS_BH(dev_net(dst->dev),
+			 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
+	return -1;
 }
 
 /********************************
   Routing header.
  ********************************/
 
-static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+/* called with rcu_read_lock() */
+static int ipv6_rthdr_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
-	struct in6_addr *addr;
+	struct in6_addr *addr = NULL;
 	struct in6_addr daddr;
+	struct inet6_dev *idev;
 	int n, i;
-
 	struct ipv6_rt_hdr *hdr;
 	struct rt0_hdr *rthdr;
-
-	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
-	    !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
-		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+	struct net *net = dev_net(skb->dev);
+	int accept_source_route = net->ipv6.devconf_all->accept_source_route;
+
+	idev = __in6_dev_get(skb->dev);
+	if (idev && accept_source_route > idev->cnf.accept_source_route)
+		accept_source_route = idev->cnf.accept_source_route;
+
+	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
+	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
+				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
+		IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+				 IPSTATS_MIB_INHDRERRORS);
 		kfree_skb(skb);
 		return -1;
 	}
 
-	hdr = (struct ipv6_rt_hdr *) skb->h.raw;
+	hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
 
-	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr) ||
+	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
 	    skb->pkt_type != PACKET_HOST) {
-		IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+		IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+				 IPSTATS_MIB_INADDRERRORS);
 		kfree_skb(skb);
 		return -1;
 	}
 
 looped_back:
 	if (hdr->segments_left == 0) {
-		opt->lastopt = skb->h.raw - skb->nh.raw;
-		opt->srcrt = skb->h.raw - skb->nh.raw;
-		skb->h.raw += (hdr->hdrlen + 1) << 3;
+		switch (hdr->type) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+		case IPV6_SRCRT_TYPE_2:
+			/* Silently discard type 2 header unless it was
+			 * processed by own
+			 */
+			if (!addr) {
+				IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+						 IPSTATS_MIB_INADDRERRORS);
+				kfree_skb(skb);
+				return -1;
+			}
+			break;
+#endif
+		default:
+			break;
+		}
+
+		opt->lastopt = opt->srcrt = skb_network_header_len(skb);
+		skb->transport_header += (hdr->hdrlen + 1) << 3;
 		opt->dst0 = opt->dst1;
 		opt->dst1 = 0;
-		*nhoffp = (&hdr->nexthdr) - skb->nh.raw;
+		opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
 		return 1;
 	}
 
-	if (hdr->type != IPV6_SRCRT_TYPE_0) {
-		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
-		return -1;
-	}
-	
-	if (hdr->hdrlen & 0x01) {
-		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
-		return -1;
+	switch (hdr->type) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+	case IPV6_SRCRT_TYPE_2:
+		if (accept_source_route < 0)
+			goto unknown_rh;
+		/* Silently discard invalid RTH type 2 */
+		if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+					 IPSTATS_MIB_INHDRERRORS);
+			kfree_skb(skb);
+			return -1;
+		}
+		break;
+#endif
+	default:
+		goto unknown_rh;
 	}
 
 	/*
@@ -273,8 +379,11 @@ looped_back:
 	n = hdr->hdrlen >> 1;
 
 	if (hdr->segments_left > n) {
-		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw);
+		IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+				 IPSTATS_MIB_INHDRERRORS);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  ((&hdr->segments_left) -
+				   skb_network_header(skb)));
 		return -1;
 	}
 
@@ -282,19 +391,17 @@ looped_back:
 	   Do not damage packets queued somewhere.
 	 */
 	if (skb_cloned(skb)) {
-		struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
-		kfree_skb(skb);
 		/* the copy is a forwarded packet */
-		if (skb2 == NULL) {
-			IP6_INC_STATS_BH(IPSTATS_MIB_OUTDISCARDS);	
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+					 IPSTATS_MIB_OUTDISCARDS);
+			kfree_skb(skb);
 			return -1;
 		}
-		*skbp = skb = skb2;
-		opt = IP6CB(skb2);
-		hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
+		hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
 	}
 
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->ip_summed = CHECKSUM_NONE;
 
 	i = n - --hdr->segments_left;
@@ -303,175 +410,200 @@ looped_back:
 	addr = rthdr->addr;
 	addr += i - 1;
 
+	switch (hdr->type) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+	case IPV6_SRCRT_TYPE_2:
+		if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
+				     (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
+				     IPPROTO_ROUTING) < 0) {
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+					 IPSTATS_MIB_INADDRERRORS);
+			kfree_skb(skb);
+			return -1;
+		}
+		if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) {
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+					 IPSTATS_MIB_INADDRERRORS);
+			kfree_skb(skb);
+			return -1;
+		}
+		break;
+#endif
+	default:
+		break;
+	}
+
 	if (ipv6_addr_is_multicast(addr)) {
-		IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+		IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+				 IPSTATS_MIB_INADDRERRORS);
 		kfree_skb(skb);
 		return -1;
 	}
 
-	ipv6_addr_copy(&daddr, addr);
-	ipv6_addr_copy(addr, &skb->nh.ipv6h->daddr);
-	ipv6_addr_copy(&skb->nh.ipv6h->daddr, &daddr);
+	daddr = *addr;
+	*addr = ipv6_hdr(skb)->daddr;
+	ipv6_hdr(skb)->daddr = daddr;
 
-	dst_release(xchg(&skb->dst, NULL));
+	skb_dst_drop(skb);
 	ip6_route_input(skb);
-	if (skb->dst->error) {
-		skb_push(skb, skb->data - skb->nh.raw);
+	if (skb_dst(skb)->error) {
+		skb_push(skb, skb->data - skb_network_header(skb));
 		dst_input(skb);
 		return -1;
 	}
 
-	if (skb->dst->dev->flags&IFF_LOOPBACK) {
-		if (skb->nh.ipv6h->hop_limit <= 1) {
-			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+	if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) {
+		if (ipv6_hdr(skb)->hop_limit <= 1) {
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+					 IPSTATS_MIB_INHDRERRORS);
 			icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
-				    0, skb->dev);
+				    0);
 			kfree_skb(skb);
 			return -1;
 		}
-		skb->nh.ipv6h->hop_limit--;
+		ipv6_hdr(skb)->hop_limit--;
 		goto looped_back;
 	}
 
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 	dst_input(skb);
 	return -1;
+
+unknown_rh:
+	IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
+	icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+			  (&hdr->type) - skb_network_header(skb));
+	return -1;
 }
 
-static struct inet6_protocol rthdr_protocol = {
+static const struct inet6_protocol rthdr_protocol = {
 	.handler	=	ipv6_rthdr_rcv,
 	.flags		=	INET6_PROTO_NOPOLICY,
 };
 
-void __init ipv6_rthdr_init(void)
-{
-	if (inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING) < 0)
-		printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n");
+static const struct inet6_protocol destopt_protocol = {
+	.handler	=	ipv6_destopt_rcv,
+	.flags		=	INET6_PROTO_NOPOLICY,
 };
 
-/*
-   This function inverts received rthdr.
-   NOTE: specs allow to make it automatically only if
-   packet authenticated.
-
-   I will not discuss it here (though, I am really pissed off at
-   this stupid requirement making rthdr idea useless)
-
-   Actually, it creates severe problems  for us.
-   Embryonic requests has no associated sockets,
-   so that user have no control over it and
-   cannot not only to set reply options, but
-   even to know, that someone wants to connect
-   without success. :-(
-
-   For now we need to test the engine, so that I created
-   temporary (or permanent) backdoor.
-   If listening socket set IPV6_RTHDR to 2, then we invert header.
-                                                   --ANK (980729)
- */
+static const struct inet6_protocol nodata_protocol = {
+	.handler	=	dst_discard,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
 
-struct ipv6_txoptions *
-ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr)
+int __init ipv6_exthdrs_init(void)
 {
-	/* Received rthdr:
+	int ret;
 
-	   [ H1 -> H2 -> ... H_prev ]  daddr=ME
+	ret = inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING);
+	if (ret)
+		goto out;
 
-	   Inverted result:
-	   [ H_prev -> ... -> H1 ] daddr =sender
+	ret = inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
+	if (ret)
+		goto out_rthdr;
 
-	   Note, that IP output engine will rewrite this rthdr
-	   by rotating it left by one addr.
-	 */
+	ret = inet6_add_protocol(&nodata_protocol, IPPROTO_NONE);
+	if (ret)
+		goto out_destopt;
 
-	int n, i;
-	struct rt0_hdr *rthdr = (struct rt0_hdr*)hdr;
-	struct rt0_hdr *irthdr;
-	struct ipv6_txoptions *opt;
-	int hdrlen = ipv6_optlen(hdr);
-
-	if (hdr->segments_left ||
-	    hdr->type != IPV6_SRCRT_TYPE_0 ||
-	    hdr->hdrlen & 0x01)
-		return NULL;
+out:
+	return ret;
+out_destopt:
+	inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
+out_rthdr:
+	inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
+	goto out;
+};
 
-	n = hdr->hdrlen >> 1;
-	opt = sock_kmalloc(sk, sizeof(*opt) + hdrlen, GFP_ATOMIC);
-	if (opt == NULL)
-		return NULL;
-	memset(opt, 0, sizeof(*opt));
-	opt->tot_len = sizeof(*opt) + hdrlen;
-	opt->srcrt = (void*)(opt+1);
-	opt->opt_nflen = hdrlen;
-
-	memcpy(opt->srcrt, hdr, sizeof(*hdr));
-	irthdr = (struct rt0_hdr*)opt->srcrt;
-	irthdr->reserved = 0;
-	opt->srcrt->segments_left = n;
-	for (i=0; i<n; i++)
-		memcpy(irthdr->addr+i, rthdr->addr+(n-1-i), 16);
-	return opt;
+void ipv6_exthdrs_exit(void)
+{
+	inet6_del_protocol(&nodata_protocol, IPPROTO_NONE);
+	inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
+	inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
 }
 
 /**********************************
   Hop-by-hop options.
  **********************************/
 
+/*
+ * Note: we cannot rely on skb_dst(skb) before we assign it in ip6_route_input().
+ */
+static inline struct inet6_dev *ipv6_skb_idev(struct sk_buff *skb)
+{
+	return skb_dst(skb) ? ip6_dst_idev(skb_dst(skb)) : __in6_dev_get(skb->dev);
+}
+
+static inline struct net *ipv6_skb_net(struct sk_buff *skb)
+{
+	return skb_dst(skb) ? dev_net(skb_dst(skb)->dev) : dev_net(skb->dev);
+}
+
 /* Router Alert as of RFC 2711 */
 
-static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
+static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
 {
-	if (skb->nh.raw[optoff+1] == 2) {
-		IP6CB(skb)->ra = optoff;
-		return 1;
+	const unsigned char *nh = skb_network_header(skb);
+
+	if (nh[optoff + 1] == 2) {
+		IP6CB(skb)->flags |= IP6SKB_ROUTERALERT;
+		memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra));
+		return true;
 	}
 	LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
-	               skb->nh.raw[optoff+1]);
+		       nh[optoff + 1]);
 	kfree_skb(skb);
-	return 0;
+	return false;
 }
 
 /* Jumbo payload */
 
-static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
+static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
 {
+	const unsigned char *nh = skb_network_header(skb);
+	struct net *net = ipv6_skb_net(skb);
 	u32 pkt_len;
 
-	if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
+	if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
 		LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
-		               skb->nh.raw[optoff+1]);
-		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+			       nh[optoff+1]);
+		IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
+				 IPSTATS_MIB_INHDRERRORS);
 		goto drop;
 	}
 
-	pkt_len = ntohl(*(u32*)(skb->nh.raw+optoff+2));
+	pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
 	if (pkt_len <= IPV6_MAXPLEN) {
-		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+		IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
+				 IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
-		return 0;
+		return false;
 	}
-	if (skb->nh.ipv6h->payload_len) {
-		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+	if (ipv6_hdr(skb)->payload_len) {
+		IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
+				 IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
-		return 0;
+		return false;
 	}
 
 	if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
-		IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
+		IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
+				 IPSTATS_MIB_INTRUNCATEDPKTS);
 		goto drop;
 	}
 
 	if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
 		goto drop;
 
-	return 1;
+	return true;
 
 drop:
 	kfree_skb(skb);
-	return 0;
+	return false;
 }
 
-static struct tlvtype_proc tlvprochopopt_lst[] = {
+static const struct tlvtype_proc tlvprochopopt_lst[] = {
 	{
 		.type	= IPV6_TLV_ROUTERALERT,
 		.func	= ipv6_hop_ra,
@@ -483,11 +615,30 @@ static struct tlvtype_proc tlvprochopopt_lst[] = {
 	{ -1, }
 };
 
-int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff)
+int ipv6_parse_hopopts(struct sk_buff *skb)
 {
-	IP6CB(skb)->hop = sizeof(struct ipv6hdr);
-	if (ip6_parse_tlv(tlvprochopopt_lst, skb))
-		return sizeof(struct ipv6hdr);
+	struct inet6_skb_parm *opt = IP6CB(skb);
+
+	/*
+	 * skb_network_header(skb) is equal to skb->data, and
+	 * skb_network_header_len(skb) is always equal to
+	 * sizeof(struct ipv6hdr) by definition of
+	 * hop-by-hop options.
+	 */
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) ||
+	    !pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
+				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
+		kfree_skb(skb);
+		return -1;
+	}
+
+	opt->hop = sizeof(struct ipv6hdr);
+	if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
+		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
+		opt = IP6CB(skb);
+		opt->nhoff = sizeof(struct ipv6hdr);
+		return 1;
+	}
 	return -1;
 }
 
@@ -509,7 +660,7 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
 	int hops;
 
 	ihdr = (struct rt0_hdr *) opt;
-	
+
 	phdr = (struct rt0_hdr *) skb_push(skb, (ihdr->rt_hdr.hdrlen + 1) << 3);
 	memcpy(phdr, ihdr, sizeof(struct rt0_hdr));
 
@@ -519,7 +670,7 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
 		memcpy(phdr->addr, ihdr->addr + 1,
 		       (hops - 1) * sizeof(struct in6_addr));
 
-	ipv6_addr_copy(phdr->addr + (hops - 1), *addr_p);
+	phdr->addr[hops - 1] = **addr_p;
 	*addr_p = ihdr->addr;
 
 	phdr->rt_hdr.nexthdr = *proto;
@@ -551,6 +702,7 @@ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
 	if (opt->hopopt)
 		ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
 }
+EXPORT_SYMBOL(ipv6_push_nfrag_opts);
 
 void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
 {
@@ -565,19 +717,20 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
 
 	opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC);
 	if (opt2) {
-		long dif = (char*)opt2 - (char*)opt;
+		long dif = (char *)opt2 - (char *)opt;
 		memcpy(opt2, opt, opt->tot_len);
 		if (opt2->hopopt)
-			*((char**)&opt2->hopopt) += dif;
+			*((char **)&opt2->hopopt) += dif;
 		if (opt2->dst0opt)
-			*((char**)&opt2->dst0opt) += dif;
+			*((char **)&opt2->dst0opt) += dif;
 		if (opt2->dst1opt)
-			*((char**)&opt2->dst1opt) += dif;
+			*((char **)&opt2->dst1opt) += dif;
 		if (opt2->srcrt)
-			*((char**)&opt2->srcrt) += dif;
+			*((char **)&opt2->srcrt) += dif;
 	}
 	return opt2;
 }
+EXPORT_SYMBOL_GPL(ipv6_dup_options);
 
 static int ipv6_renew_option(void *ohdr,
 			     struct ipv6_opt_hdr __user *newopt, int newoptlen,
@@ -589,14 +742,14 @@ static int ipv6_renew_option(void *ohdr,
 		if (ohdr) {
 			memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr));
 			*hdr = (struct ipv6_opt_hdr *)*p;
-			*p += CMSG_ALIGN(ipv6_optlen(*(struct ipv6_opt_hdr **)hdr));
+			*p += CMSG_ALIGN(ipv6_optlen(*hdr));
 		}
 	} else {
 		if (newopt) {
 			if (copy_from_user(*p, newopt, newoptlen))
 				return -EFAULT;
 			*hdr = (struct ipv6_opt_hdr *)*p;
-			if (ipv6_optlen(*(struct ipv6_opt_hdr **)hdr) > newoptlen)
+			if (ipv6_optlen(*hdr) > newoptlen)
 				return -EINVAL;
 			*p += CMSG_ALIGN(newoptlen);
 		}
@@ -614,20 +767,24 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
 	struct ipv6_txoptions *opt2;
 	int err;
 
-	if (newtype != IPV6_HOPOPTS && opt->hopopt)
-		tot_len += CMSG_ALIGN(ipv6_optlen(opt->hopopt));
-	if (newtype != IPV6_RTHDRDSTOPTS && opt->dst0opt)
-		tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst0opt));
-	if (newtype != IPV6_RTHDR && opt->srcrt)
-		tot_len += CMSG_ALIGN(ipv6_optlen(opt->srcrt));
-	if (newtype != IPV6_DSTOPTS && opt->dst1opt)
-		tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt));
+	if (opt) {
+		if (newtype != IPV6_HOPOPTS && opt->hopopt)
+			tot_len += CMSG_ALIGN(ipv6_optlen(opt->hopopt));
+		if (newtype != IPV6_RTHDRDSTOPTS && opt->dst0opt)
+			tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst0opt));
+		if (newtype != IPV6_RTHDR && opt->srcrt)
+			tot_len += CMSG_ALIGN(ipv6_optlen(opt->srcrt));
+		if (newtype != IPV6_DSTOPTS && opt->dst1opt)
+			tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt));
+	}
+
 	if (newopt && newoptlen)
 		tot_len += CMSG_ALIGN(newoptlen);
 
 	if (!tot_len)
 		return NULL;
 
+	tot_len += sizeof(*opt2);
 	opt2 = sock_kmalloc(sk, tot_len, GFP_ATOMIC);
 	if (!opt2)
 		return ERR_PTR(-ENOBUFS);
@@ -637,25 +794,25 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
 	opt2->tot_len = tot_len;
 	p = (char *)(opt2 + 1);
 
-	err = ipv6_renew_option(opt->hopopt, newopt, newoptlen,
+	err = ipv6_renew_option(opt ? opt->hopopt : NULL, newopt, newoptlen,
 				newtype != IPV6_HOPOPTS,
 				&opt2->hopopt, &p);
 	if (err)
 		goto out;
 
-	err = ipv6_renew_option(opt->dst0opt, newopt, newoptlen,
+	err = ipv6_renew_option(opt ? opt->dst0opt : NULL, newopt, newoptlen,
 				newtype != IPV6_RTHDRDSTOPTS,
 				&opt2->dst0opt, &p);
 	if (err)
 		goto out;
 
-	err = ipv6_renew_option(opt->srcrt, newopt, newoptlen,
+	err = ipv6_renew_option(opt ? opt->srcrt : NULL, newopt, newoptlen,
 				newtype != IPV6_RTHDR,
-				(struct ipv6_opt_hdr **)opt2->srcrt, &p);
+				(struct ipv6_opt_hdr **)&opt2->srcrt, &p);
 	if (err)
 		goto out;
 
-	err = ipv6_renew_option(opt->dst1opt, newopt, newoptlen,
+	err = ipv6_renew_option(opt ? opt->dst1opt : NULL, newopt, newoptlen,
 				newtype != IPV6_DSTOPTS,
 				&opt2->dst1opt, &p);
 	if (err)
@@ -668,7 +825,50 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
 
 	return opt2;
 out:
-	sock_kfree_s(sk, p, tot_len);
+	sock_kfree_s(sk, opt2, opt2->tot_len);
 	return ERR_PTR(err);
 }
 
+struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
+					  struct ipv6_txoptions *opt)
+{
+	/*
+	 * ignore the dest before srcrt unless srcrt is being included.
+	 * --yoshfuji
+	 */
+	if (opt && opt->dst0opt && !opt->srcrt) {
+		if (opt_space != opt) {
+			memcpy(opt_space, opt, sizeof(*opt_space));
+			opt = opt_space;
+		}
+		opt->opt_nflen -= ipv6_optlen(opt->dst0opt);
+		opt->dst0opt = NULL;
+	}
+
+	return opt;
+}
+EXPORT_SYMBOL_GPL(ipv6_fixup_options);
+
+/**
+ * fl6_update_dst - update flowi destination address with info given
+ *                  by srcrt option, if any.
+ *
+ * @fl6: flowi6 for which daddr is to be updated
+ * @opt: struct ipv6_txoptions in which to look for srcrt opt
+ * @orig: copy of original daddr address if modified
+ *
+ * Returns NULL if no txoptions or no srcrt, otherwise returns orig
+ * and initial value of fl6->daddr set in orig
+ */
+struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
+				const struct ipv6_txoptions *opt,
+				struct in6_addr *orig)
+{
+	if (!opt || !opt->srcrt)
+		return NULL;
+
+	*orig = fl6->daddr;
+	fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr;
+	return orig;
+}
+EXPORT_SYMBOL_GPL(fl6_update_dst);
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 315bc1fbec3..8af3eb57f43 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -2,41 +2,43 @@
  * IPv6 library code, needed by static components when full IPv6 support is
  * not configured or static.
  */
+#include <linux/export.h>
 #include <net/ipv6.h>
 
-/* 
+/*
  * find out if nexthdr is a well-known extension header or a protocol
  */
 
-int ipv6_ext_hdr(u8 nexthdr)
+bool ipv6_ext_hdr(u8 nexthdr)
 {
-	/* 
+	/*
 	 * find out if nexthdr is an extension header or a protocol
 	 */
-	return ( (nexthdr == NEXTHDR_HOP)	||
+	return   (nexthdr == NEXTHDR_HOP)	||
 		 (nexthdr == NEXTHDR_ROUTING)	||
 		 (nexthdr == NEXTHDR_FRAGMENT)	||
 		 (nexthdr == NEXTHDR_AUTH)	||
 		 (nexthdr == NEXTHDR_NONE)	||
-		 (nexthdr == NEXTHDR_DEST) );
+		 (nexthdr == NEXTHDR_DEST);
 }
+EXPORT_SYMBOL(ipv6_ext_hdr);
 
 /*
  * Skip any extension headers. This is used by the ICMP module.
  *
  * Note that strictly speaking this conflicts with RFC 2460 4.0:
- * ...The contents and semantics of each extension header determine whether 
+ * ...The contents and semantics of each extension header determine whether
  * or not to proceed to the next header.  Therefore, extension headers must
  * be processed strictly in the order they appear in the packet; a
  * receiver must not, for example, scan through a packet looking for a
  * particular kind of extension header and process that header prior to
  * processing all preceding ones.
- * 
+ *
  * We do exactly this. This is a protocol bug. We can't decide after a
- * seeing an unknown discard-with-error flavour TLV option if it's a 
+ * seeing an unknown discard-with-error flavour TLV option if it's a
  * ICMP error message or not (errors should never be send in reply to
  * ICMP error messages).
- * 
+ *
  * But I see no other way to do this. This might need to be reexamined
  * when Linux implements ESP (and maybe AUTH) headers.
  * --AK
@@ -56,6 +58,9 @@ int ipv6_ext_hdr(u8 nexthdr)
  *	    it returns NULL.
  *	  - First fragment header is skipped, not-first ones
  *	    are considered as unparsable.
+ *	  - Reports the offset field of the final fragment header so it is
+ *	    possible to tell whether this is a first fragment, later fragment,
+ *	    or not fragmented.
  *	  - ESP is unparsable for now and considered like
  *	    normal payload protocol.
  *	  - Note also special handling of AUTH header. Thanks to IPsec wizards.
@@ -63,10 +68,13 @@ int ipv6_ext_hdr(u8 nexthdr)
  * --ANK (980726)
  */
 
-int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp)
+int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
+		     __be16 *frag_offp)
 {
 	u8 nexthdr = *nexthdrp;
 
+	*frag_offp = 0;
+
 	while (ipv6_ext_hdr(nexthdr)) {
 		struct ipv6_opt_hdr _hdr, *hp;
 		int hdrlen;
@@ -77,7 +85,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp)
 		if (hp == NULL)
 			return -1;
 		if (nexthdr == NEXTHDR_FRAGMENT) {
-			unsigned short _frag_off, *fp;
+			__be16 _frag_off, *fp;
 			fp = skb_header_pointer(skb,
 						start+offsetof(struct frag_hdr,
 							       frag_off),
@@ -86,13 +94,14 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp)
 			if (fp == NULL)
 				return -1;
 
-			if (ntohs(*fp) & ~0x7)
+			*frag_offp = *fp;
+			if (ntohs(*frag_offp) & ~0x7)
 				break;
 			hdrlen = 8;
 		} else if (nexthdr == NEXTHDR_AUTH)
-			hdrlen = (hp->hdrlen+2)<<2; 
+			hdrlen = (hp->hdrlen+2)<<2;
 		else
-			hdrlen = ipv6_optlen(hp); 
+			hdrlen = ipv6_optlen(hp);
 
 		nexthdr = hp->nexthdr;
 		start += hdrlen;
@@ -101,6 +110,172 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp)
 	*nexthdrp = nexthdr;
 	return start;
 }
-
-EXPORT_SYMBOL(ipv6_ext_hdr);
 EXPORT_SYMBOL(ipv6_skip_exthdr);
+
+int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
+{
+	const unsigned char *nh = skb_network_header(skb);
+	int packet_len = skb_tail_pointer(skb) - skb_network_header(skb);
+	struct ipv6_opt_hdr *hdr;
+	int len;
+
+	if (offset + 2 > packet_len)
+		goto bad;
+	hdr = (struct ipv6_opt_hdr *)(nh + offset);
+	len = ((hdr->hdrlen + 1) << 3);
+
+	if (offset + len > packet_len)
+		goto bad;
+
+	offset += 2;
+	len -= 2;
+
+	while (len > 0) {
+		int opttype = nh[offset];
+		int optlen;
+
+		if (opttype == type)
+			return offset;
+
+		switch (opttype) {
+		case IPV6_TLV_PAD1:
+			optlen = 1;
+			break;
+		default:
+			optlen = nh[offset + 1] + 2;
+			if (optlen > len)
+				goto bad;
+			break;
+		}
+		offset += optlen;
+		len -= optlen;
+	}
+	/* not_found */
+ bad:
+	return -1;
+}
+EXPORT_SYMBOL_GPL(ipv6_find_tlv);
+
+/*
+ * find the offset to specified header or the protocol number of last header
+ * if target < 0. "last header" is transport protocol header, ESP, or
+ * "No next header".
+ *
+ * Note that *offset is used as input/output parameter. an if it is not zero,
+ * then it must be a valid offset to an inner IPv6 header. This can be used
+ * to explore inner IPv6 header, eg. ICMPv6 error messages.
+ *
+ * If target header is found, its offset is set in *offset and return protocol
+ * number. Otherwise, return -1.
+ *
+ * If the first fragment doesn't contain the final protocol header or
+ * NEXTHDR_NONE it is considered invalid.
+ *
+ * Note that non-1st fragment is special case that "the protocol number
+ * of last header" is "next header" field in Fragment header. In this case,
+ * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
+ * isn't NULL.
+ *
+ * if flags is not NULL and it's a fragment, then the frag flag
+ * IP6_FH_F_FRAG will be set. If it's an AH header, the
+ * IP6_FH_F_AUTH flag is set and target < 0, then this function will
+ * stop at the AH header. If IP6_FH_F_SKIP_RH flag was passed, then this
+ * function will skip all those routing headers, where segements_left was 0.
+ */
+int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
+		  int target, unsigned short *fragoff, int *flags)
+{
+	unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
+	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+	unsigned int len;
+	bool found;
+
+	if (fragoff)
+		*fragoff = 0;
+
+	if (*offset) {
+		struct ipv6hdr _ip6, *ip6;
+
+		ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6);
+		if (!ip6 || (ip6->version != 6)) {
+			printk(KERN_ERR "IPv6 header not found\n");
+			return -EBADMSG;
+		}
+		start = *offset + sizeof(struct ipv6hdr);
+		nexthdr = ip6->nexthdr;
+	}
+	len = skb->len - start;
+
+	do {
+		struct ipv6_opt_hdr _hdr, *hp;
+		unsigned int hdrlen;
+		found = (nexthdr == target);
+
+		if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
+			if (target < 0 || found)
+				break;
+			return -ENOENT;
+		}
+
+		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+		if (hp == NULL)
+			return -EBADMSG;
+
+		if (nexthdr == NEXTHDR_ROUTING) {
+			struct ipv6_rt_hdr _rh, *rh;
+
+			rh = skb_header_pointer(skb, start, sizeof(_rh),
+						&_rh);
+			if (rh == NULL)
+				return -EBADMSG;
+
+			if (flags && (*flags & IP6_FH_F_SKIP_RH) &&
+			    rh->segments_left == 0)
+				found = false;
+		}
+
+		if (nexthdr == NEXTHDR_FRAGMENT) {
+			unsigned short _frag_off;
+			__be16 *fp;
+
+			if (flags)	/* Indicate that this is a fragment */
+				*flags |= IP6_FH_F_FRAG;
+			fp = skb_header_pointer(skb,
+						start+offsetof(struct frag_hdr,
+							       frag_off),
+						sizeof(_frag_off),
+						&_frag_off);
+			if (fp == NULL)
+				return -EBADMSG;
+
+			_frag_off = ntohs(*fp) & ~0x7;
+			if (_frag_off) {
+				if (target < 0 &&
+				    ((!ipv6_ext_hdr(hp->nexthdr)) ||
+				     hp->nexthdr == NEXTHDR_NONE)) {
+					if (fragoff)
+						*fragoff = _frag_off;
+					return hp->nexthdr;
+				}
+				return -ENOENT;
+			}
+			hdrlen = 8;
+		} else if (nexthdr == NEXTHDR_AUTH) {
+			if (flags && (*flags & IP6_FH_F_AUTH) && (target < 0))
+				break;
+			hdrlen = (hp->hdrlen + 2) << 2;
+		} else
+			hdrlen = ipv6_optlen(hp);
+
+		if (!found) {
+			nexthdr = hp->nexthdr;
+			len -= hdrlen;
+			start += hdrlen;
+		}
+	} while (!found);
+
+	*offset = start;
+	return nexthdr;
+}
+EXPORT_SYMBOL(ipv6_find_hdr);
+
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
new file mode 100644
index 00000000000..447a7fbd1bb
--- /dev/null
+++ b/net/ipv6/exthdrs_offload.c
@@ -0,0 +1,41 @@
+/*
+ *	IPV6 GSO/GRO offload support
+ *	Linux INET6 implementation
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *      IPV6 Extension Header GSO/GRO support
+ */
+#include <net/protocol.h>
+#include "ip6_offload.h"
+
+static const struct net_offload rthdr_offload = {
+	.flags		=	INET6_PROTO_GSO_EXTHDR,
+};
+
+static const struct net_offload dstopt_offload = {
+	.flags		=	INET6_PROTO_GSO_EXTHDR,
+};
+
+int __init ipv6_exthdrs_offload_init(void)
+{
+	int ret;
+
+	ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING);
+	if (ret)
+		goto out;
+
+	ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS);
+	if (ret)
+		goto out_rt;
+
+out:
+	return ret;
+
+out_rt:
+	inet_del_offload(&rthdr_offload, IPPROTO_ROUTING);
+	goto out;
+}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
new file mode 100644
index 00000000000..b4d5e1d97c1
--- /dev/null
+++ b/net/ipv6/fib6_rules.c
@@ -0,0 +1,341 @@
+/*
+ * net/ipv6/fib6_rules.c	IPv6 Routing Policy Rules
+ *
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ *
+ * Authors
+ *	Thomas Graf		<tgraf@suug.ch>
+ *	Ville Nuorvala		<vnuorval@tcs.hut.fi>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/export.h>
+
+#include <net/fib_rules.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/netlink.h>
+
+struct fib6_rule {
+	struct fib_rule		common;
+	struct rt6key		src;
+	struct rt6key		dst;
+	u8			tclass;
+};
+
+struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+				   int flags, pol_lookup_t lookup)
+{
+	struct fib_lookup_arg arg = {
+		.lookup_ptr = lookup,
+		.flags = FIB_LOOKUP_NOREF,
+	};
+
+	fib_rules_lookup(net->ipv6.fib6_rules_ops,
+			 flowi6_to_flowi(fl6), flags, &arg);
+
+	if (arg.result)
+		return arg.result;
+
+	dst_hold(&net->ipv6.ip6_null_entry->dst);
+	return &net->ipv6.ip6_null_entry->dst;
+}
+
+static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
+			    int flags, struct fib_lookup_arg *arg)
+{
+	struct flowi6 *flp6 = &flp->u.ip6;
+	struct rt6_info *rt = NULL;
+	struct fib6_table *table;
+	struct net *net = rule->fr_net;
+	pol_lookup_t lookup = arg->lookup_ptr;
+	int err = 0;
+
+	switch (rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
+	case FR_ACT_UNREACHABLE:
+		err = -ENETUNREACH;
+		rt = net->ipv6.ip6_null_entry;
+		goto discard_pkt;
+	default:
+	case FR_ACT_BLACKHOLE:
+		err = -EINVAL;
+		rt = net->ipv6.ip6_blk_hole_entry;
+		goto discard_pkt;
+	case FR_ACT_PROHIBIT:
+		err = -EACCES;
+		rt = net->ipv6.ip6_prohibit_entry;
+		goto discard_pkt;
+	}
+
+	table = fib6_get_table(net, rule->table);
+	if (!table) {
+		err = -EAGAIN;
+		goto out;
+	}
+
+	rt = lookup(net, table, flp6, flags);
+	if (rt != net->ipv6.ip6_null_entry) {
+		struct fib6_rule *r = (struct fib6_rule *)rule;
+
+		/*
+		 * If we need to find a source address for this traffic,
+		 * we check the result if it meets requirement of the rule.
+		 */
+		if ((rule->flags & FIB_RULE_FIND_SADDR) &&
+		    r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
+			struct in6_addr saddr;
+
+			if (ipv6_dev_get_saddr(net,
+					       ip6_dst_idev(&rt->dst)->dev,
+					       &flp6->daddr,
+					       rt6_flags2srcprefs(flags),
+					       &saddr))
+				goto again;
+			if (!ipv6_prefix_equal(&saddr, &r->src.addr,
+					       r->src.plen))
+				goto again;
+			flp6->saddr = saddr;
+		}
+		goto out;
+	}
+again:
+	ip6_rt_put(rt);
+	err = -EAGAIN;
+	rt = NULL;
+	goto out;
+
+discard_pkt:
+	dst_hold(&rt->dst);
+out:
+	arg->result = rt;
+	return err;
+}
+
+static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
+{
+	struct rt6_info *rt = (struct rt6_info *) arg->result;
+	struct net_device *dev = NULL;
+
+	if (rt->rt6i_idev)
+		dev = rt->rt6i_idev->dev;
+
+	/* do not accept result if the route does
+	 * not meet the required prefix length
+	 */
+	if (rt->rt6i_dst.plen <= rule->suppress_prefixlen)
+		goto suppress_route;
+
+	/* do not accept result if the route uses a device
+	 * belonging to a forbidden interface group
+	 */
+	if (rule->suppress_ifgroup != -1 && dev && dev->group == rule->suppress_ifgroup)
+		goto suppress_route;
+
+	return false;
+
+suppress_route:
+	ip6_rt_put(rt);
+	return true;
+}
+
+static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
+{
+	struct fib6_rule *r = (struct fib6_rule *) rule;
+	struct flowi6 *fl6 = &fl->u.ip6;
+
+	if (r->dst.plen &&
+	    !ipv6_prefix_equal(&fl6->daddr, &r->dst.addr, r->dst.plen))
+		return 0;
+
+	/*
+	 * If FIB_RULE_FIND_SADDR is set and we do not have a
+	 * source address for the traffic, we defer check for
+	 * source address.
+	 */
+	if (r->src.plen) {
+		if (flags & RT6_LOOKUP_F_HAS_SADDR) {
+			if (!ipv6_prefix_equal(&fl6->saddr, &r->src.addr,
+					       r->src.plen))
+				return 0;
+		} else if (!(r->common.flags & FIB_RULE_FIND_SADDR))
+			return 0;
+	}
+
+	if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
+		return 0;
+
+	return 1;
+}
+
+static const struct nla_policy fib6_rule_policy[FRA_MAX+1] = {
+	FRA_GENERIC_POLICY,
+};
+
+static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+			       struct fib_rule_hdr *frh,
+			       struct nlattr **tb)
+{
+	int err = -EINVAL;
+	struct net *net = sock_net(skb->sk);
+	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+	if (rule->action == FR_ACT_TO_TBL) {
+		if (rule->table == RT6_TABLE_UNSPEC)
+			goto errout;
+
+		if (fib6_new_table(net, rule->table) == NULL) {
+			err = -ENOBUFS;
+			goto errout;
+		}
+	}
+
+	if (frh->src_len)
+		nla_memcpy(&rule6->src.addr, tb[FRA_SRC],
+			   sizeof(struct in6_addr));
+
+	if (frh->dst_len)
+		nla_memcpy(&rule6->dst.addr, tb[FRA_DST],
+			   sizeof(struct in6_addr));
+
+	rule6->src.plen = frh->src_len;
+	rule6->dst.plen = frh->dst_len;
+	rule6->tclass = frh->tos;
+
+	err = 0;
+errout:
+	return err;
+}
+
+static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+			     struct nlattr **tb)
+{
+	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+	if (frh->src_len && (rule6->src.plen != frh->src_len))
+		return 0;
+
+	if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
+		return 0;
+
+	if (frh->tos && (rule6->tclass != frh->tos))
+		return 0;
+
+	if (frh->src_len &&
+	    nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
+		return 0;
+
+	if (frh->dst_len &&
+	    nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr)))
+		return 0;
+
+	return 1;
+}
+
+static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+			  struct fib_rule_hdr *frh)
+{
+	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+	frh->dst_len = rule6->dst.plen;
+	frh->src_len = rule6->src.plen;
+	frh->tos = rule6->tclass;
+
+	if ((rule6->dst.plen &&
+	     nla_put(skb, FRA_DST, sizeof(struct in6_addr),
+		     &rule6->dst.addr)) ||
+	    (rule6->src.plen &&
+	     nla_put(skb, FRA_SRC, sizeof(struct in6_addr),
+		     &rule6->src.addr)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -ENOBUFS;
+}
+
+static u32 fib6_rule_default_pref(struct fib_rules_ops *ops)
+{
+	return 0x3FFF;
+}
+
+static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
+{
+	return nla_total_size(16) /* dst */
+	       + nla_total_size(16); /* src */
+}
+
+static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
+	.family			= AF_INET6,
+	.rule_size		= sizeof(struct fib6_rule),
+	.addr_size		= sizeof(struct in6_addr),
+	.action			= fib6_rule_action,
+	.match			= fib6_rule_match,
+	.suppress		= fib6_rule_suppress,
+	.configure		= fib6_rule_configure,
+	.compare		= fib6_rule_compare,
+	.fill			= fib6_rule_fill,
+	.default_pref		= fib6_rule_default_pref,
+	.nlmsg_payload		= fib6_rule_nlmsg_payload,
+	.nlgroup		= RTNLGRP_IPV6_RULE,
+	.policy			= fib6_rule_policy,
+	.owner			= THIS_MODULE,
+	.fro_net		= &init_net,
+};
+
+static int __net_init fib6_rules_net_init(struct net *net)
+{
+	struct fib_rules_ops *ops;
+	int err = -ENOMEM;
+
+	ops = fib_rules_register(&fib6_rules_ops_template, net);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+	net->ipv6.fib6_rules_ops = ops;
+
+
+	err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0,
+				   RT6_TABLE_LOCAL, 0);
+	if (err)
+		goto out_fib6_rules_ops;
+
+	err = fib_default_rule_add(net->ipv6.fib6_rules_ops,
+				   0x7FFE, RT6_TABLE_MAIN, 0);
+	if (err)
+		goto out_fib6_rules_ops;
+
+out:
+	return err;
+
+out_fib6_rules_ops:
+	fib_rules_unregister(ops);
+	goto out;
+}
+
+static void __net_exit fib6_rules_net_exit(struct net *net)
+{
+	fib_rules_unregister(net->ipv6.fib6_rules_ops);
+}
+
+static struct pernet_operations fib6_rules_net_ops = {
+	.init = fib6_rules_net_init,
+	.exit = fib6_rules_net_exit,
+};
+
+int __init fib6_rules_init(void)
+{
+	return register_pernet_subsys(&fib6_rules_net_ops);
+}
+
+
+void fib6_rules_cleanup(void)
+{
+	unregister_pernet_subsys(&fib6_rules_net_ops);
+}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 23e540365a1..f6c84a6eb23 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *
- *	$Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
- *
  *	Based on net/ipv4/icmp.c
  *
  *	RFC 1885
@@ -31,17 +29,20 @@
  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
  */
 
+#define pr_fmt(fmt) "IPv6: " fmt
+
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/kernel.h>
-#include <linux/sched.h>
 #include <linux/sockios.h>
 #include <linux/net.h>
 #include <linux/skbuff.h>
 #include <linux/init.h>
+#include <linux/netfilter.h>
+#include <linux/slab.h>
 
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
@@ -56,6 +57,7 @@
 
 #include <net/ipv6.h>
 #include <net/ip6_checksum.h>
+#include <net/ping.h>
 #include <net/protocol.h>
 #include <net/raw.h>
 #include <net/rawv6.h>
@@ -63,11 +65,11 @@
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
 #include <net/icmp.h>
+#include <net/xfrm.h>
+#include <net/inet_common.h>
+#include <net/dsfield.h>
 
 #include <asm/uaccess.h>
-#include <asm/system.h>
-
-DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
 
 /*
  *	The ICMP socket(s). This is the most convenient way to flow control
@@ -76,43 +78,57 @@ DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
  *
  *	On SMP we have one ICMP socket per-cpu.
  */
-static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
-#define icmpv6_socket	__get_cpu_var(__icmpv6_socket)
+static inline struct sock *icmpv6_sk(struct net *net)
+{
+	return net->ipv6.icmp_sk[smp_processor_id()];
+}
+
+static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		       u8 type, u8 code, int offset, __be32 info)
+{
+	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
+	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
+	struct net *net = dev_net(skb->dev);
+
+	if (type == ICMPV6_PKT_TOOBIG)
+		ip6_update_pmtu(skb, net, info, 0, 0);
+	else if (type == NDISC_REDIRECT)
+		ip6_redirect(skb, net, skb->dev->ifindex, 0);
 
-static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp);
+	if (!(type & ICMPV6_INFOMSG_MASK))
+		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
+			ping_err(skb, offset, info);
+}
+
+static int icmpv6_rcv(struct sk_buff *skb);
 
-static struct inet6_protocol icmpv6_protocol = {
+static const struct inet6_protocol icmpv6_protocol = {
 	.handler	=	icmpv6_rcv,
-	.flags		=	INET6_PROTO_FINAL,
+	.err_handler	=	icmpv6_err,
+	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 };
 
-static __inline__ int icmpv6_xmit_lock(void)
+static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
 {
+	struct sock *sk;
+
 	local_bh_disable();
 
-	if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) {
+	sk = icmpv6_sk(net);
+	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
 		/* This can happen if the output path (f.e. SIT or
 		 * ip6ip6 tunnel) signals dst_link_failure() for an
 		 * outgoing ICMP6 packet.
 		 */
 		local_bh_enable();
-		return 1;
+		return NULL;
 	}
-	return 0;
+	return sk;
 }
 
-static __inline__ void icmpv6_xmit_unlock(void)
-{
-	spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock);
-}
-
-/* 
- * Slightly more convenient version of icmpv6_send.
- */
-void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
+static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
 {
-	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
-	kfree_skb(skb);
+	spin_unlock_bh(&sk->sk_lock.slock);
 }
 
 /*
@@ -126,18 +142,19 @@ void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
  *	--ANK (980726)
  */
 
-static int is_ineligible(struct sk_buff *skb)
+static bool is_ineligible(const struct sk_buff *skb)
 {
-	int ptr = (u8*)(skb->nh.ipv6h+1) - skb->data;
+	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
 	int len = skb->len - ptr;
-	__u8 nexthdr = skb->nh.ipv6h->nexthdr;
+	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+	__be16 frag_off;
 
 	if (len < 0)
-		return 1;
+		return true;
 
-	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
+	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
 	if (ptr < 0)
-		return 0;
+		return false;
 	if (nexthdr == IPPROTO_ICMPV6) {
 		u8 _type, *tp;
 		tp = skb_header_pointer(skb,
@@ -145,49 +162,53 @@ static int is_ineligible(struct sk_buff *skb)
 			sizeof(_type), &_type);
 		if (tp == NULL ||
 		    !(*tp & ICMPV6_INFOMSG_MASK))
-			return 1;
+			return true;
 	}
-	return 0;
+	return false;
 }
 
-static int sysctl_icmpv6_time = 1*HZ; 
-
-/* 
- * Check the ICMP output rate limit 
+/*
+ * Check the ICMP output rate limit
  */
-static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
-				     struct flowi *fl)
+static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
+				      struct flowi6 *fl6)
 {
 	struct dst_entry *dst;
-	int res = 0;
+	struct net *net = sock_net(sk);
+	bool res = false;
 
 	/* Informational messages are not limited. */
 	if (type & ICMPV6_INFOMSG_MASK)
-		return 1;
+		return true;
 
 	/* Do not limit pmtu discovery, it would break it. */
 	if (type == ICMPV6_PKT_TOOBIG)
-		return 1;
+		return true;
 
-	/* 
+	/*
 	 * Look up the output route.
 	 * XXX: perhaps the expire for routing entries cloned by
 	 * this lookup should be more aggressive (not longer than timeout).
 	 */
-	dst = ip6_route_output(sk, fl);
+	dst = ip6_route_output(net, sk, fl6);
 	if (dst->error) {
-		IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
+		IP6_INC_STATS(net, ip6_dst_idev(dst),
+			      IPSTATS_MIB_OUTNOROUTES);
 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
-		res = 1;
+		res = true;
 	} else {
 		struct rt6_info *rt = (struct rt6_info *)dst;
-		int tmo = sysctl_icmpv6_time;
+		int tmo = net->ipv6.sysctl.icmpv6_time;
+		struct inet_peer *peer;
 
 		/* Give more bandwidth to wider prefixes. */
 		if (rt->rt6i_dst.plen < 128)
 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 
-		res = xrlim_allow(dst, tmo);
+		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+		res = inet_peer_xrlim_allow(peer, tmo);
+		if (peer)
+			inet_putpeer(peer);
 	}
 	dst_release(dst);
 	return res;
@@ -196,22 +217,23 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
 /*
  *	an inline helper for the "simple" if statement below
  *	checks if parameter problem report is caused by an
- *	unrecognized IPv6 option that has the Option Type 
+ *	unrecognized IPv6 option that has the Option Type
  *	highest-order two bits set to 10
  */
 
-static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
+static bool opt_unrec(struct sk_buff *skb, __u32 offset)
 {
 	u8 _optval, *op;
 
-	offset += skb->nh.raw - skb->data;
+	offset += skb_network_offset(skb);
 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
 	if (op == NULL)
-		return 1;
+		return true;
 	return (*op & 0xC0) == 0x80;
 }
 
-static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
+int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+			       struct icmp6hdr *thdr, int len)
 {
 	struct sk_buff *skb;
 	struct icmp6hdr *icmp6h;
@@ -220,33 +242,31 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
 	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
 		goto out;
 
-	icmp6h = (struct icmp6hdr*) skb->h.raw;
+	icmp6h = icmp6_hdr(skb);
 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
 	icmp6h->icmp6_cksum = 0;
 
 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
-		skb->csum = csum_partial((char *)icmp6h,
+		skb->csum = csum_partial(icmp6h,
 					sizeof(struct icmp6hdr), skb->csum);
-		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
-						      &fl->fl6_dst,
-						      len, fl->proto,
+		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
+						      &fl6->daddr,
+						      len, fl6->flowi6_proto,
 						      skb->csum);
 	} else {
-		u32 tmp_csum = 0;
+		__wsum tmp_csum = 0;
 
 		skb_queue_walk(&sk->sk_write_queue, skb) {
 			tmp_csum = csum_add(tmp_csum, skb->csum);
 		}
 
-		tmp_csum = csum_partial((char *)icmp6h,
+		tmp_csum = csum_partial(icmp6h,
 					sizeof(struct icmp6hdr), tmp_csum);
-		tmp_csum = csum_ipv6_magic(&fl->fl6_src,
-					   &fl->fl6_dst,
-					   len, fl->proto, tmp_csum);
-		icmp6h->icmp6_cksum = tmp_csum;
+		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
+						      &fl6->daddr,
+						      len, fl6->flowi6_proto,
+						      tmp_csum);
 	}
-	if (icmp6h->icmp6_cksum == 0)
-		icmp6h->icmp6_cksum = -1;
 	ip6_push_pending_frames(sk);
 out:
 	return err;
@@ -255,53 +275,147 @@ out:
 struct icmpv6_msg {
 	struct sk_buff	*skb;
 	int		offset;
+	uint8_t		type;
 };
 
 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
 {
 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
 	struct sk_buff *org_skb = msg->skb;
-	__u32 csum = 0;
+	__wsum csum = 0;
 
 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
 				      to, len, csum);
 	skb->csum = csum_block_add(skb->csum, csum, odd);
+	if (!(msg->type & ICMPV6_INFOMSG_MASK))
+		nf_ct_attach(skb, org_skb);
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+static void mip6_addr_swap(struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct inet6_skb_parm *opt = IP6CB(skb);
+	struct ipv6_destopt_hao *hao;
+	struct in6_addr tmp;
+	int off;
+
+	if (opt->dsthao) {
+		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
+		if (likely(off >= 0)) {
+			hao = (struct ipv6_destopt_hao *)
+					(skb_network_header(skb) + off);
+			tmp = iph->saddr;
+			iph->saddr = hao->addr;
+			hao->addr = tmp;
+		}
+	}
+}
+#else
+static inline void mip6_addr_swap(struct sk_buff *skb) {}
+#endif
+
+static struct dst_entry *icmpv6_route_lookup(struct net *net,
+					     struct sk_buff *skb,
+					     struct sock *sk,
+					     struct flowi6 *fl6)
+{
+	struct dst_entry *dst, *dst2;
+	struct flowi6 fl2;
+	int err;
+
+	err = ip6_dst_lookup(sk, &dst, fl6);
+	if (err)
+		return ERR_PTR(err);
+
+	/*
+	 * We won't send icmp if the destination is known
+	 * anycast.
+	 */
+	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
+		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n");
+		dst_release(dst);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* No need to clone since we're just using its address. */
+	dst2 = dst;
+
+	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
+	if (!IS_ERR(dst)) {
+		if (dst != dst2)
+			return dst;
+	} else {
+		if (PTR_ERR(dst) == -EPERM)
+			dst = NULL;
+		else
+			return dst;
+	}
+
+	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
+	if (err)
+		goto relookup_failed;
+
+	err = ip6_dst_lookup(sk, &dst2, &fl2);
+	if (err)
+		goto relookup_failed;
+
+	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
+	if (!IS_ERR(dst2)) {
+		dst_release(dst);
+		dst = dst2;
+	} else {
+		err = PTR_ERR(dst2);
+		if (err == -EPERM) {
+			dst_release(dst);
+			return dst2;
+		} else
+			goto relookup_failed;
+	}
+
+relookup_failed:
+	if (dst)
+		return dst;
+	return ERR_PTR(err);
+}
+
 /*
  *	Send an ICMP message in response to a packet in error
  */
-void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, 
-		 struct net_device *dev)
+static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 {
+	struct net *net = dev_net(skb->dev);
 	struct inet6_dev *idev = NULL;
-	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct sock *sk;
 	struct ipv6_pinfo *np;
-	struct in6_addr *saddr = NULL;
+	const struct in6_addr *saddr = NULL;
 	struct dst_entry *dst;
 	struct icmp6hdr tmp_hdr;
-	struct flowi fl;
+	struct flowi6 fl6;
 	struct icmpv6_msg msg;
 	int iif = 0;
 	int addr_type = 0;
 	int len;
-	int hlimit, tclass;
+	int hlimit;
 	int err = 0;
+	u32 mark = IP6_REPLY_MARK(net, skb->mark);
 
-	if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
+	if ((u8 *)hdr < skb->head ||
+	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
 		return;
 
 	/*
-	 *	Make sure we respect the rules 
+	 *	Make sure we respect the rules
 	 *	i.e. RFC 1885 2.4(e)
-	 *	Rule (e.1) is enforced by not using icmpv6_send
+	 *	Rule (e.1) is enforced by not using icmp6_send
 	 *	in any code that processes icmp errors.
 	 */
 	addr_type = ipv6_addr_type(&hdr->daddr);
 
-	if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0))
+	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
+	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
 		saddr = &hdr->daddr;
 
 	/*
@@ -310,8 +424,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 
 	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
 		if (type != ICMPV6_PKT_TOOBIG &&
-		    !(type == ICMPV6_PARAMPROB && 
-		      code == ICMPV6_UNK_OPTION && 
+		    !(type == ICMPV6_PARAMPROB &&
+		      code == ICMPV6_UNK_OPTION &&
 		      (opt_unrec(skb, info))))
 			return;
 
@@ -324,42 +438,48 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	 *	Source addr check
 	 */
 
-	if (addr_type & IPV6_ADDR_LINKLOCAL)
+	if (__ipv6_addr_needs_scope_id(addr_type))
 		iif = skb->dev->ifindex;
 
 	/*
-	 *	Must not send if we know that source is Anycast also.
-	 *	for now we don't know that.
+	 *	Must not send error if the source does not uniquely
+	 *	identify a single node (RFC2463 Section 2.4).
+	 *	We check unspecified / multicast addresses here,
+	 *	and anycast addresses will be checked later.
 	 */
 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
-		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
+		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n");
 		return;
 	}
 
-	/* 
+	/*
 	 *	Never answer to a ICMP packet.
 	 */
 	if (is_ineligible(skb)) {
-		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
+		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n");
 		return;
 	}
 
-	memset(&fl, 0, sizeof(fl));
-	fl.proto = IPPROTO_ICMPV6;
-	ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
-	if (saddr)
-		ipv6_addr_copy(&fl.fl6_src, saddr);
-	fl.oif = iif;
-	fl.fl_icmp_type = type;
-	fl.fl_icmp_code = code;
+	mip6_addr_swap(skb);
 
-	if (icmpv6_xmit_lock())
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_proto = IPPROTO_ICMPV6;
+	fl6.daddr = hdr->saddr;
+	if (saddr)
+		fl6.saddr = *saddr;
+	fl6.flowi6_mark = mark;
+	fl6.flowi6_oif = iif;
+	fl6.fl6_icmp_type = type;
+	fl6.fl6_icmp_code = code;
+	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
+
+	sk = icmpv6_xmit_lock(net);
+	if (sk == NULL)
 		return;
-
-	sk = icmpv6_socket->sk;
+	sk->sk_mark = mark;
 	np = inet6_sk(sk);
 
-	if (!icmpv6_xrlim_allow(sk, type, &fl))
+	if (!icmpv6_xrlim_allow(sk, type, &fl6))
 		goto out;
 
 	tmp_hdr.icmp6_type = type;
@@ -367,30 +487,20 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	tmp_hdr.icmp6_cksum = 0;
 	tmp_hdr.icmp6_pointer = htonl(info);
 
-	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
-		fl.oif = np->mcast_oif;
+	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+		fl6.flowi6_oif = np->mcast_oif;
+	else if (!fl6.flowi6_oif)
+		fl6.flowi6_oif = np->ucast_oif;
 
-	err = ip6_dst_lookup(sk, &dst, &fl);
-	if (err)
+	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
+	if (IS_ERR(dst))
 		goto out;
-	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
-		goto out;
-
-	if (ipv6_addr_is_multicast(&fl.fl6_dst))
-		hlimit = np->mcast_hops;
-	else
-		hlimit = np->hop_limit;
-	if (hlimit < 0)
-		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
-	if (hlimit < 0)
-		hlimit = ipv6_get_hoplimit(dst->dev);
 
-	tclass = np->cork.tclass;
-	if (tclass < 0)
-		tclass = 0;
+	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 
 	msg.skb = skb;
-	msg.offset = skb->nh.raw - skb->data;
+	msg.offset = skb_network_offset(skb);
+	msg.type = type;
 
 	len = skb->len - msg.offset;
 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
@@ -399,124 +509,122 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 		goto out_dst_release;
 	}
 
-	idev = in6_dev_get(skb->dev);
+	rcu_read_lock();
+	idev = __in6_dev_get(skb->dev);
 
 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
 			      len + sizeof(struct icmp6hdr),
-			      sizeof(struct icmp6hdr),
-			      hlimit, tclass, NULL, &fl, (struct rt6_info*)dst,
-			      MSG_DONTWAIT);
+			      sizeof(struct icmp6hdr), hlimit,
+			      np->tclass, NULL, &fl6, (struct rt6_info *)dst,
+			      MSG_DONTWAIT, np->dontfrag);
 	if (err) {
+		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
-		goto out_put;
+	} else {
+		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
+						 len + sizeof(struct icmp6hdr));
 	}
-	err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
-
-	if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
-		ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_OUTDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
-	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
-
-out_put:
-	if (likely(idev != NULL))
-		in6_dev_put(idev);
+	rcu_read_unlock();
 out_dst_release:
 	dst_release(dst);
 out:
-	icmpv6_xmit_unlock();
+	icmpv6_xmit_unlock(sk);
+}
+
+/* Slightly more convenient version of icmp6_send.
+ */
+void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
+{
+	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos);
+	kfree_skb(skb);
 }
 
 static void icmpv6_echo_reply(struct sk_buff *skb)
 {
+	struct net *net = dev_net(skb->dev);
 	struct sock *sk;
 	struct inet6_dev *idev;
 	struct ipv6_pinfo *np;
-	struct in6_addr *saddr = NULL;
-	struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
+	const struct in6_addr *saddr = NULL;
+	struct icmp6hdr *icmph = icmp6_hdr(skb);
 	struct icmp6hdr tmp_hdr;
-	struct flowi fl;
+	struct flowi6 fl6;
 	struct icmpv6_msg msg;
 	struct dst_entry *dst;
 	int err = 0;
 	int hlimit;
-	int tclass;
+	u8 tclass;
+	u32 mark = IP6_REPLY_MARK(net, skb->mark);
 
-	saddr = &skb->nh.ipv6h->daddr;
+	saddr = &ipv6_hdr(skb)->daddr;
 
-	if (!ipv6_unicast_destination(skb))
+	if (!ipv6_unicast_destination(skb) &&
+	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
+	      ipv6_anycast_destination(skb)))
 		saddr = NULL;
 
 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
 
-	memset(&fl, 0, sizeof(fl));
-	fl.proto = IPPROTO_ICMPV6;
-	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_proto = IPPROTO_ICMPV6;
+	fl6.daddr = ipv6_hdr(skb)->saddr;
 	if (saddr)
-		ipv6_addr_copy(&fl.fl6_src, saddr);
-	fl.oif = skb->dev->ifindex;
-	fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
-
-	if (icmpv6_xmit_lock())
+		fl6.saddr = *saddr;
+	fl6.flowi6_oif = skb->dev->ifindex;
+	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
+	fl6.flowi6_mark = mark;
+	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
+
+	sk = icmpv6_xmit_lock(net);
+	if (sk == NULL)
 		return;
-
-	sk = icmpv6_socket->sk;
+	sk->sk_mark = mark;
 	np = inet6_sk(sk);
 
-	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
-		fl.oif = np->mcast_oif;
+	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+		fl6.flowi6_oif = np->mcast_oif;
+	else if (!fl6.flowi6_oif)
+		fl6.flowi6_oif = np->ucast_oif;
 
-	err = ip6_dst_lookup(sk, &dst, &fl);
+	err = ip6_dst_lookup(sk, &dst, &fl6);
 	if (err)
 		goto out;
-	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
+	if (IS_ERR(dst))
 		goto out;
 
-	if (ipv6_addr_is_multicast(&fl.fl6_dst))
-		hlimit = np->mcast_hops;
-	else
-		hlimit = np->hop_limit;
-	if (hlimit < 0)
-		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
-	if (hlimit < 0)
-		hlimit = ipv6_get_hoplimit(dst->dev);
-
-	tclass = np->cork.tclass;
-	if (tclass < 0)
-		tclass = 0;
+	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 
-	idev = in6_dev_get(skb->dev);
+	idev = __in6_dev_get(skb->dev);
 
 	msg.skb = skb;
 	msg.offset = 0;
+	msg.type = ICMPV6_ECHO_REPLY;
 
+	tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
-				sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl,
-				(struct rt6_info*)dst, MSG_DONTWAIT);
+				sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6,
+				(struct rt6_info *)dst, MSG_DONTWAIT,
+				np->dontfrag);
 
 	if (err) {
+		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
-		goto out_put;
+	} else {
+		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
+						 skb->len + sizeof(struct icmp6hdr));
 	}
-	err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
-
-        ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTECHOREPLIES);
-        ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
-
-out_put: 
-	if (likely(idev != NULL))
-		in6_dev_put(idev);
 	dst_release(dst);
-out: 
-	icmpv6_xmit_unlock();
+out:
+	icmpv6_xmit_unlock(sk);
 }
 
-static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
+void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 {
-	struct in6_addr *saddr, *daddr;
-	struct inet6_protocol *ipprot;
-	struct sock *sk;
+	const struct inet6_protocol *ipprot;
 	int inner_offset;
-	int hash;
+	__be16 frag_off;
 	u8 nexthdr;
 
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
@@ -525,7 +633,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
 	if (ipv6_ext_hdr(nexthdr)) {
 		/* now skip over extension headers */
-		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
+		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+						&nexthdr, &frag_off);
 		if (inner_offset<0)
 			return;
 	} else {
@@ -536,9 +645,6 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
 	if (!pskb_may_pull(skb, inner_offset+8))
 		return;
 
-	saddr = &skb->nh.ipv6h->saddr;
-	daddr = &skb->nh.ipv6h->daddr;
-
 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
 	   Without this we will not able f.e. to make source routed
 	   pmtu discovery.
@@ -546,73 +652,67 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
 	   --ANK (980726)
 	 */
 
-	hash = nexthdr & (MAX_INET_PROTOS - 1);
-
 	rcu_read_lock();
-	ipprot = rcu_dereference(inet6_protos[hash]);
+	ipprot = rcu_dereference(inet6_protos[nexthdr]);
 	if (ipprot && ipprot->err_handler)
 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
 	rcu_read_unlock();
 
-	read_lock(&raw_v6_lock);
-	if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
-		while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr,
-					    IP6CB(skb)->iif))) {
-			rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
-			sk = sk_next(sk);
-		}
-	}
-	read_unlock(&raw_v6_lock);
+	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
 }
-  
+
 /*
  *	Handle icmp messages
  */
 
-static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+static int icmpv6_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *pskb;
 	struct net_device *dev = skb->dev;
 	struct inet6_dev *idev = __in6_dev_get(dev);
-	struct in6_addr *saddr, *daddr;
-	struct ipv6hdr *orig_hdr;
+	const struct in6_addr *saddr, *daddr;
 	struct icmp6hdr *hdr;
-	int type;
+	u8 type;
 
-	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
+	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+		struct sec_path *sp = skb_sec_path(skb);
+		int nh;
 
-	saddr = &skb->nh.ipv6h->saddr;
-	daddr = &skb->nh.ipv6h->daddr;
+		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
+				 XFRM_STATE_ICMP))
+			goto drop_no_count;
 
-	/* Perform checksum. */
-	if (skb->ip_summed == CHECKSUM_HW) {
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
-				    skb->csum)) {
-			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 hw checksum failed\n");
-			skb->ip_summed = CHECKSUM_NONE;
-		}
+		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
+			goto drop_no_count;
+
+		nh = skb_network_offset(skb);
+		skb_set_network_header(skb, sizeof(*hdr));
+
+		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
+			goto drop_no_count;
+
+		skb_set_network_header(skb, nh);
 	}
-	if (skb->ip_summed == CHECKSUM_NONE) {
-		if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
-				    skb_checksum(skb, 0, skb->len, 0))) {
-			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
-				       NIP6(*saddr), NIP6(*daddr));
-			goto discard_it;
-		}
+
+	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
+
+	saddr = &ipv6_hdr(skb)->saddr;
+	daddr = &ipv6_hdr(skb)->daddr;
+
+	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
+		LIMIT_NETDEBUG(KERN_DEBUG
+			       "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
+			       saddr, daddr);
+		goto csum_error;
 	}
 
-	if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
+	if (!pskb_pull(skb, sizeof(*hdr)))
 		goto discard_it;
 
-	hdr = (struct icmp6hdr *) skb->h.raw;
+	hdr = icmp6_hdr(skb);
 
 	type = hdr->icmp6_type;
 
-	if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
-		ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
-	else if (type >= ICMPV6_ECHO_REQUEST && type <= NDISC_REDIRECT)
-		ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INECHOS, type - ICMPV6_ECHO_REQUEST);
+	ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
 
 	switch (type) {
 	case ICMPV6_ECHO_REQUEST:
@@ -620,7 +720,7 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
 		break;
 
 	case ICMPV6_ECHO_REPLY:
-		/* we couldn't care less */
+		ping_rcv(skb);
 		break;
 
 	case ICMPV6_PKT_TOOBIG:
@@ -631,10 +731,7 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
 		 */
 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 			goto discard_it;
-		hdr = (struct icmp6hdr *) skb->h.raw;
-		orig_hdr = (struct ipv6hdr *) (hdr + 1);
-		rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
-				   ntohl(hdr->icmp6_mtu));
+		hdr = icmp6_hdr(skb);
 
 		/*
 		 *	Drop through to notify
@@ -679,80 +776,140 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
 		if (type & ICMPV6_INFOMSG_MASK)
 			break;
 
-		/* 
-		 * error of unknown type. 
-		 * must pass to upper level 
+		/*
+		 * error of unknown type.
+		 * must pass to upper level
 		 */
 
 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
-	};
+	}
+
 	kfree_skb(skb);
 	return 0;
 
+csum_error:
+	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
 discard_it:
-	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
+	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
+drop_no_count:
 	kfree_skb(skb);
 	return 0;
 }
 
-int __init icmpv6_init(struct net_proto_family *ops)
+void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
+		      u8 type,
+		      const struct in6_addr *saddr,
+		      const struct in6_addr *daddr,
+		      int oif)
+{
+	memset(fl6, 0, sizeof(*fl6));
+	fl6->saddr = *saddr;
+	fl6->daddr = *daddr;
+	fl6->flowi6_proto 	= IPPROTO_ICMPV6;
+	fl6->fl6_icmp_type	= type;
+	fl6->fl6_icmp_code	= 0;
+	fl6->flowi6_oif		= oif;
+	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
+}
+
+/*
+ * Special lock-class for __icmpv6_sk:
+ */
+static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
+
+static int __net_init icmpv6_sk_init(struct net *net)
 {
 	struct sock *sk;
 	int err, i, j;
 
-	for_each_cpu(i) {
-		err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
-				       &per_cpu(__icmpv6_socket, i));
+	net->ipv6.icmp_sk =
+		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
+	if (net->ipv6.icmp_sk == NULL)
+		return -ENOMEM;
+
+	for_each_possible_cpu(i) {
+		err = inet_ctl_sock_create(&sk, PF_INET6,
+					   SOCK_RAW, IPPROTO_ICMPV6, net);
 		if (err < 0) {
-			printk(KERN_ERR
-			       "Failed to initialize the ICMP6 control socket "
-			       "(err %d).\n",
+			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
 			       err);
 			goto fail;
 		}
 
-		sk = per_cpu(__icmpv6_socket, i)->sk;
-		sk->sk_allocation = GFP_ATOMIC;
+		net->ipv6.icmp_sk[i] = sk;
+
+		/*
+		 * Split off their lock-class, because sk->sk_dst_lock
+		 * gets used from softirqs, which is safe for
+		 * __icmpv6_sk (because those never get directly used
+		 * via userspace syscalls), but unsafe for normal sockets.
+		 */
+		lockdep_set_class(&sk->sk_dst_lock,
+				  &icmpv6_socket_sk_dst_lock_key);
 
 		/* Enough space for 2 64K ICMP packets, including
 		 * sk_buff struct overhead.
 		 */
-		sk->sk_sndbuf =
-			(2 * ((64 * 1024) + sizeof(struct sk_buff)));
+		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
+	}
+	return 0;
 
-		sk->sk_prot->unhash(sk);
+ fail:
+	for (j = 0; j < i; j++)
+		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
+	kfree(net->ipv6.icmp_sk);
+	return err;
+}
+
+static void __net_exit icmpv6_sk_exit(struct net *net)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
 	}
+	kfree(net->ipv6.icmp_sk);
+}
+
+static struct pernet_operations icmpv6_sk_ops = {
+       .init = icmpv6_sk_init,
+       .exit = icmpv6_sk_exit,
+};
+
+int __init icmpv6_init(void)
+{
+	int err;
 
+	err = register_pernet_subsys(&icmpv6_sk_ops);
+	if (err < 0)
+		return err;
 
-	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
-		printk(KERN_ERR "Failed to register ICMP6 protocol\n");
-		err = -EAGAIN;
+	err = -EAGAIN;
+	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
 		goto fail;
-	}
 
+	err = inet6_register_icmp_sender(icmp6_send);
+	if (err)
+		goto sender_reg_err;
 	return 0;
 
- fail:
-	for (j = 0; j < i; j++) {
-		if (!cpu_possible(j))
-			continue;
-		sock_release(per_cpu(__icmpv6_socket, j));
-	}
-
+sender_reg_err:
+	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
+fail:
+	pr_err("Failed to register ICMP6 protocol\n");
+	unregister_pernet_subsys(&icmpv6_sk_ops);
 	return err;
 }
 
 void icmpv6_cleanup(void)
 {
-	int i;
-
-	for_each_cpu(i) {
-		sock_release(per_cpu(__icmpv6_socket, i));
-	}
+	inet6_unregister_icmp_sender(icmp6_send);
+	unregister_pernet_subsys(&icmpv6_sk_ops);
 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 }
 
-static struct icmp6_err {
+
+static const struct icmp6_err {
 	int err;
 	int fatal;
 } tab_unreach[] = {
@@ -776,9 +933,17 @@ static struct icmp6_err {
 		.err	= ECONNREFUSED,
 		.fatal	= 1,
 	},
+	{	/* POLICY_FAIL */
+		.err	= EACCES,
+		.fatal	= 1,
+	},
+	{	/* REJECT_ROUTE	*/
+		.err	= EACCES,
+		.fatal	= 1,
+	},
 };
 
-int icmpv6_err_convert(int type, int code, int *err)
+int icmpv6_err_convert(u8 type, u8 code, int *err)
 {
 	int fatal = 0;
 
@@ -787,7 +952,7 @@ int icmpv6_err_convert(int type, int code, int *err)
 	switch (type) {
 	case ICMPV6_DEST_UNREACH:
 		fatal = 1;
-		if (code <= ICMPV6_PORT_UNREACH) {
+		if (code < ARRAY_SIZE(tab_unreach)) {
 			*err  = tab_unreach[code].err;
 			fatal = tab_unreach[code].fatal;
 		}
@@ -796,7 +961,7 @@ int icmpv6_err_convert(int type, int code, int *err)
 	case ICMPV6_PKT_TOOBIG:
 		*err = EMSGSIZE;
 		break;
-		
+
 	case ICMPV6_PARAMPROB:
 		*err = EPROTO;
 		fatal = 1;
@@ -805,22 +970,36 @@ int icmpv6_err_convert(int type, int code, int *err)
 	case ICMPV6_TIME_EXCEED:
 		*err = EHOSTUNREACH;
 		break;
-	};
+	}
 
 	return fatal;
 }
+EXPORT_SYMBOL(icmpv6_err_convert);
 
 #ifdef CONFIG_SYSCTL
-ctl_table ipv6_icmp_table[] = {
+static struct ctl_table ipv6_icmp_table_template[] = {
 	{
-		.ctl_name	= NET_IPV6_ICMP_RATELIMIT,
 		.procname	= "ratelimit",
-		.data		= &sysctl_icmpv6_time,
+		.data		= &init_net.ipv6.sysctl.icmpv6_time,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= proc_dointvec_ms_jiffies,
 	},
-	{ .ctl_name = 0 },
+	{ },
 };
+
+struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = kmemdup(ipv6_icmp_table_template,
+			sizeof(ipv6_icmp_table_template),
+			GFP_KERNEL);
+
+	if (table)
+		table[0].data = &net->ipv6.sysctl.icmpv6_time;
+
+	return table;
+}
 #endif
 
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
new file mode 100644
index 00000000000..a245e5ddffb
--- /dev/null
+++ b/net/ipv6/inet6_connection_sock.c
@@ -0,0 +1,266 @@
+/*
+ * INET        An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Support for INET6 connection oriented protocols.
+ *
+ * Authors:    See the TCPv6 sources
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or(at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+
+#include <net/addrconf.h>
+#include <net/inet_connection_sock.h>
+#include <net/inet_ecn.h>
+#include <net/inet_hashtables.h>
+#include <net/ip6_route.h>
+#include <net/sock.h>
+#include <net/inet6_connection_sock.h>
+
+int inet6_csk_bind_conflict(const struct sock *sk,
+			    const struct inet_bind_bucket *tb, bool relax)
+{
+	const struct sock *sk2;
+	int reuse = sk->sk_reuse;
+	int reuseport = sk->sk_reuseport;
+	kuid_t uid = sock_i_uid((struct sock *)sk);
+
+	/* We must walk the whole port owner list in this case. -DaveM */
+	/*
+	 * See comment in inet_csk_bind_conflict about sock lookup
+	 * vs net namespaces issues.
+	 */
+	sk_for_each_bound(sk2, &tb->owners) {
+		if (sk != sk2 &&
+		    (!sk->sk_bound_dev_if ||
+		     !sk2->sk_bound_dev_if ||
+		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
+			if ((!reuse || !sk2->sk_reuse ||
+			     sk2->sk_state == TCP_LISTEN) &&
+			    (!reuseport || !sk2->sk_reuseport ||
+			     (sk2->sk_state != TCP_TIME_WAIT &&
+			      !uid_eq(uid,
+				      sock_i_uid((struct sock *)sk2))))) {
+				if (ipv6_rcv_saddr_equal(sk, sk2))
+					break;
+			}
+			if (!relax && reuse && sk2->sk_reuse &&
+			    sk2->sk_state != TCP_LISTEN &&
+			    ipv6_rcv_saddr_equal(sk, sk2))
+				break;
+		}
+	}
+
+	return sk2 != NULL;
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
+
+struct dst_entry *inet6_csk_route_req(struct sock *sk,
+				      struct flowi6 *fl6,
+				      const struct request_sock *req)
+{
+	struct inet_request_sock *ireq = inet_rsk(req);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct in6_addr *final_p, final;
+	struct dst_entry *dst;
+
+	memset(fl6, 0, sizeof(*fl6));
+	fl6->flowi6_proto = IPPROTO_TCP;
+	fl6->daddr = ireq->ir_v6_rmt_addr;
+	final_p = fl6_update_dst(fl6, np->opt, &final);
+	fl6->saddr = ireq->ir_v6_loc_addr;
+	fl6->flowi6_oif = ireq->ir_iif;
+	fl6->flowi6_mark = ireq->ir_mark;
+	fl6->fl6_dport = ireq->ir_rmt_port;
+	fl6->fl6_sport = htons(ireq->ir_num);
+	security_req_classify_flow(req, flowi6_to_flowi(fl6));
+
+	dst = ip6_dst_lookup_flow(sk, fl6, final_p);
+	if (IS_ERR(dst))
+		return NULL;
+
+	return dst;
+}
+
+/*
+ * request_sock (formerly open request) hash tables.
+ */
+static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
+			   const u32 rnd, const u32 synq_hsize)
+{
+	u32 c;
+
+	c = jhash_3words((__force u32)raddr->s6_addr32[0],
+			 (__force u32)raddr->s6_addr32[1],
+			 (__force u32)raddr->s6_addr32[2],
+			 rnd);
+
+	c = jhash_2words((__force u32)raddr->s6_addr32[3],
+			 (__force u32)rport,
+			 c);
+
+	return c & (synq_hsize - 1);
+}
+
+struct request_sock *inet6_csk_search_req(const struct sock *sk,
+					  struct request_sock ***prevp,
+					  const __be16 rport,
+					  const struct in6_addr *raddr,
+					  const struct in6_addr *laddr,
+					  const int iif)
+{
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
+	struct request_sock *req, **prev;
+
+	for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport,
+						     lopt->hash_rnd,
+						     lopt->nr_table_entries)];
+	     (req = *prev) != NULL;
+	     prev = &req->dl_next) {
+		const struct inet_request_sock *ireq = inet_rsk(req);
+
+		if (ireq->ir_rmt_port == rport &&
+		    req->rsk_ops->family == AF_INET6 &&
+		    ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) &&
+		    ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) &&
+		    (!ireq->ir_iif || ireq->ir_iif == iif)) {
+			WARN_ON(req->sk != NULL);
+			*prevp = prev;
+			return req;
+		}
+	}
+
+	return NULL;
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_search_req);
+
+void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
+				    struct request_sock *req,
+				    const unsigned long timeout)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
+	const u32 h = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr,
+				      inet_rsk(req)->ir_rmt_port,
+				      lopt->hash_rnd, lopt->nr_table_entries);
+
+	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
+	inet_csk_reqsk_queue_added(sk, timeout);
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
+
+void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
+{
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
+
+	sin6->sin6_family = AF_INET6;
+	sin6->sin6_addr = sk->sk_v6_daddr;
+	sin6->sin6_port	= inet_sk(sk)->inet_dport;
+	/* We do not store received flowlabel for TCP */
+	sin6->sin6_flowinfo = 0;
+	sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
+						  sk->sk_bound_dev_if);
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
+
+static inline
+void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
+			   const struct in6_addr *daddr,
+			   const struct in6_addr *saddr)
+{
+	__ip6_dst_store(sk, dst, daddr, saddr);
+}
+
+static inline
+struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
+{
+	return __sk_dst_check(sk, cookie);
+}
+
+static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
+						struct flowi6 *fl6)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct in6_addr *final_p, final;
+	struct dst_entry *dst;
+
+	memset(fl6, 0, sizeof(*fl6));
+	fl6->flowi6_proto = sk->sk_protocol;
+	fl6->daddr = sk->sk_v6_daddr;
+	fl6->saddr = np->saddr;
+	fl6->flowlabel = np->flow_label;
+	IP6_ECN_flow_xmit(sk, fl6->flowlabel);
+	fl6->flowi6_oif = sk->sk_bound_dev_if;
+	fl6->flowi6_mark = sk->sk_mark;
+	fl6->fl6_sport = inet->inet_sport;
+	fl6->fl6_dport = inet->inet_dport;
+	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
+
+	final_p = fl6_update_dst(fl6, np->opt, &final);
+
+	dst = __inet6_csk_dst_check(sk, np->dst_cookie);
+	if (!dst) {
+		dst = ip6_dst_lookup_flow(sk, fl6, final_p);
+
+		if (!IS_ERR(dst))
+			__inet6_csk_dst_store(sk, dst, NULL, NULL);
+	}
+	return dst;
+}
+
+int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct flowi6 fl6;
+	struct dst_entry *dst;
+	int res;
+
+	dst = inet6_csk_route_socket(sk, &fl6);
+	if (IS_ERR(dst)) {
+		sk->sk_err_soft = -PTR_ERR(dst);
+		sk->sk_route_caps = 0;
+		kfree_skb(skb);
+		return PTR_ERR(dst);
+	}
+
+	rcu_read_lock();
+	skb_dst_set_noref(skb, dst);
+
+	/* Restore final destination back after routing done */
+	fl6.daddr = sk->sk_v6_daddr;
+
+	res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+	rcu_read_unlock();
+	return res;
+}
+EXPORT_SYMBOL_GPL(inet6_csk_xmit);
+
+struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
+{
+	struct flowi6 fl6;
+	struct dst_entry *dst = inet6_csk_route_socket(sk, &fl6);
+
+	if (IS_ERR(dst))
+		return NULL;
+	dst->ops->update_pmtu(dst, sk, NULL, mtu);
+
+	dst = inet6_csk_route_socket(sk, &fl6);
+	return IS_ERR(dst) ? NULL : dst;
+}
+EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 01d5f46d4e4..262e13c02ec 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -5,7 +5,8 @@
  *
  *		Generic INET6 transport hashtables
  *
- * Authors:	Lotsa people, from code originally in tcp
+ * Authors:	Lotsa people, from code originally in tcp, generalised here
+ * 		by Arnaldo Carvalho de Melo <acme@mandriva.com>
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -13,69 +14,315 @@
  *      2 of the License, or (at your option) any later version.
  */
 
-#include <linux/config.h>
-
 #include <linux/module.h>
+#include <linux/random.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_hashtables.h>
 #include <net/inet6_hashtables.h>
+#include <net/secure_seq.h>
+#include <net/ip.h>
+
+static unsigned int inet6_ehashfn(struct net *net,
+				  const struct in6_addr *laddr,
+				  const u16 lport,
+				  const struct in6_addr *faddr,
+				  const __be16 fport)
+{
+	static u32 inet6_ehash_secret __read_mostly;
+	static u32 ipv6_hash_secret __read_mostly;
+
+	u32 lhash, fhash;
+
+	net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret));
+	net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
 
-struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
-				   const struct in6_addr *daddr,
-				   const unsigned short hnum, const int dif)
+	lhash = (__force u32)laddr->s6_addr32[3];
+	fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
+
+	return __inet6_ehashfn(lhash, lport, fhash, fport,
+			       inet6_ehash_secret + net_hash_mix(net));
+}
+
+static int inet6_sk_ehashfn(const struct sock *sk)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr;
+	const struct in6_addr *faddr = &sk->sk_v6_daddr;
+	const __u16 lport = inet->inet_num;
+	const __be16 fport = inet->inet_dport;
+	struct net *net = sock_net(sk);
+
+	return inet6_ehashfn(net, laddr, lport, faddr, fport);
+}
+
+int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw)
+{
+	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+	int twrefcnt = 0;
+
+	WARN_ON(!sk_unhashed(sk));
+
+	if (sk->sk_state == TCP_LISTEN) {
+		struct inet_listen_hashbucket *ilb;
+
+		ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+		spin_lock(&ilb->lock);
+		__sk_nulls_add_node_rcu(sk, &ilb->head);
+		spin_unlock(&ilb->lock);
+	} else {
+		unsigned int hash;
+		struct hlist_nulls_head *list;
+		spinlock_t *lock;
+
+		sk->sk_hash = hash = inet6_sk_ehashfn(sk);
+		list = &inet_ehash_bucket(hashinfo, hash)->chain;
+		lock = inet_ehash_lockp(hashinfo, hash);
+		spin_lock(lock);
+		__sk_nulls_add_node_rcu(sk, list);
+		if (tw) {
+			WARN_ON(sk->sk_hash != tw->tw_hash);
+			twrefcnt = inet_twsk_unhash(tw);
+		}
+		spin_unlock(lock);
+	}
+
+	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+	return twrefcnt;
+}
+EXPORT_SYMBOL(__inet6_hash);
+
+/*
+ * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
+ * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
+ *
+ * The sockhash lock must be held as a reader here.
+ */
+struct sock *__inet6_lookup_established(struct net *net,
+					struct inet_hashinfo *hashinfo,
+					   const struct in6_addr *saddr,
+					   const __be16 sport,
+					   const struct in6_addr *daddr,
+					   const u16 hnum,
+					   const int dif)
 {
 	struct sock *sk;
-	const struct hlist_node *node;
-	struct sock *result = NULL;
-	int score, hiscore = 0;
-
-	read_lock(&hashinfo->lhash_lock);
-	sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
-		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
-			const struct ipv6_pinfo *np = inet6_sk(sk);
-			
-			score = 1;
-			if (!ipv6_addr_any(&np->rcv_saddr)) {
-				if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
-					continue;
-				score++;
-			}
-			if (sk->sk_bound_dev_if) {
-				if (sk->sk_bound_dev_if != dif)
-					continue;
-				score++;
-			}
-			if (score == 3) {
-				result = sk;
-				break;
+	const struct hlist_nulls_node *node;
+	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
+	/* Optimize here for direct hit, only listening connections can
+	 * have wildcards anyways.
+	 */
+	unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
+	unsigned int slot = hash & hashinfo->ehash_mask;
+	struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+
+
+	rcu_read_lock();
+begin:
+	sk_nulls_for_each_rcu(sk, node, &head->chain) {
+		if (sk->sk_hash != hash)
+			continue;
+		if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
+			continue;
+		if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
+			goto out;
+
+		if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
+			sock_gen_put(sk);
+			goto begin;
+		}
+		goto found;
+	}
+	if (get_nulls_value(node) != slot)
+		goto begin;
+out:
+	sk = NULL;
+found:
+	rcu_read_unlock();
+	return sk;
+}
+EXPORT_SYMBOL(__inet6_lookup_established);
+
+static inline int compute_score(struct sock *sk, struct net *net,
+				const unsigned short hnum,
+				const struct in6_addr *daddr,
+				const int dif)
+{
+	int score = -1;
+
+	if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
+	    sk->sk_family == PF_INET6) {
+
+		score = 1;
+		if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+			if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+				return -1;
+			score++;
+		}
+		if (sk->sk_bound_dev_if) {
+			if (sk->sk_bound_dev_if != dif)
+				return -1;
+			score++;
+		}
+	}
+	return score;
+}
+
+struct sock *inet6_lookup_listener(struct net *net,
+		struct inet_hashinfo *hashinfo, const struct in6_addr *saddr,
+		const __be16 sport, const struct in6_addr *daddr,
+		const unsigned short hnum, const int dif)
+{
+	struct sock *sk;
+	const struct hlist_nulls_node *node;
+	struct sock *result;
+	int score, hiscore, matches = 0, reuseport = 0;
+	u32 phash = 0;
+	unsigned int hash = inet_lhashfn(net, hnum);
+	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
+
+	rcu_read_lock();
+begin:
+	result = NULL;
+	hiscore = 0;
+	sk_nulls_for_each(sk, node, &ilb->head) {
+		score = compute_score(sk, net, hnum, daddr, dif);
+		if (score > hiscore) {
+			hiscore = score;
+			result = sk;
+			reuseport = sk->sk_reuseport;
+			if (reuseport) {
+				phash = inet6_ehashfn(net, daddr, hnum,
+						      saddr, sport);
+				matches = 1;
 			}
-			if (score > hiscore) {
-				hiscore = score;
+		} else if (score == hiscore && reuseport) {
+			matches++;
+			if (((u64)phash * matches) >> 32 == 0)
 				result = sk;
-			}
+			phash = next_pseudo_random32(phash);
 		}
 	}
-	if (result)
-		sock_hold(result);
-	read_unlock(&hashinfo->lhash_lock);
+	/*
+	 * if the nulls value we got at the end of this lookup is
+	 * not the expected one, we must restart lookup.
+	 * We probably met an item that was moved to another chain.
+	 */
+	if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
+		goto begin;
+	if (result) {
+		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+			result = NULL;
+		else if (unlikely(compute_score(result, net, hnum, daddr,
+				  dif) < hiscore)) {
+			sock_put(result);
+			goto begin;
+		}
+	}
+	rcu_read_unlock();
 	return result;
 }
 
 EXPORT_SYMBOL_GPL(inet6_lookup_listener);
 
-struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
-			  const struct in6_addr *saddr, const u16 sport,
-			  const struct in6_addr *daddr, const u16 dport,
+struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
+			  const struct in6_addr *saddr, const __be16 sport,
+			  const struct in6_addr *daddr, const __be16 dport,
 			  const int dif)
 {
 	struct sock *sk;
 
 	local_bh_disable();
-	sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
+	sk = __inet6_lookup(net, hashinfo, saddr, sport, daddr, ntohs(dport), dif);
 	local_bh_enable();
 
 	return sk;
 }
 
 EXPORT_SYMBOL_GPL(inet6_lookup);
+
+static int __inet6_check_established(struct inet_timewait_death_row *death_row,
+				     struct sock *sk, const __u16 lport,
+				     struct inet_timewait_sock **twp)
+{
+	struct inet_hashinfo *hinfo = death_row->hashinfo;
+	struct inet_sock *inet = inet_sk(sk);
+	const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
+	const struct in6_addr *saddr = &sk->sk_v6_daddr;
+	const int dif = sk->sk_bound_dev_if;
+	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
+	struct net *net = sock_net(sk);
+	const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
+						inet->inet_dport);
+	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
+	struct sock *sk2;
+	const struct hlist_nulls_node *node;
+	struct inet_timewait_sock *tw = NULL;
+	int twrefcnt = 0;
+
+	spin_lock(lock);
+
+	sk_nulls_for_each(sk2, node, &head->chain) {
+		if (sk2->sk_hash != hash)
+			continue;
+
+		if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) {
+			if (sk2->sk_state == TCP_TIME_WAIT) {
+				tw = inet_twsk(sk2);
+				if (twsk_unique(sk, sk2, twp))
+					break;
+			}
+			goto not_unique;
+		}
+	}
+
+	/* Must record num and sport now. Otherwise we will see
+	 * in hash table socket with a funny identity.
+	 */
+	inet->inet_num = lport;
+	inet->inet_sport = htons(lport);
+	sk->sk_hash = hash;
+	WARN_ON(!sk_unhashed(sk));
+	__sk_nulls_add_node_rcu(sk, &head->chain);
+	if (tw) {
+		twrefcnt = inet_twsk_unhash(tw);
+		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
+	}
+	spin_unlock(lock);
+	if (twrefcnt)
+		inet_twsk_put(tw);
+	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+
+	if (twp) {
+		*twp = tw;
+	} else if (tw) {
+		/* Silly. Should hash-dance instead... */
+		inet_twsk_deschedule(tw, death_row);
+
+		inet_twsk_put(tw);
+	}
+	return 0;
+
+not_unique:
+	spin_unlock(lock);
+	return -EADDRNOTAVAIL;
+}
+
+static inline u32 inet6_sk_port_offset(const struct sock *sk)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+
+	return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32,
+					  sk->sk_v6_daddr.s6_addr32,
+					  inet->inet_dport);
+}
+
+int inet6_hash_connect(struct inet_timewait_death_row *death_row,
+		       struct sock *sk)
+{
+	return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk),
+			__inet6_check_established, __inet6_hash);
+}
+
+EXPORT_SYMBOL_GPL(inet6_hash_connect);
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
new file mode 100644
index 00000000000..9a4d7322fb2
--- /dev/null
+++ b/net/ipv6/ip6_checksum.c
@@ -0,0 +1,124 @@
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/udplite.h>
+#include <asm/checksum.h>
+
+#ifndef _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+			const struct in6_addr *daddr,
+			__u32 len, unsigned short proto,
+			__wsum csum)
+{
+
+	int carry;
+	__u32 ulen;
+	__u32 uproto;
+	__u32 sum = (__force u32)csum;
+
+	sum += (__force u32)saddr->s6_addr32[0];
+	carry = (sum < (__force u32)saddr->s6_addr32[0]);
+	sum += carry;
+
+	sum += (__force u32)saddr->s6_addr32[1];
+	carry = (sum < (__force u32)saddr->s6_addr32[1]);
+	sum += carry;
+
+	sum += (__force u32)saddr->s6_addr32[2];
+	carry = (sum < (__force u32)saddr->s6_addr32[2]);
+	sum += carry;
+
+	sum += (__force u32)saddr->s6_addr32[3];
+	carry = (sum < (__force u32)saddr->s6_addr32[3]);
+	sum += carry;
+
+	sum += (__force u32)daddr->s6_addr32[0];
+	carry = (sum < (__force u32)daddr->s6_addr32[0]);
+	sum += carry;
+
+	sum += (__force u32)daddr->s6_addr32[1];
+	carry = (sum < (__force u32)daddr->s6_addr32[1]);
+	sum += carry;
+
+	sum += (__force u32)daddr->s6_addr32[2];
+	carry = (sum < (__force u32)daddr->s6_addr32[2]);
+	sum += carry;
+
+	sum += (__force u32)daddr->s6_addr32[3];
+	carry = (sum < (__force u32)daddr->s6_addr32[3]);
+	sum += carry;
+
+	ulen = (__force u32)htonl((__u32) len);
+	sum += ulen;
+	carry = (sum < ulen);
+	sum += carry;
+
+	uproto = (__force u32)htonl(proto);
+	sum += uproto;
+	carry = (sum < uproto);
+	sum += carry;
+
+	return csum_fold((__force __wsum)sum);
+}
+EXPORT_SYMBOL(csum_ipv6_magic);
+#endif
+
+int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
+{
+	int err;
+
+	UDP_SKB_CB(skb)->partial_cov = 0;
+	UDP_SKB_CB(skb)->cscov = skb->len;
+
+	if (proto == IPPROTO_UDPLITE) {
+		err = udplite_checksum_init(skb, uh);
+		if (err)
+			return err;
+	}
+
+	/* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels)
+	 * we accept a checksum of zero here. When we find the socket
+	 * for the UDP packet we'll check if that socket allows zero checksum
+	 * for IPv6 (set by socket option).
+	 */
+	return skb_checksum_init_zero_check(skb, proto, uh->check,
+					   ip6_compute_pseudo);
+}
+EXPORT_SYMBOL(udp6_csum_init);
+
+/* Function to set UDP checksum for an IPv6 UDP packet. This is intended
+ * for the simple case like when setting the checksum for a UDP tunnel.
+ */
+void udp6_set_csum(bool nocheck, struct sk_buff *skb,
+		   const struct in6_addr *saddr,
+		   const struct in6_addr *daddr, int len)
+{
+	struct udphdr *uh = udp_hdr(skb);
+
+	if (nocheck)
+		uh->check = 0;
+	else if (skb_is_gso(skb))
+		uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+	else if (skb_dst(skb) && skb_dst(skb)->dev &&
+		 (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
+
+		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+	} else {
+		__wsum csum;
+
+		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+		uh->check = 0;
+		csum = skb_checksum(skb, 0, len, 0);
+		uh->check = udp_v6_check(len, saddr, daddr, csum);
+		if (uh->check == 0)
+			uh->check = CSUM_MANGLED_0;
+
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+}
+EXPORT_SYMBOL(udp6_set_csum);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 4fcc5a7acf6..cb4459bd1d2 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1,25 +1,24 @@
 /*
- *	Linux INET6 implementation 
+ *	Linux INET6 implementation
  *	Forwarding Information Database
  *
  *	Authors:
- *	Pedro Roque		<roque@di.fc.ul.pt>	
- *
- *	$Id: ip6_fib.c,v 1.25 2001/10/31 21:55:55 davem Exp $
+ *	Pedro Roque		<roque@di.fc.ul.pt>
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
  *      as published by the Free Software Foundation; either version
  *      2 of the License, or (at your option) any later version.
+ *
+ *	Changes:
+ *	Yuji SEKIYA @USAGI:	Support default route on router node;
+ *				remove ip6_null_entry from the top of
+ *				routing table.
+ *	Ville Nuorvala:		Fixed routing subtrees.
  */
 
-/*
- * 	Changes:
- * 	Yuji SEKIYA @USAGI:	Support default route on router node;
- * 				remove ip6_null_entry from the top of
- * 				routing table.
- */
-#include <linux/config.h>
+#define pr_fmt(fmt) "IPv6: " fmt
+
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/net.h>
@@ -27,10 +26,8 @@
 #include <linux/netdevice.h>
 #include <linux/in6.h>
 #include <linux/init.h>
-
-#ifdef 	CONFIG_PROC_FS
-#include <linux/proc_fs.h>
-#endif
+#include <linux/list.h>
+#include <linux/slab.h>
 
 #include <net/ipv6.h>
 #include <net/ndisc.h>
@@ -42,17 +39,14 @@
 #define RT6_DEBUG 2
 
 #if RT6_DEBUG >= 3
-#define RT6_TRACE(x...) printk(KERN_DEBUG x)
+#define RT6_TRACE(x...) pr_debug(x)
 #else
 #define RT6_TRACE(x...) do { ; } while (0)
 #endif
 
-struct rt6_statistics	rt6_stats;
-
-static kmem_cache_t * fib6_node_kmem __read_mostly;
+static struct kmem_cache *fib6_node_kmem __read_mostly;
 
-enum fib_walk_state_t
-{
+enum fib_walk_state_t {
 #ifdef CONFIG_IPV6_SUBTREES
 	FWS_S,
 #endif
@@ -62,26 +56,26 @@ enum fib_walk_state_t
 	FWS_U
 };
 
-struct fib6_cleaner_t
-{
+struct fib6_cleaner_t {
 	struct fib6_walker_t w;
+	struct net *net;
 	int (*func)(struct rt6_info *, void *arg);
 	void *arg;
 };
 
-DEFINE_RWLOCK(fib6_walker_lock);
-
+static DEFINE_RWLOCK(fib6_walker_lock);
 
 #ifdef CONFIG_IPV6_SUBTREES
 #define FWS_INIT FWS_S
-#define SUBTREE(fn) ((fn)->subtree)
 #else
 #define FWS_INIT FWS_L
-#define SUBTREE(fn) NULL
 #endif
 
-static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
-static struct fib6_node * fib6_repair_tree(struct fib6_node *fn);
+static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
+static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
+static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
+static int fib6_walk(struct fib6_walker_t *w);
+static int fib6_walk_continue(struct fib6_walker_t *w);
 
 /*
  *	A routing update causes an increase of the serial number on the
@@ -92,15 +86,24 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn);
 
 static __u32 rt_sernum;
 
-static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0);
+static void fib6_gc_timer_cb(unsigned long arg);
 
-struct fib6_walker_t fib6_walker_list = {
-	.prev	= &fib6_walker_list,
-	.next	= &fib6_walker_list, 
-};
+static LIST_HEAD(fib6_walkers);
+#define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh)
 
-#define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next)
+static inline void fib6_walker_link(struct fib6_walker_t *w)
+{
+	write_lock_bh(&fib6_walker_lock);
+	list_add(&w->lh, &fib6_walkers);
+	write_unlock_bh(&fib6_walker_lock);
+}
 
+static inline void fib6_walker_unlink(struct fib6_walker_t *w)
+{
+	write_lock_bh(&fib6_walker_lock);
+	list_del(&w->lh);
+	write_unlock_bh(&fib6_walker_lock);
+}
 static __inline__ u32 fib6_new_sernum(void)
 {
 	u32 n = ++rt_sernum;
@@ -112,92 +115,302 @@ static __inline__ u32 fib6_new_sernum(void)
 /*
  *	Auxiliary address test functions for the radix tree.
  *
- *	These assume a 32bit processor (although it will work on 
+ *	These assume a 32bit processor (although it will work on
  *	64bit processors)
  */
 
 /*
  *	test bit
  */
+#if defined(__LITTLE_ENDIAN)
+# define BITOP_BE32_SWIZZLE	(0x1F & ~7)
+#else
+# define BITOP_BE32_SWIZZLE	0
+#endif
 
-static __inline__ int addr_bit_set(void *token, int fn_bit)
+static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)
 {
-	__u32 *addr = token;
+	const __be32 *addr = token;
+	/*
+	 * Here,
+	 *	1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
+	 * is optimized version of
+	 *	htonl(1 << ((~fn_bit)&0x1F))
+	 * See include/asm-generic/bitops/le.h.
+	 */
+	return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) &
+	       addr[fn_bit >> 5];
+}
 
-	return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5];
+static __inline__ struct fib6_node *node_alloc(void)
+{
+	struct fib6_node *fn;
+
+	fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
+
+	return fn;
 }
 
-/*
- *	find the first different bit between two addresses
- *	length of address must be a multiple of 32bits
- */
+static __inline__ void node_free(struct fib6_node *fn)
+{
+	kmem_cache_free(fib6_node_kmem, fn);
+}
 
-static __inline__ int addr_diff(void *token1, void *token2, int addrlen)
+static __inline__ void rt6_release(struct rt6_info *rt)
 {
-	__u32 *a1 = token1;
-	__u32 *a2 = token2;
-	int i;
+	if (atomic_dec_and_test(&rt->rt6i_ref))
+		dst_free(&rt->dst);
+}
 
-	addrlen >>= 2;
+static void fib6_link_table(struct net *net, struct fib6_table *tb)
+{
+	unsigned int h;
 
-	for (i = 0; i < addrlen; i++) {
-		__u32 xb;
+	/*
+	 * Initialize table lock at a single place to give lockdep a key,
+	 * tables aren't visible prior to being linked to the list.
+	 */
+	rwlock_init(&tb->tb6_lock);
 
-		xb = a1[i] ^ a2[i];
+	h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
 
-		if (xb) {
-			int j = 31;
+	/*
+	 * No protection necessary, this is the only list mutatation
+	 * operation, tables never disappear once they exist.
+	 */
+	hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]);
+}
 
-			xb = ntohl(xb);
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 
-			while ((xb & (1 << j)) == 0)
-				j--;
+static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
+{
+	struct fib6_table *table;
+
+	table = kzalloc(sizeof(*table), GFP_ATOMIC);
+	if (table) {
+		table->tb6_id = id;
+		table->tb6_root.leaf = net->ipv6.ip6_null_entry;
+		table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+		inet_peer_base_init(&table->tb6_peers);
+	}
 
-			return (i * 32 + 31 - j);
+	return table;
+}
+
+struct fib6_table *fib6_new_table(struct net *net, u32 id)
+{
+	struct fib6_table *tb;
+
+	if (id == 0)
+		id = RT6_TABLE_MAIN;
+	tb = fib6_get_table(net, id);
+	if (tb)
+		return tb;
+
+	tb = fib6_alloc_table(net, id);
+	if (tb)
+		fib6_link_table(net, tb);
+
+	return tb;
+}
+
+struct fib6_table *fib6_get_table(struct net *net, u32 id)
+{
+	struct fib6_table *tb;
+	struct hlist_head *head;
+	unsigned int h;
+
+	if (id == 0)
+		id = RT6_TABLE_MAIN;
+	h = id & (FIB6_TABLE_HASHSZ - 1);
+	rcu_read_lock();
+	head = &net->ipv6.fib_table_hash[h];
+	hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
+		if (tb->tb6_id == id) {
+			rcu_read_unlock();
+			return tb;
 		}
 	}
+	rcu_read_unlock();
 
-	/*
-	 *	we should *never* get to this point since that 
-	 *	would mean the addrs are equal
-	 *
-	 *	However, we do get to it 8) And exacly, when
-	 *	addresses are equal 8)
-	 *
-	 *	ip route add 1111::/128 via ...
-	 *	ip route add 1111::/64 via ...
-	 *	and we are here.
-	 *
-	 *	Ideally, this function should stop comparison
-	 *	at prefix length. It does not, but it is still OK,
-	 *	if returned value is greater than prefix length.
-	 *					--ANK (980803)
-	 */
+	return NULL;
+}
 
-	return addrlen<<5;
+static void __net_init fib6_tables_init(struct net *net)
+{
+	fib6_link_table(net, net->ipv6.fib6_main_tbl);
+	fib6_link_table(net, net->ipv6.fib6_local_tbl);
 }
+#else
 
-static __inline__ struct fib6_node * node_alloc(void)
+struct fib6_table *fib6_new_table(struct net *net, u32 id)
 {
-	struct fib6_node *fn;
+	return fib6_get_table(net, id);
+}
 
-	if ((fn = kmem_cache_alloc(fib6_node_kmem, SLAB_ATOMIC)) != NULL)
-		memset(fn, 0, sizeof(struct fib6_node));
+struct fib6_table *fib6_get_table(struct net *net, u32 id)
+{
+	  return net->ipv6.fib6_main_tbl;
+}
 
-	return fn;
+struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+				   int flags, pol_lookup_t lookup)
+{
+	return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
 }
 
-static __inline__ void node_free(struct fib6_node * fn)
+static void __net_init fib6_tables_init(struct net *net)
 {
-	kmem_cache_free(fib6_node_kmem, fn);
+	fib6_link_table(net, net->ipv6.fib6_main_tbl);
 }
 
-static __inline__ void rt6_release(struct rt6_info *rt)
+#endif
+
+static int fib6_dump_node(struct fib6_walker_t *w)
 {
-	if (atomic_dec_and_test(&rt->rt6i_ref))
-		dst_free(&rt->u.dst);
+	int res;
+	struct rt6_info *rt;
+
+	for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
+		res = rt6_dump_route(rt, w->args);
+		if (res < 0) {
+			/* Frame is full, suspend walking */
+			w->leaf = rt;
+			return 1;
+		}
+		WARN_ON(res == 0);
+	}
+	w->leaf = NULL;
+	return 0;
 }
 
+static void fib6_dump_end(struct netlink_callback *cb)
+{
+	struct fib6_walker_t *w = (void *)cb->args[2];
+
+	if (w) {
+		if (cb->args[4]) {
+			cb->args[4] = 0;
+			fib6_walker_unlink(w);
+		}
+		cb->args[2] = 0;
+		kfree(w);
+	}
+	cb->done = (void *)cb->args[3];
+	cb->args[1] = 3;
+}
+
+static int fib6_dump_done(struct netlink_callback *cb)
+{
+	fib6_dump_end(cb);
+	return cb->done ? cb->done(cb) : 0;
+}
+
+static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
+			   struct netlink_callback *cb)
+{
+	struct fib6_walker_t *w;
+	int res;
+
+	w = (void *)cb->args[2];
+	w->root = &table->tb6_root;
+
+	if (cb->args[4] == 0) {
+		w->count = 0;
+		w->skip = 0;
+
+		read_lock_bh(&table->tb6_lock);
+		res = fib6_walk(w);
+		read_unlock_bh(&table->tb6_lock);
+		if (res > 0) {
+			cb->args[4] = 1;
+			cb->args[5] = w->root->fn_sernum;
+		}
+	} else {
+		if (cb->args[5] != w->root->fn_sernum) {
+			/* Begin at the root if the tree changed */
+			cb->args[5] = w->root->fn_sernum;
+			w->state = FWS_INIT;
+			w->node = w->root;
+			w->skip = w->count;
+		} else
+			w->skip = 0;
+
+		read_lock_bh(&table->tb6_lock);
+		res = fib6_walk_continue(w);
+		read_unlock_bh(&table->tb6_lock);
+		if (res <= 0) {
+			fib6_walker_unlink(w);
+			cb->args[4] = 0;
+		}
+	}
+
+	return res;
+}
+
+static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	unsigned int h, s_h;
+	unsigned int e = 0, s_e;
+	struct rt6_rtnl_dump_arg arg;
+	struct fib6_walker_t *w;
+	struct fib6_table *tb;
+	struct hlist_head *head;
+	int res = 0;
+
+	s_h = cb->args[0];
+	s_e = cb->args[1];
+
+	w = (void *)cb->args[2];
+	if (!w) {
+		/* New dump:
+		 *
+		 * 1. hook callback destructor.
+		 */
+		cb->args[3] = (long)cb->done;
+		cb->done = fib6_dump_done;
+
+		/*
+		 * 2. allocate and initialize walker.
+		 */
+		w = kzalloc(sizeof(*w), GFP_ATOMIC);
+		if (!w)
+			return -ENOMEM;
+		w->func = fib6_dump_node;
+		cb->args[2] = (long)w;
+	}
+
+	arg.skb = skb;
+	arg.cb = cb;
+	arg.net = net;
+	w->args = &arg;
+
+	rcu_read_lock();
+	for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
+		e = 0;
+		head = &net->ipv6.fib_table_hash[h];
+		hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
+			if (e < s_e)
+				goto next;
+			res = fib6_dump_table(tb, skb, cb);
+			if (res != 0)
+				goto out;
+next:
+			e++;
+		}
+	}
+out:
+	rcu_read_unlock();
+	cb->args[1] = e;
+	cb->args[0] = h;
+
+	res = res < 0 ? res : skb->len;
+	if (res <= 0)
+		fib6_dump_end(cb);
+	return res;
+}
 
 /*
  *	Routing Table
@@ -207,15 +420,16 @@ static __inline__ void rt6_release(struct rt6_info *rt)
  *	node.
  */
 
-static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
-				     int addrlen, int plen,
-				     int offset)
+static struct fib6_node *fib6_add_1(struct fib6_node *root,
+				     struct in6_addr *addr, int plen,
+				     int offset, int allow_create,
+				     int replace_required)
 {
 	struct fib6_node *fn, *in, *ln;
 	struct fib6_node *pn = NULL;
 	struct rt6key *key;
 	int	bit;
-       	int	dir = 0;
+	__be32	dir = 0;
 	__u32	sernum = fib6_new_sernum();
 
 	RT6_TRACE("fib6_add_1\n");
@@ -231,36 +445,60 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
 		 *	Prefix match
 		 */
 		if (plen < fn->fn_bit ||
-		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
+		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) {
+			if (!allow_create) {
+				if (replace_required) {
+					pr_warn("Can't replace route, no match found\n");
+					return ERR_PTR(-ENOENT);
+				}
+				pr_warn("NLM_F_CREATE should be set when creating new route\n");
+			}
 			goto insert_above;
-		
+		}
+
 		/*
 		 *	Exact match ?
 		 */
-			 
+
 		if (plen == fn->fn_bit) {
 			/* clean up an intermediate node */
-			if ((fn->fn_flags & RTN_RTINFO) == 0) {
+			if (!(fn->fn_flags & RTN_RTINFO)) {
 				rt6_release(fn->leaf);
 				fn->leaf = NULL;
 			}
-			
+
 			fn->fn_sernum = sernum;
-				
+
 			return fn;
 		}
 
 		/*
 		 *	We have more bits to go
 		 */
-			 
+
 		/* Try to walk down on tree. */
 		fn->fn_sernum = sernum;
 		dir = addr_bit_set(addr, fn->fn_bit);
 		pn = fn;
-		fn = dir ? fn->right: fn->left;
+		fn = dir ? fn->right : fn->left;
 	} while (fn);
 
+	if (!allow_create) {
+		/* We should not create new node because
+		 * NLM_F_REPLACE was specified without NLM_F_CREATE
+		 * I assume it is safe to require NLM_F_CREATE when
+		 * REPLACE flag is used! Later we may want to remove the
+		 * check for replace_required, because according
+		 * to netlink specification, NLM_F_CREATE
+		 * MUST be specified if new route is created.
+		 * That would keep IPv6 consistent with IPv4
+		 */
+		if (replace_required) {
+			pr_warn("Can't replace route, no match found\n");
+			return ERR_PTR(-ENOENT);
+		}
+		pr_warn("NLM_F_CREATE should be set when creating new route\n");
+	}
 	/*
 	 *	We walked to the bottom of tree.
 	 *	Create new leaf node without children.
@@ -268,10 +506,10 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
 
 	ln = node_alloc();
 
-	if (ln == NULL)
-		return NULL;
+	if (!ln)
+		return ERR_PTR(-ENOMEM);
 	ln->fn_bit = plen;
-			
+
 	ln->parent = pn;
 	ln->fn_sernum = sernum;
 
@@ -285,7 +523,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
 
 insert_above:
 	/*
-	 * split since we don't have a common prefix anymore or 
+	 * split since we don't have a common prefix anymore or
 	 * we have a less significant route.
 	 * we've to insert an intermediate node on the list
 	 * this new node will point to the one we need to create
@@ -296,31 +534,31 @@ insert_above:
 
 	/* find 1st bit in difference between the 2 addrs.
 
-	   See comment in addr_diff: bit may be an invalid value,
+	   See comment in __ipv6_addr_diff: bit may be an invalid value,
 	   but if it is >= plen, the value is ignored in any case.
 	 */
-	
-	bit = addr_diff(addr, &key->addr, addrlen);
 
-	/* 
-	 *		(intermediate)[in]	
+	bit = __ipv6_addr_diff(addr, &key->addr, sizeof(*addr));
+
+	/*
+	 *		(intermediate)[in]
 	 *	          /	   \
 	 *	(new leaf node)[ln] (old node)[fn]
 	 */
 	if (plen > bit) {
 		in = node_alloc();
 		ln = node_alloc();
-		
-		if (in == NULL || ln == NULL) {
+
+		if (!in || !ln) {
 			if (in)
 				node_free(in);
 			if (ln)
 				node_free(ln);
-			return NULL;
+			return ERR_PTR(-ENOMEM);
 		}
 
-		/* 
-		 * new intermediate node. 
+		/*
+		 * new intermediate node.
 		 * RTN_RTINFO will
 		 * be off since that an address that chooses one of
 		 * the branches would not match less specific routes
@@ -357,7 +595,7 @@ insert_above:
 		}
 	} else { /* plen <= bit */
 
-		/* 
+		/*
 		 *		(new leaf node)[ln]
 		 *	          /	   \
 		 *	     (old node)[fn] NULL
@@ -365,15 +603,15 @@ insert_above:
 
 		ln = node_alloc();
 
-		if (ln == NULL)
-			return NULL;
+		if (!ln)
+			return ERR_PTR(-ENOMEM);
 
 		ln->fn_bit = plen;
 
 		ln->parent = pn;
 
 		ln->fn_sernum = sernum;
-		
+
 		if (dir)
 			pn->right = ln;
 		else
@@ -389,27 +627,61 @@ insert_above:
 	return ln;
 }
 
+static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt)
+{
+	return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
+	       RTF_GATEWAY;
+}
+
+static int fib6_commit_metrics(struct dst_entry *dst,
+			       struct nlattr *mx, int mx_len)
+{
+	struct nlattr *nla;
+	int remaining;
+	u32 *mp;
+
+	if (dst->flags & DST_HOST) {
+		mp = dst_metrics_write_ptr(dst);
+	} else {
+		mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+		if (!mp)
+			return -ENOMEM;
+		dst_init_metrics(dst, mp, 0);
+	}
+
+	nla_for_each_attr(nla, mx, mx_len, remaining) {
+		int type = nla_type(nla);
+
+		if (type) {
+			if (type > RTAX_MAX)
+				return -EINVAL;
+
+			mp[type - 1] = nla_get_u32(nla);
+		}
+	}
+	return 0;
+}
+
 /*
  *	Insert routing information in a node.
  */
 
 static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
-		struct nlmsghdr *nlh,  struct netlink_skb_parms *req)
+			    struct nl_info *info, struct nlattr *mx, int mx_len)
 {
 	struct rt6_info *iter = NULL;
 	struct rt6_info **ins;
+	int replace = (info->nlh &&
+		       (info->nlh->nlmsg_flags & NLM_F_REPLACE));
+	int add = (!info->nlh ||
+		   (info->nlh->nlmsg_flags & NLM_F_CREATE));
+	int found = 0;
+	bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+	int err;
 
 	ins = &fn->leaf;
 
-	if (fn->fn_flags&RTN_TL_ROOT &&
-	    fn->leaf == &ip6_null_entry &&
-	    !(rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ){
-		fn->leaf = rt;
-		rt->u.next = NULL;
-		goto out;
-	}
-
-	for (iter = fn->leaf; iter; iter=iter->u.next) {
+	for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) {
 		/*
 		 *	Search for duplicates
 		 */
@@ -418,59 +690,149 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 			/*
 			 *	Same priority level
 			 */
+			if (info->nlh &&
+			    (info->nlh->nlmsg_flags & NLM_F_EXCL))
+				return -EEXIST;
+			if (replace) {
+				found++;
+				break;
+			}
 
-			if (iter->rt6i_dev == rt->rt6i_dev &&
+			if (iter->dst.dev == rt->dst.dev &&
 			    iter->rt6i_idev == rt->rt6i_idev &&
 			    ipv6_addr_equal(&iter->rt6i_gateway,
 					    &rt->rt6i_gateway)) {
-				if (!(iter->rt6i_flags&RTF_EXPIRES))
+				if (rt->rt6i_nsiblings)
+					rt->rt6i_nsiblings = 0;
+				if (!(iter->rt6i_flags & RTF_EXPIRES))
 					return -EEXIST;
-				iter->rt6i_expires = rt->rt6i_expires;
-				if (!(rt->rt6i_flags&RTF_EXPIRES)) {
-					iter->rt6i_flags &= ~RTF_EXPIRES;
-					iter->rt6i_expires = 0;
-				}
+				if (!(rt->rt6i_flags & RTF_EXPIRES))
+					rt6_clean_expires(iter);
+				else
+					rt6_set_expires(iter, rt->dst.expires);
 				return -EEXIST;
 			}
+			/* If we have the same destination and the same metric,
+			 * but not the same gateway, then the route we try to
+			 * add is sibling to this route, increment our counter
+			 * of siblings, and later we will add our route to the
+			 * list.
+			 * Only static routes (which don't have flag
+			 * RTF_EXPIRES) are used for ECMPv6.
+			 *
+			 * To avoid long list, we only had siblings if the
+			 * route have a gateway.
+			 */
+			if (rt_can_ecmp &&
+			    rt6_qualify_for_ecmp(iter))
+				rt->rt6i_nsiblings++;
 		}
 
 		if (iter->rt6i_metric > rt->rt6i_metric)
 			break;
 
-		ins = &iter->u.next;
+		ins = &iter->dst.rt6_next;
+	}
+
+	/* Reset round-robin state, if necessary */
+	if (ins == &fn->leaf)
+		fn->rr_ptr = NULL;
+
+	/* Link this route to others same route. */
+	if (rt->rt6i_nsiblings) {
+		unsigned int rt6i_nsiblings;
+		struct rt6_info *sibling, *temp_sibling;
+
+		/* Find the first route that have the same metric */
+		sibling = fn->leaf;
+		while (sibling) {
+			if (sibling->rt6i_metric == rt->rt6i_metric &&
+			    rt6_qualify_for_ecmp(sibling)) {
+				list_add_tail(&rt->rt6i_siblings,
+					      &sibling->rt6i_siblings);
+				break;
+			}
+			sibling = sibling->dst.rt6_next;
+		}
+		/* For each sibling in the list, increment the counter of
+		 * siblings. BUG() if counters does not match, list of siblings
+		 * is broken!
+		 */
+		rt6i_nsiblings = 0;
+		list_for_each_entry_safe(sibling, temp_sibling,
+					 &rt->rt6i_siblings, rt6i_siblings) {
+			sibling->rt6i_nsiblings++;
+			BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings);
+			rt6i_nsiblings++;
+		}
+		BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
 	}
 
 	/*
 	 *	insert node
 	 */
+	if (!replace) {
+		if (!add)
+			pr_warn("NLM_F_CREATE should be set when creating new route\n");
+
+add:
+		if (mx) {
+			err = fib6_commit_metrics(&rt->dst, mx, mx_len);
+			if (err)
+				return err;
+		}
+		rt->dst.rt6_next = iter;
+		*ins = rt;
+		rt->rt6i_node = fn;
+		atomic_inc(&rt->rt6i_ref);
+		inet6_rt_notify(RTM_NEWROUTE, rt, info);
+		info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
+
+		if (!(fn->fn_flags & RTN_RTINFO)) {
+			info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
+			fn->fn_flags |= RTN_RTINFO;
+		}
 
-out:
-	rt->u.next = iter;
-	*ins = rt;
-	rt->rt6i_node = fn;
-	atomic_inc(&rt->rt6i_ref);
-	inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req);
-	rt6_stats.fib_rt_entries++;
-
-	if ((fn->fn_flags & RTN_RTINFO) == 0) {
-		rt6_stats.fib_route_nodes++;
-		fn->fn_flags |= RTN_RTINFO;
+	} else {
+		if (!found) {
+			if (add)
+				goto add;
+			pr_warn("NLM_F_REPLACE set, but no existing node found!\n");
+			return -ENOENT;
+		}
+		if (mx) {
+			err = fib6_commit_metrics(&rt->dst, mx, mx_len);
+			if (err)
+				return err;
+		}
+		*ins = rt;
+		rt->rt6i_node = fn;
+		rt->dst.rt6_next = iter->dst.rt6_next;
+		atomic_inc(&rt->rt6i_ref);
+		inet6_rt_notify(RTM_NEWROUTE, rt, info);
+		rt6_release(iter);
+		if (!(fn->fn_flags & RTN_RTINFO)) {
+			info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
+			fn->fn_flags |= RTN_RTINFO;
+		}
 	}
 
 	return 0;
 }
 
-static __inline__ void fib6_start_gc(struct rt6_info *rt)
+static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt)
 {
-	if (ip6_fib_timer.expires == 0 &&
-	    (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
-		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+	if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
+	    (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE)))
+		mod_timer(&net->ipv6.ip6_fib_timer,
+			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
 }
 
-void fib6_force_start_gc(void)
+void fib6_force_start_gc(struct net *net)
 {
-	if (ip6_fib_timer.expires == 0)
-		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+	if (!timer_pending(&net->ipv6.ip6_fib_timer))
+		mod_timer(&net->ipv6.ip6_fib_timer,
+			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
 }
 
 /*
@@ -479,23 +841,39 @@ void fib6_force_start_gc(void)
  *	with source addr info in sub-trees
  */
 
-int fib6_add(struct fib6_node *root, struct rt6_info *rt, 
-		struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
+	     struct nlattr *mx, int mx_len)
 {
-	struct fib6_node *fn;
+	struct fib6_node *fn, *pn = NULL;
 	int err = -ENOMEM;
-
-	fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
-			rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst));
-
-	if (fn == NULL)
+	int allow_create = 1;
+	int replace_required = 0;
+
+	if (info->nlh) {
+		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
+			allow_create = 0;
+		if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
+			replace_required = 1;
+	}
+	if (!allow_create && !replace_required)
+		pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
+
+	fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
+			offsetof(struct rt6_info, rt6i_dst), allow_create,
+			replace_required);
+	if (IS_ERR(fn)) {
+		err = PTR_ERR(fn);
+		fn = NULL;
 		goto out;
+	}
+
+	pn = fn;
 
 #ifdef CONFIG_IPV6_SUBTREES
 	if (rt->rt6i_src.plen) {
 		struct fib6_node *sn;
 
-		if (fn->subtree == NULL) {
+		if (!fn->subtree) {
 			struct fib6_node *sfn;
 
 			/*
@@ -510,60 +888,85 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 
 			/* Create subtree root node */
 			sfn = node_alloc();
-			if (sfn == NULL)
+			if (!sfn)
 				goto st_failure;
 
-			sfn->leaf = &ip6_null_entry;
-			atomic_inc(&ip6_null_entry.rt6i_ref);
+			sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
+			atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
 			sfn->fn_flags = RTN_ROOT;
 			sfn->fn_sernum = fib6_new_sernum();
 
 			/* Now add the first leaf node to new subtree */
 
 			sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
-					sizeof(struct in6_addr), rt->rt6i_src.plen,
-					offsetof(struct rt6_info, rt6i_src));
+					rt->rt6i_src.plen,
+					offsetof(struct rt6_info, rt6i_src),
+					allow_create, replace_required);
 
-			if (sn == NULL) {
+			if (IS_ERR(sn)) {
 				/* If it is failed, discard just allocated
 				   root, and then (in st_failure) stale node
 				   in main tree.
 				 */
 				node_free(sfn);
+				err = PTR_ERR(sn);
 				goto st_failure;
 			}
 
 			/* Now link new subtree to main tree */
 			sfn->parent = fn;
 			fn->subtree = sfn;
-			if (fn->leaf == NULL) {
-				fn->leaf = rt;
-				atomic_inc(&rt->rt6i_ref);
-			}
 		} else {
 			sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
-					sizeof(struct in6_addr), rt->rt6i_src.plen,
-					offsetof(struct rt6_info, rt6i_src));
+					rt->rt6i_src.plen,
+					offsetof(struct rt6_info, rt6i_src),
+					allow_create, replace_required);
 
-			if (sn == NULL)
+			if (IS_ERR(sn)) {
+				err = PTR_ERR(sn);
 				goto st_failure;
+			}
 		}
 
+		if (!fn->leaf) {
+			fn->leaf = rt;
+			atomic_inc(&rt->rt6i_ref);
+		}
 		fn = sn;
 	}
 #endif
 
-	err = fib6_add_rt2node(fn, rt, nlh, req);
-
-	if (err == 0) {
-		fib6_start_gc(rt);
-		if (!(rt->rt6i_flags&RTF_CACHE))
-			fib6_prune_clones(fn, rt);
+	err = fib6_add_rt2node(fn, rt, info, mx, mx_len);
+	if (!err) {
+		fib6_start_gc(info->nl_net, rt);
+		if (!(rt->rt6i_flags & RTF_CACHE))
+			fib6_prune_clones(info->nl_net, pn);
 	}
 
 out:
-	if (err)
-		dst_free(&rt->u.dst);
+	if (err) {
+#ifdef CONFIG_IPV6_SUBTREES
+		/*
+		 * If fib6_add_1 has cleared the old leaf pointer in the
+		 * super-tree leaf node we have to find a new one for it.
+		 */
+		if (pn != fn && pn->leaf == rt) {
+			pn->leaf = NULL;
+			atomic_dec(&rt->rt6i_ref);
+		}
+		if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) {
+			pn->leaf = fib6_find_prefix(info->nl_net, pn);
+#if RT6_DEBUG >= 2
+			if (!pn->leaf) {
+				WARN_ON(pn->leaf == NULL);
+				pn->leaf = info->nl_net->ipv6.ip6_null_entry;
+			}
+#endif
+			atomic_inc(&pn->leaf->rt6i_ref);
+		}
+#endif
+		dst_free(&rt->dst);
+	}
 	return err;
 
 #ifdef CONFIG_IPV6_SUBTREES
@@ -572,8 +975,8 @@ out:
 	 */
 st_failure:
 	if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
-		fib6_repair_tree(fn);
-	dst_free(&rt->u.dst);
+		fib6_repair_tree(info->nl_net, fn);
+	dst_free(&rt->dst);
 	return err;
 #endif
 }
@@ -584,15 +987,18 @@ st_failure:
  */
 
 struct lookup_args {
-	int		offset;		/* key offset on rt6_info	*/
-	struct in6_addr	*addr;		/* search key			*/
+	int			offset;		/* key offset on rt6_info	*/
+	const struct in6_addr	*addr;		/* search key			*/
 };
 
-static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
-					struct lookup_args *args)
+static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
+				       struct lookup_args *args)
 {
 	struct fib6_node *fn;
-	int dir;
+	__be32 dir;
+
+	if (unlikely(args->offset == 0))
+		return NULL;
 
 	/*
 	 *	Descend on a tree
@@ -611,36 +1017,36 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
 			fn = next;
 			continue;
 		}
-
 		break;
 	}
 
-	while ((fn->fn_flags & RTN_ROOT) == 0) {
-#ifdef CONFIG_IPV6_SUBTREES
-		if (fn->subtree) {
-			struct fib6_node *st;
-			struct lookup_args *narg;
-
-			narg = args + 1;
-
-			if (narg->addr) {
-				st = fib6_lookup_1(fn->subtree, narg);
-
-				if (st && !(st->fn_flags & RTN_ROOT))
-					return st;
-			}
-		}
-#endif
-
-		if (fn->fn_flags & RTN_RTINFO) {
+	while (fn) {
+		if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
 			struct rt6key *key;
 
 			key = (struct rt6key *) ((u8 *) fn->leaf +
 						 args->offset);
 
-			if (ipv6_prefix_equal(&key->addr, args->addr, key->plen))
-				return fn;
+			if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
+#ifdef CONFIG_IPV6_SUBTREES
+				if (fn->subtree) {
+					struct fib6_node *sfn;
+					sfn = fib6_lookup_1(fn->subtree,
+							    args + 1);
+					if (!sfn)
+						goto backtrack;
+					fn = sfn;
+				}
+#endif
+				if (fn->fn_flags & RTN_RTINFO)
+					return fn;
+			}
 		}
+#ifdef CONFIG_IPV6_SUBTREES
+backtrack:
+#endif
+		if (fn->fn_flags & RTN_ROOT)
+			break;
 
 		fn = fn->parent;
 	}
@@ -648,23 +1054,28 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
 	return NULL;
 }
 
-struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
-			       struct in6_addr *saddr)
+struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
+			      const struct in6_addr *saddr)
 {
-	struct lookup_args args[2];
 	struct fib6_node *fn;
-
-	args[0].offset = offsetof(struct rt6_info, rt6i_dst);
-	args[0].addr = daddr;
-
+	struct lookup_args args[] = {
+		{
+			.offset = offsetof(struct rt6_info, rt6i_dst),
+			.addr = daddr,
+		},
 #ifdef CONFIG_IPV6_SUBTREES
-	args[1].offset = offsetof(struct rt6_info, rt6i_src);
-	args[1].addr = saddr;
+		{
+			.offset = offsetof(struct rt6_info, rt6i_src),
+			.addr = saddr,
+		},
 #endif
+		{
+			.offset = 0,	/* sentinel */
+		}
+	};
 
-	fn = fib6_lookup_1(root, args);
-
-	if (fn == NULL || fn->fn_flags & RTN_TL_ROOT)
+	fn = fib6_lookup_1(root, daddr ? args : args + 1);
+	if (!fn || fn->fn_flags & RTN_TL_ROOT)
 		fn = root;
 
 	return fn;
@@ -676,9 +1087,9 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
  */
 
 
-static struct fib6_node * fib6_locate_1(struct fib6_node *root,
-					struct in6_addr *addr,
-					int plen, int offset)
+static struct fib6_node *fib6_locate_1(struct fib6_node *root,
+				       const struct in6_addr *addr,
+				       int plen, int offset)
 {
 	struct fib6_node *fn;
 
@@ -706,9 +1117,9 @@ static struct fib6_node * fib6_locate_1(struct fib6_node *root,
 	return NULL;
 }
 
-struct fib6_node * fib6_locate(struct fib6_node *root,
-			       struct in6_addr *daddr, int dst_len,
-			       struct in6_addr *saddr, int src_len)
+struct fib6_node *fib6_locate(struct fib6_node *root,
+			      const struct in6_addr *daddr, int dst_len,
+			      const struct in6_addr *saddr, int src_len)
 {
 	struct fib6_node *fn;
 
@@ -717,16 +1128,14 @@ struct fib6_node * fib6_locate(struct fib6_node *root,
 
 #ifdef CONFIG_IPV6_SUBTREES
 	if (src_len) {
-		BUG_TRAP(saddr!=NULL);
-		if (fn == NULL)
-			fn = fn->subtree;
-		if (fn)
-			fn = fib6_locate_1(fn, saddr, src_len,
+		WARN_ON(saddr == NULL);
+		if (fn && fn->subtree)
+			fn = fib6_locate_1(fn->subtree, saddr, src_len,
 					   offsetof(struct rt6_info, rt6i_src));
 	}
 #endif
 
-	if (fn && fn->fn_flags&RTN_RTINFO)
+	if (fn && fn->fn_flags & RTN_RTINFO)
 		return fn;
 
 	return NULL;
@@ -738,19 +1147,18 @@ struct fib6_node * fib6_locate(struct fib6_node *root,
  *
  */
 
-static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
+static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
 {
-	if (fn->fn_flags&RTN_ROOT)
-		return &ip6_null_entry;
+	if (fn->fn_flags & RTN_ROOT)
+		return net->ipv6.ip6_null_entry;
 
-	while(fn) {
-		if(fn->left)
+	while (fn) {
+		if (fn->left)
 			return fn->left->leaf;
-
-		if(fn->right)
+		if (fn->right)
 			return fn->right->leaf;
 
-		fn = SUBTREE(fn);
+		fn = FIB6_SUBTREE(fn);
 	}
 	return NULL;
 }
@@ -760,7 +1168,8 @@ static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
  *	is the node we want to try and remove.
  */
 
-static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
+static struct fib6_node *fib6_repair_tree(struct net *net,
+					   struct fib6_node *fn)
 {
 	int children;
 	int nstate;
@@ -772,26 +1181,28 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
 		RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
 		iter++;
 
-		BUG_TRAP(!(fn->fn_flags&RTN_RTINFO));
-		BUG_TRAP(!(fn->fn_flags&RTN_TL_ROOT));
-		BUG_TRAP(fn->leaf==NULL);
+		WARN_ON(fn->fn_flags & RTN_RTINFO);
+		WARN_ON(fn->fn_flags & RTN_TL_ROOT);
+		WARN_ON(fn->leaf != NULL);
 
 		children = 0;
 		child = NULL;
-		if (fn->right) child = fn->right, children |= 1;
-		if (fn->left) child = fn->left, children |= 2;
+		if (fn->right)
+			child = fn->right, children |= 1;
+		if (fn->left)
+			child = fn->left, children |= 2;
 
-		if (children == 3 || SUBTREE(fn) 
+		if (children == 3 || FIB6_SUBTREE(fn)
 #ifdef CONFIG_IPV6_SUBTREES
 		    /* Subtree root (i.e. fn) may have one child */
-		    || (children && fn->fn_flags&RTN_ROOT)
+		    || (children && fn->fn_flags & RTN_ROOT)
 #endif
 		    ) {
-			fn->leaf = fib6_find_prefix(fn);
+			fn->leaf = fib6_find_prefix(net, fn);
 #if RT6_DEBUG >= 2
-			if (fn->leaf==NULL) {
-				BUG_TRAP(fn->leaf);
-				fn->leaf = &ip6_null_entry;
+			if (!fn->leaf) {
+				WARN_ON(!fn->leaf);
+				fn->leaf = net->ipv6.ip6_null_entry;
 			}
 #endif
 			atomic_inc(&fn->leaf->rt6i_ref);
@@ -800,17 +1211,20 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
 
 		pn = fn->parent;
 #ifdef CONFIG_IPV6_SUBTREES
-		if (SUBTREE(pn) == fn) {
-			BUG_TRAP(fn->fn_flags&RTN_ROOT);
-			SUBTREE(pn) = NULL;
+		if (FIB6_SUBTREE(pn) == fn) {
+			WARN_ON(!(fn->fn_flags & RTN_ROOT));
+			FIB6_SUBTREE(pn) = NULL;
 			nstate = FWS_L;
 		} else {
-			BUG_TRAP(!(fn->fn_flags&RTN_ROOT));
+			WARN_ON(fn->fn_flags & RTN_ROOT);
 #endif
-			if (pn->right == fn) pn->right = child;
-			else if (pn->left == fn) pn->left = child;
+			if (pn->right == fn)
+				pn->right = child;
+			else if (pn->left == fn)
+				pn->left = child;
 #if RT6_DEBUG >= 2
-			else BUG_TRAP(0);
+			else
+				WARN_ON(1);
 #endif
 			if (child)
 				child->parent = pn;
@@ -821,7 +1235,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
 
 		read_lock(&fib6_walker_lock);
 		FOR_WALKERS(w) {
-			if (child == NULL) {
+			if (!child) {
 				if (w->root == fn) {
 					w->root = w->node = NULL;
 					RT6_TRACE("W %p adjusted by delroot 1\n", w);
@@ -839,10 +1253,10 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
 					w->node = child;
 					if (children&2) {
 						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
-						w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
+						w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
 					} else {
 						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
-						w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
+						w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
 					}
 				}
 			}
@@ -850,7 +1264,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
 		read_unlock(&fib6_walker_lock);
 
 		node_free(fn);
-		if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn))
+		if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
 			return pn;
 
 		rt6_release(pn->leaf);
@@ -860,41 +1274,54 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
 }
 
 static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
-    struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+			   struct nl_info *info)
 {
 	struct fib6_walker_t *w;
 	struct rt6_info *rt = *rtp;
+	struct net *net = info->nl_net;
 
 	RT6_TRACE("fib6_del_route\n");
 
 	/* Unlink it */
-	*rtp = rt->u.next;
+	*rtp = rt->dst.rt6_next;
 	rt->rt6i_node = NULL;
-	rt6_stats.fib_rt_entries--;
-	rt6_stats.fib_discarded_routes++;
+	net->ipv6.rt6_stats->fib_rt_entries--;
+	net->ipv6.rt6_stats->fib_discarded_routes++;
+
+	/* Reset round-robin state, if necessary */
+	if (fn->rr_ptr == rt)
+		fn->rr_ptr = NULL;
+
+	/* Remove this entry from other siblings */
+	if (rt->rt6i_nsiblings) {
+		struct rt6_info *sibling, *next_sibling;
+
+		list_for_each_entry_safe(sibling, next_sibling,
+					 &rt->rt6i_siblings, rt6i_siblings)
+			sibling->rt6i_nsiblings--;
+		rt->rt6i_nsiblings = 0;
+		list_del_init(&rt->rt6i_siblings);
+	}
 
 	/* Adjust walkers */
 	read_lock(&fib6_walker_lock);
 	FOR_WALKERS(w) {
 		if (w->state == FWS_C && w->leaf == rt) {
 			RT6_TRACE("walker %p adjusted by delroute\n", w);
-			w->leaf = rt->u.next;
-			if (w->leaf == NULL)
+			w->leaf = rt->dst.rt6_next;
+			if (!w->leaf)
 				w->state = FWS_U;
 		}
 	}
 	read_unlock(&fib6_walker_lock);
 
-	rt->u.next = NULL;
-
-	if (fn->leaf == NULL && fn->fn_flags&RTN_TL_ROOT)
-		fn->leaf = &ip6_null_entry;
+	rt->dst.rt6_next = NULL;
 
 	/* If it was last route, expunge its radix tree node */
-	if (fn->leaf == NULL) {
+	if (!fn->leaf) {
 		fn->fn_flags &= ~RTN_RTINFO;
-		rt6_stats.fib_route_nodes--;
-		fn = fib6_repair_tree(fn);
+		net->ipv6.rt6_stats->fib_route_nodes--;
+		fn = fib6_repair_tree(net, fn);
 	}
 
 	if (atomic_read(&rt->rt6i_ref) != 1) {
@@ -905,47 +1332,58 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 		 * to still alive ones.
 		 */
 		while (fn) {
-			if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) {
-				fn->leaf = fib6_find_prefix(fn);
+			if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) {
+				fn->leaf = fib6_find_prefix(net, fn);
 				atomic_inc(&fn->leaf->rt6i_ref);
 				rt6_release(rt);
 			}
 			fn = fn->parent;
 		}
 		/* No more references are possible at this point. */
-		if (atomic_read(&rt->rt6i_ref) != 1) BUG();
+		BUG_ON(atomic_read(&rt->rt6i_ref) != 1);
 	}
 
-	inet6_rt_notify(RTM_DELROUTE, rt, nlh, req);
+	inet6_rt_notify(RTM_DELROUTE, rt, info);
 	rt6_release(rt);
 }
 
-int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+int fib6_del(struct rt6_info *rt, struct nl_info *info)
 {
+	struct net *net = info->nl_net;
 	struct fib6_node *fn = rt->rt6i_node;
 	struct rt6_info **rtp;
 
 #if RT6_DEBUG >= 2
-	if (rt->u.dst.obsolete>0) {
-		BUG_TRAP(fn==NULL);
+	if (rt->dst.obsolete > 0) {
+		WARN_ON(fn != NULL);
 		return -ENOENT;
 	}
 #endif
-	if (fn == NULL || rt == &ip6_null_entry)
+	if (!fn || rt == net->ipv6.ip6_null_entry)
 		return -ENOENT;
 
-	BUG_TRAP(fn->fn_flags&RTN_RTINFO);
+	WARN_ON(!(fn->fn_flags & RTN_RTINFO));
 
-	if (!(rt->rt6i_flags&RTF_CACHE))
-		fib6_prune_clones(fn, rt);
+	if (!(rt->rt6i_flags & RTF_CACHE)) {
+		struct fib6_node *pn = fn;
+#ifdef CONFIG_IPV6_SUBTREES
+		/* clones of this route might be in another subtree */
+		if (rt->rt6i_src.plen) {
+			while (!(pn->fn_flags & RTN_ROOT))
+				pn = pn->parent;
+			pn = pn->parent;
+		}
+#endif
+		fib6_prune_clones(info->nl_net, pn);
+	}
 
 	/*
 	 *	Walk the leaf entries looking for ourself
 	 */
 
-	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
+	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) {
 		if (*rtp == rt) {
-			fib6_del_route(fn, rtp, nlh, _rtattr, req);
+			fib6_del_route(fn, rtp, info);
 			return 0;
 		}
 	}
@@ -959,7 +1397,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne
  *	However, it is internally reenterable wrt itself and fib6_add/fib6_del.
  *	It means, that we can modify tree during walking
  *	and use this function for garbage collection, clone pruning,
- *	cleaning tree when a device goes down etc. etc.	
+ *	cleaning tree when a device goes down etc. etc.
  *
  *	It guarantees that every node will be traversed,
  *	and that it will be traversed only once.
@@ -976,29 +1414,29 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne
  *	<0  -> walk is terminated by an error.
  */
 
-int fib6_walk_continue(struct fib6_walker_t *w)
+static int fib6_walk_continue(struct fib6_walker_t *w)
 {
 	struct fib6_node *fn, *pn;
 
 	for (;;) {
 		fn = w->node;
-		if (fn == NULL)
+		if (!fn)
 			return 0;
 
 		if (w->prune && fn != w->root &&
-		    fn->fn_flags&RTN_RTINFO && w->state < FWS_C) {
+		    fn->fn_flags & RTN_RTINFO && w->state < FWS_C) {
 			w->state = FWS_C;
 			w->leaf = fn->leaf;
 		}
 		switch (w->state) {
 #ifdef CONFIG_IPV6_SUBTREES
 		case FWS_S:
-			if (SUBTREE(fn)) {
-				w->node = SUBTREE(fn);
+			if (FIB6_SUBTREE(fn)) {
+				w->node = FIB6_SUBTREE(fn);
 				continue;
 			}
 			w->state = FWS_L;
-#endif	
+#endif
 		case FWS_L:
 			if (fn->left) {
 				w->node = fn->left;
@@ -1015,12 +1453,22 @@ int fib6_walk_continue(struct fib6_walker_t *w)
 			w->state = FWS_C;
 			w->leaf = fn->leaf;
 		case FWS_C:
-			if (w->leaf && fn->fn_flags&RTN_RTINFO) {
-				int err = w->func(w);
+			if (w->leaf && fn->fn_flags & RTN_RTINFO) {
+				int err;
+
+				if (w->skip) {
+					w->skip--;
+					goto skip;
+				}
+
+				err = w->func(w);
 				if (err)
 					return err;
+
+				w->count++;
 				continue;
 			}
+skip:
 			w->state = FWS_U;
 		case FWS_U:
 			if (fn == w->root)
@@ -1028,8 +1476,8 @@ int fib6_walk_continue(struct fib6_walker_t *w)
 			pn = fn->parent;
 			w->node = pn;
 #ifdef CONFIG_IPV6_SUBTREES
-			if (SUBTREE(pn) == fn) {
-				BUG_TRAP(fn->fn_flags&RTN_ROOT);
+			if (FIB6_SUBTREE(pn) == fn) {
+				WARN_ON(!(fn->fn_flags & RTN_ROOT));
 				w->state = FWS_L;
 				continue;
 			}
@@ -1044,13 +1492,13 @@ int fib6_walk_continue(struct fib6_walker_t *w)
 				continue;
 			}
 #if RT6_DEBUG >= 2
-			BUG_TRAP(0);
+			WARN_ON(1);
 #endif
 		}
 	}
 }
 
-int fib6_walk(struct fib6_walker_t *w)
+static int fib6_walk(struct fib6_walker_t *w)
 {
 	int res;
 
@@ -1068,22 +1516,26 @@ static int fib6_clean_node(struct fib6_walker_t *w)
 {
 	int res;
 	struct rt6_info *rt;
-	struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w;
+	struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w);
+	struct nl_info info = {
+		.nl_net = c->net,
+	};
 
-	for (rt = w->leaf; rt; rt = rt->u.next) {
+	for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
 		res = c->func(rt, c->arg);
 		if (res < 0) {
 			w->leaf = rt;
-			res = fib6_del(rt, NULL, NULL, NULL);
+			res = fib6_del(rt, &info);
 			if (res) {
 #if RT6_DEBUG >= 2
-				printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
+				pr_debug("%s: del failed: rt=%p@%p err=%d\n",
+					 __func__, rt, rt->rt6i_node, res);
 #endif
 				continue;
 			}
 			return 0;
 		}
-		BUG_TRAP(res==0);
+		WARN_ON(res != 0);
 	}
 	w->leaf = rt;
 	return 0;
@@ -1091,7 +1543,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
 
 /*
  *	Convenient frontend to tree walker.
- *	
+ *
  *	func is called on each route.
  *		It may return -1 -> delete this route.
  *		              0  -> continue walking
@@ -1100,21 +1552,44 @@ static int fib6_clean_node(struct fib6_walker_t *w)
  *	ignoring pure split nodes) will be scanned.
  */
 
-void fib6_clean_tree(struct fib6_node *root,
-		     int (*func)(struct rt6_info *, void *arg),
-		     int prune, void *arg)
+static void fib6_clean_tree(struct net *net, struct fib6_node *root,
+			    int (*func)(struct rt6_info *, void *arg),
+			    int prune, void *arg)
 {
 	struct fib6_cleaner_t c;
 
 	c.w.root = root;
 	c.w.func = fib6_clean_node;
 	c.w.prune = prune;
+	c.w.count = 0;
+	c.w.skip = 0;
 	c.func = func;
 	c.arg = arg;
+	c.net = net;
 
 	fib6_walk(&c.w);
 }
 
+void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
+		    void *arg)
+{
+	struct fib6_table *table;
+	struct hlist_head *head;
+	unsigned int h;
+
+	rcu_read_lock();
+	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+		head = &net->ipv6.fib_table_hash[h];
+		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
+			write_lock_bh(&table->tb6_lock);
+			fib6_clean_tree(net, &table->tb6_root,
+					func, 0, arg);
+			write_unlock_bh(&table->tb6_lock);
+		}
+	}
+	rcu_read_unlock();
+}
+
 static int fib6_prune_clone(struct rt6_info *rt, void *arg)
 {
 	if (rt->rt6i_flags & RTF_CACHE) {
@@ -1125,9 +1600,9 @@ static int fib6_prune_clone(struct rt6_info *rt, void *arg)
 	return 0;
 }
 
-static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt)
+static void fib6_prune_clones(struct net *net, struct fib6_node *fn)
 {
-	fib6_clean_tree(fn, fib6_prune_clone, 1, rt);
+	fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL);
 }
 
 /*
@@ -1152,23 +1627,31 @@ static int fib6_age(struct rt6_info *rt, void *arg)
 	 *	only if they are not in use now.
 	 */
 
-	if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) {
-		if (time_after(now, rt->rt6i_expires)) {
+	if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) {
+		if (time_after(now, rt->dst.expires)) {
 			RT6_TRACE("expiring %p\n", rt);
-			rt6_reset_dflt_pointer(rt);
 			return -1;
 		}
 		gc_args.more++;
 	} else if (rt->rt6i_flags & RTF_CACHE) {
-		if (atomic_read(&rt->u.dst.__refcnt) == 0 &&
-		    time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) {
+		if (atomic_read(&rt->dst.__refcnt) == 0 &&
+		    time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) {
 			RT6_TRACE("aging clone %p\n", rt);
 			return -1;
-		} else if ((rt->rt6i_flags & RTF_GATEWAY) &&
-			   (!(rt->rt6i_nexthop->flags & NTF_ROUTER))) {
-			RT6_TRACE("purging route %p via non-router but gateway\n",
-				  rt);
-			return -1;
+		} else if (rt->rt6i_flags & RTF_GATEWAY) {
+			struct neighbour *neigh;
+			__u8 neigh_flags = 0;
+
+			neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
+			if (neigh) {
+				neigh_flags = neigh->flags;
+				neigh_release(neigh);
+			}
+			if (!(neigh_flags & NTF_ROUTER)) {
+				RT6_TRACE("purging route %p via non-router but gateway\n",
+					  rt);
+				return -1;
+			}
 		}
 		gc_args.more++;
 	}
@@ -1178,49 +1661,331 @@ static int fib6_age(struct rt6_info *rt, void *arg)
 
 static DEFINE_SPINLOCK(fib6_gc_lock);
 
-void fib6_run_gc(unsigned long dummy)
+void fib6_run_gc(unsigned long expires, struct net *net, bool force)
 {
-	if (dummy != ~0UL) {
+	unsigned long now;
+
+	if (force) {
 		spin_lock_bh(&fib6_gc_lock);
-		gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval;
-	} else {
-		local_bh_disable();
-		if (!spin_trylock(&fib6_gc_lock)) {
-			mod_timer(&ip6_fib_timer, jiffies + HZ);
-			local_bh_enable();
-			return;
-		}
-		gc_args.timeout = ip6_rt_gc_interval;
+	} else if (!spin_trylock_bh(&fib6_gc_lock)) {
+		mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
+		return;
 	}
-	gc_args.more = 0;
+	gc_args.timeout = expires ? (int)expires :
+			  net->ipv6.sysctl.ip6_rt_gc_interval;
 
+	gc_args.more = icmp6_dst_gc();
 
-	write_lock_bh(&rt6_lock);
-	ndisc_dst_gc(&gc_args.more);
-	fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
-	write_unlock_bh(&rt6_lock);
+	fib6_clean_all(net, fib6_age, NULL);
+	now = jiffies;
+	net->ipv6.ip6_rt_last_gc = now;
 
 	if (gc_args.more)
-		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
-	else {
-		del_timer(&ip6_fib_timer);
-		ip6_fib_timer.expires = 0;
-	}
+		mod_timer(&net->ipv6.ip6_fib_timer,
+			  round_jiffies(now
+					+ net->ipv6.sysctl.ip6_rt_gc_interval));
+	else
+		del_timer(&net->ipv6.ip6_fib_timer);
 	spin_unlock_bh(&fib6_gc_lock);
 }
 
-void __init fib6_init(void)
+static void fib6_gc_timer_cb(unsigned long arg)
+{
+	fib6_run_gc(0, (struct net *)arg, true);
+}
+
+static int __net_init fib6_net_init(struct net *net)
+{
+	size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
+
+	setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net);
+
+	net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
+	if (!net->ipv6.rt6_stats)
+		goto out_timer;
+
+	/* Avoid false sharing : Use at least a full cache line */
+	size = max_t(size_t, size, L1_CACHE_BYTES);
+
+	net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL);
+	if (!net->ipv6.fib_table_hash)
+		goto out_rt6_stats;
+
+	net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl),
+					  GFP_KERNEL);
+	if (!net->ipv6.fib6_main_tbl)
+		goto out_fib_table_hash;
+
+	net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
+	net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
+	net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
+		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+	inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
+					   GFP_KERNEL);
+	if (!net->ipv6.fib6_local_tbl)
+		goto out_fib6_main_tbl;
+	net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
+	net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
+	net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
+		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+	inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
+#endif
+	fib6_tables_init(net);
+
+	return 0;
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+out_fib6_main_tbl:
+	kfree(net->ipv6.fib6_main_tbl);
+#endif
+out_fib_table_hash:
+	kfree(net->ipv6.fib_table_hash);
+out_rt6_stats:
+	kfree(net->ipv6.rt6_stats);
+out_timer:
+	return -ENOMEM;
+}
+
+static void fib6_net_exit(struct net *net)
 {
+	rt6_ifdown(net, NULL);
+	del_timer_sync(&net->ipv6.ip6_fib_timer);
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers);
+	kfree(net->ipv6.fib6_local_tbl);
+#endif
+	inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers);
+	kfree(net->ipv6.fib6_main_tbl);
+	kfree(net->ipv6.fib_table_hash);
+	kfree(net->ipv6.rt6_stats);
+}
+
+static struct pernet_operations fib6_net_ops = {
+	.init = fib6_net_init,
+	.exit = fib6_net_exit,
+};
+
+int __init fib6_init(void)
+{
+	int ret = -ENOMEM;
+
 	fib6_node_kmem = kmem_cache_create("fib6_nodes",
 					   sizeof(struct fib6_node),
 					   0, SLAB_HWCACHE_ALIGN,
-					   NULL, NULL);
+					   NULL);
 	if (!fib6_node_kmem)
-		panic("cannot create fib6_nodes cache");
+		goto out;
+
+	ret = register_pernet_subsys(&fib6_net_ops);
+	if (ret)
+		goto out_kmem_cache_create;
+
+	ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
+			      NULL);
+	if (ret)
+		goto out_unregister_subsys;
+out:
+	return ret;
+
+out_unregister_subsys:
+	unregister_pernet_subsys(&fib6_net_ops);
+out_kmem_cache_create:
+	kmem_cache_destroy(fib6_node_kmem);
+	goto out;
 }
 
 void fib6_gc_cleanup(void)
 {
-	del_timer(&ip6_fib_timer);
+	unregister_pernet_subsys(&fib6_net_ops);
 	kmem_cache_destroy(fib6_node_kmem);
 }
+
+#ifdef CONFIG_PROC_FS
+
+struct ipv6_route_iter {
+	struct seq_net_private p;
+	struct fib6_walker_t w;
+	loff_t skip;
+	struct fib6_table *tbl;
+	__u32 sernum;
+};
+
+static int ipv6_route_seq_show(struct seq_file *seq, void *v)
+{
+	struct rt6_info *rt = v;
+	struct ipv6_route_iter *iter = seq->private;
+
+	seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
+
+#ifdef CONFIG_IPV6_SUBTREES
+	seq_printf(seq, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
+#else
+	seq_puts(seq, "00000000000000000000000000000000 00 ");
+#endif
+	if (rt->rt6i_flags & RTF_GATEWAY)
+		seq_printf(seq, "%pi6", &rt->rt6i_gateway);
+	else
+		seq_puts(seq, "00000000000000000000000000000000");
+
+	seq_printf(seq, " %08x %08x %08x %08x %8s\n",
+		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
+		   rt->dst.__use, rt->rt6i_flags,
+		   rt->dst.dev ? rt->dst.dev->name : "");
+	iter->w.leaf = NULL;
+	return 0;
+}
+
+static int ipv6_route_yield(struct fib6_walker_t *w)
+{
+	struct ipv6_route_iter *iter = w->args;
+
+	if (!iter->skip)
+		return 1;
+
+	do {
+		iter->w.leaf = iter->w.leaf->dst.rt6_next;
+		iter->skip--;
+		if (!iter->skip && iter->w.leaf)
+			return 1;
+	} while (iter->w.leaf);
+
+	return 0;
+}
+
+static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter)
+{
+	memset(&iter->w, 0, sizeof(iter->w));
+	iter->w.func = ipv6_route_yield;
+	iter->w.root = &iter->tbl->tb6_root;
+	iter->w.state = FWS_INIT;
+	iter->w.node = iter->w.root;
+	iter->w.args = iter;
+	iter->sernum = iter->w.root->fn_sernum;
+	INIT_LIST_HEAD(&iter->w.lh);
+	fib6_walker_link(&iter->w);
+}
+
+static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
+						    struct net *net)
+{
+	unsigned int h;
+	struct hlist_node *node;
+
+	if (tbl) {
+		h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1;
+		node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist));
+	} else {
+		h = 0;
+		node = NULL;
+	}
+
+	while (!node && h < FIB6_TABLE_HASHSZ) {
+		node = rcu_dereference_bh(
+			hlist_first_rcu(&net->ipv6.fib_table_hash[h++]));
+	}
+	return hlist_entry_safe(node, struct fib6_table, tb6_hlist);
+}
+
+static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
+{
+	if (iter->sernum != iter->w.root->fn_sernum) {
+		iter->sernum = iter->w.root->fn_sernum;
+		iter->w.state = FWS_INIT;
+		iter->w.node = iter->w.root;
+		WARN_ON(iter->w.skip);
+		iter->w.skip = iter->w.count;
+	}
+}
+
+static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	int r;
+	struct rt6_info *n;
+	struct net *net = seq_file_net(seq);
+	struct ipv6_route_iter *iter = seq->private;
+
+	if (!v)
+		goto iter_table;
+
+	n = ((struct rt6_info *)v)->dst.rt6_next;
+	if (n) {
+		++*pos;
+		return n;
+	}
+
+iter_table:
+	ipv6_route_check_sernum(iter);
+	read_lock(&iter->tbl->tb6_lock);
+	r = fib6_walk_continue(&iter->w);
+	read_unlock(&iter->tbl->tb6_lock);
+	if (r > 0) {
+		if (v)
+			++*pos;
+		return iter->w.leaf;
+	} else if (r < 0) {
+		fib6_walker_unlink(&iter->w);
+		return NULL;
+	}
+	fib6_walker_unlink(&iter->w);
+
+	iter->tbl = ipv6_route_seq_next_table(iter->tbl, net);
+	if (!iter->tbl)
+		return NULL;
+
+	ipv6_route_seq_setup_walk(iter);
+	goto iter_table;
+}
+
+static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU_BH)
+{
+	struct net *net = seq_file_net(seq);
+	struct ipv6_route_iter *iter = seq->private;
+
+	rcu_read_lock_bh();
+	iter->tbl = ipv6_route_seq_next_table(NULL, net);
+	iter->skip = *pos;
+
+	if (iter->tbl) {
+		ipv6_route_seq_setup_walk(iter);
+		return ipv6_route_seq_next(seq, NULL, pos);
+	} else {
+		return NULL;
+	}
+}
+
+static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
+{
+	struct fib6_walker_t *w = &iter->w;
+	return w->node && !(w->state == FWS_U && w->node == w->root);
+}
+
+static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU_BH)
+{
+	struct ipv6_route_iter *iter = seq->private;
+
+	if (ipv6_route_iter_active(iter))
+		fib6_walker_unlink(&iter->w);
+
+	rcu_read_unlock_bh();
+}
+
+static const struct seq_operations ipv6_route_seq_ops = {
+	.start	= ipv6_route_seq_start,
+	.next	= ipv6_route_seq_next,
+	.stop	= ipv6_route_seq_stop,
+	.show	= ipv6_route_seq_show
+};
+
+int ipv6_route_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &ipv6_route_seq_ops,
+			    sizeof(struct ipv6_route_iter));
+}
+
+#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index bbbe80cdaf7..4052694c6f2 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -9,27 +9,24 @@
  *	Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  */
 
-#include <linux/config.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/net.h>
 #include <linux/netdevice.h>
-#include <linux/if_arp.h>
 #include <linux/in6.h>
-#include <linux/route.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/pid_namespace.h>
 
+#include <net/net_namespace.h>
 #include <net/sock.h>
 
 #include <net/ipv6.h>
-#include <net/ndisc.h>
-#include <net/protocol.h>
-#include <net/ip6_route.h>
-#include <net/addrconf.h>
 #include <net/rawv6.h>
-#include <net/icmp.h>
 #include <net/transp_v6.h>
 
 #include <asm/uaccess.h>
@@ -37,7 +34,7 @@
 #define FL_MIN_LINGER	6	/* Minimal linger. It is set to 6sec specified
 				   in old IPv6 RFC. Well, it was reasonable value.
 				 */
-#define FL_MAX_LINGER	60	/* Maximal linger timeout */
+#define FL_MAX_LINGER	150	/* Maximal linger timeout */
 
 /* FL hash table */
 
@@ -47,54 +44,71 @@
 #define FL_HASH(l)	(ntohl(l)&FL_HASH_MASK)
 
 static atomic_t fl_size = ATOMIC_INIT(0);
-static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1];
+static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];
 
 static void ip6_fl_gc(unsigned long dummy);
 static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0);
 
 /* FL hash table lock: it protects only of GC */
 
-static DEFINE_RWLOCK(ip6_fl_lock);
+static DEFINE_SPINLOCK(ip6_fl_lock);
 
 /* Big socket sock */
 
-static DEFINE_RWLOCK(ip6_sk_fl_lock);
+static DEFINE_SPINLOCK(ip6_sk_fl_lock);
 
+#define for_each_fl_rcu(hash, fl)				\
+	for (fl = rcu_dereference_bh(fl_ht[(hash)]);		\
+	     fl != NULL;					\
+	     fl = rcu_dereference_bh(fl->next))
+#define for_each_fl_continue_rcu(fl)				\
+	for (fl = rcu_dereference_bh(fl->next);			\
+	     fl != NULL;					\
+	     fl = rcu_dereference_bh(fl->next))
 
-static __inline__ struct ip6_flowlabel * __fl_lookup(u32 label)
+#define for_each_sk_fl_rcu(np, sfl)				\
+	for (sfl = rcu_dereference_bh(np->ipv6_fl_list);	\
+	     sfl != NULL;					\
+	     sfl = rcu_dereference_bh(sfl->next))
+
+static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
 {
 	struct ip6_flowlabel *fl;
 
-	for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) {
-		if (fl->label == label)
+	for_each_fl_rcu(FL_HASH(label), fl) {
+		if (fl->label == label && net_eq(fl->fl_net, net))
 			return fl;
 	}
 	return NULL;
 }
 
-static struct ip6_flowlabel * fl_lookup(u32 label)
+static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
 {
 	struct ip6_flowlabel *fl;
 
-	read_lock_bh(&ip6_fl_lock);
-	fl = __fl_lookup(label);
-	if (fl)
-		atomic_inc(&fl->users);
-	read_unlock_bh(&ip6_fl_lock);
+	rcu_read_lock_bh();
+	fl = __fl_lookup(net, label);
+	if (fl && !atomic_inc_not_zero(&fl->users))
+		fl = NULL;
+	rcu_read_unlock_bh();
 	return fl;
 }
 
 
 static void fl_free(struct ip6_flowlabel *fl)
 {
-	if (fl)
+	if (fl) {
+		if (fl->share == IPV6_FL_S_PROCESS)
+			put_pid(fl->owner.pid);
+		release_net(fl->fl_net);
 		kfree(fl->opt);
-	kfree(fl);
+		kfree_rcu(fl, rcu);
+	}
 }
 
 static void fl_release(struct ip6_flowlabel *fl)
 {
-	write_lock_bh(&ip6_fl_lock);
+	spin_lock_bh(&ip6_fl_lock);
 
 	fl->lastuse = jiffies;
 	if (atomic_dec_and_test(&fl->users)) {
@@ -111,8 +125,7 @@ static void fl_release(struct ip6_flowlabel *fl)
 		    time_after(ip6_fl_gc_timer.expires, ttd))
 			mod_timer(&ip6_fl_gc_timer, ttd);
 	}
-
-	write_unlock_bh(&ip6_fl_lock);
+	spin_unlock_bh(&ip6_fl_lock);
 }
 
 static void ip6_fl_gc(unsigned long dummy)
@@ -121,12 +134,15 @@ static void ip6_fl_gc(unsigned long dummy)
 	unsigned long now = jiffies;
 	unsigned long sched = 0;
 
-	write_lock(&ip6_fl_lock);
+	spin_lock(&ip6_fl_lock);
 
 	for (i=0; i<=FL_HASH_MASK; i++) {
-		struct ip6_flowlabel *fl, **flp;
+		struct ip6_flowlabel *fl;
+		struct ip6_flowlabel __rcu **flp;
+
 		flp = &fl_ht[i];
-		while ((fl=*flp) != NULL) {
+		while ((fl = rcu_dereference_protected(*flp,
+						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
 			if (atomic_read(&fl->users) == 0) {
 				unsigned long ttd = fl->lastuse + fl->linger;
 				if (time_after(ttd, fl->expires))
@@ -147,69 +163,125 @@ static void ip6_fl_gc(unsigned long dummy)
 	if (!sched && atomic_read(&fl_size))
 		sched = now + FL_MAX_LINGER;
 	if (sched) {
-		ip6_fl_gc_timer.expires = sched;
-		add_timer(&ip6_fl_gc_timer);
+		mod_timer(&ip6_fl_gc_timer, sched);
+	}
+	spin_unlock(&ip6_fl_lock);
+}
+
+static void __net_exit ip6_fl_purge(struct net *net)
+{
+	int i;
+
+	spin_lock(&ip6_fl_lock);
+	for (i = 0; i <= FL_HASH_MASK; i++) {
+		struct ip6_flowlabel *fl;
+		struct ip6_flowlabel __rcu **flp;
+
+		flp = &fl_ht[i];
+		while ((fl = rcu_dereference_protected(*flp,
+						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
+			if (net_eq(fl->fl_net, net) &&
+			    atomic_read(&fl->users) == 0) {
+				*flp = fl->next;
+				fl_free(fl);
+				atomic_dec(&fl_size);
+				continue;
+			}
+			flp = &fl->next;
+		}
 	}
-	write_unlock(&ip6_fl_lock);
+	spin_unlock(&ip6_fl_lock);
 }
 
-static int fl_intern(struct ip6_flowlabel *fl, __u32 label)
+static struct ip6_flowlabel *fl_intern(struct net *net,
+				       struct ip6_flowlabel *fl, __be32 label)
 {
+	struct ip6_flowlabel *lfl;
+
 	fl->label = label & IPV6_FLOWLABEL_MASK;
 
-	write_lock_bh(&ip6_fl_lock);
+	spin_lock_bh(&ip6_fl_lock);
 	if (label == 0) {
 		for (;;) {
-			fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
+			fl->label = htonl(prandom_u32())&IPV6_FLOWLABEL_MASK;
 			if (fl->label) {
-				struct ip6_flowlabel *lfl;
-				lfl = __fl_lookup(fl->label);
+				lfl = __fl_lookup(net, fl->label);
 				if (lfl == NULL)
 					break;
 			}
 		}
+	} else {
+		/*
+		 * we dropper the ip6_fl_lock, so this entry could reappear
+		 * and we need to recheck with it.
+		 *
+		 * OTOH no need to search the active socket first, like it is
+		 * done in ipv6_flowlabel_opt - sock is locked, so new entry
+		 * with the same label can only appear on another sock
+		 */
+		lfl = __fl_lookup(net, fl->label);
+		if (lfl != NULL) {
+			atomic_inc(&lfl->users);
+			spin_unlock_bh(&ip6_fl_lock);
+			return lfl;
+		}
 	}
 
 	fl->lastuse = jiffies;
 	fl->next = fl_ht[FL_HASH(fl->label)];
-	fl_ht[FL_HASH(fl->label)] = fl;
+	rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
 	atomic_inc(&fl_size);
-	write_unlock_bh(&ip6_fl_lock);
-	return 0;
+	spin_unlock_bh(&ip6_fl_lock);
+	return NULL;
 }
 
 
 
 /* Socket flowlabel lists */
 
-struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, u32 label)
+struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
 {
 	struct ipv6_fl_socklist *sfl;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 
 	label &= IPV6_FLOWLABEL_MASK;
 
-	for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) {
+	rcu_read_lock_bh();
+	for_each_sk_fl_rcu(np, sfl) {
 		struct ip6_flowlabel *fl = sfl->fl;
 		if (fl->label == label) {
 			fl->lastuse = jiffies;
 			atomic_inc(&fl->users);
+			rcu_read_unlock_bh();
 			return fl;
 		}
 	}
+	rcu_read_unlock_bh();
 	return NULL;
 }
 
+EXPORT_SYMBOL_GPL(fl6_sock_lookup);
+
 void fl6_free_socklist(struct sock *sk)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6_fl_socklist *sfl;
 
-	while ((sfl = np->ipv6_fl_list) != NULL) {
+	if (!rcu_access_pointer(np->ipv6_fl_list))
+		return;
+
+	spin_lock_bh(&ip6_sk_fl_lock);
+	while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
+						lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
 		np->ipv6_fl_list = sfl->next;
+		spin_unlock_bh(&ip6_sk_fl_lock);
+
 		fl_release(sfl->fl);
-		kfree(sfl);
+		kfree_rcu(sfl, rcu);
+
+		spin_lock_bh(&ip6_sk_fl_lock);
 	}
+	spin_unlock_bh(&ip6_sk_fl_lock);
 }
 
 /* Service routines */
@@ -225,20 +297,16 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
 					 struct ip6_flowlabel * fl,
 					 struct ipv6_txoptions * fopt)
 {
-	struct ipv6_txoptions * fl_opt = fl ? fl->opt : NULL;
+	struct ipv6_txoptions * fl_opt = fl->opt;
 
-	if (fopt == NULL || fopt->opt_flen == 0) {
-		if (!fl_opt || !fl_opt->dst0opt || fl_opt->srcrt)
-			return fl_opt;
-	}
+	if (fopt == NULL || fopt->opt_flen == 0)
+		return fl_opt;
 
 	if (fl_opt != NULL) {
 		opt_space->hopopt = fl_opt->hopopt;
-		opt_space->dst0opt = fl_opt->srcrt ? fl_opt->dst0opt : NULL;
+		opt_space->dst0opt = fl_opt->dst0opt;
 		opt_space->srcrt = fl_opt->srcrt;
 		opt_space->opt_nflen = fl_opt->opt_nflen;
-		if (fl_opt->dst0opt && !fl_opt->srcrt)
-			opt_space->opt_nflen -= ipv6_optlen(fl_opt->dst0opt);
 	} else {
 		if (fopt->opt_nflen == 0)
 			return fopt;
@@ -251,6 +319,7 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
 	opt_space->opt_flen = fopt->opt_flen;
 	return opt_space;
 }
+EXPORT_SYMBOL_GPL(fl6_merge_options);
 
 static unsigned long check_linger(unsigned long ttl)
 {
@@ -269,6 +338,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo
 	expires = check_linger(expires);
 	if (!expires)
 		return -EPERM;
+
+	spin_lock_bh(&ip6_fl_lock);
 	fl->lastuse = jiffies;
 	if (time_before(fl->linger, linger))
 		fl->linger = linger;
@@ -276,27 +347,33 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo
 		expires = fl->linger;
 	if (time_before(fl->expires, fl->lastuse + expires))
 		fl->expires = fl->lastuse + expires;
+	spin_unlock_bh(&ip6_fl_lock);
+
 	return 0;
 }
 
 static struct ip6_flowlabel *
-fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int *err_p)
+fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
+	  char __user *optval, int optlen, int *err_p)
 {
-	struct ip6_flowlabel *fl;
+	struct ip6_flowlabel *fl = NULL;
 	int olen;
 	int addr_type;
 	int err;
 
+	olen = optlen - CMSG_ALIGN(sizeof(*freq));
+	err = -EINVAL;
+	if (olen > 64 * 1024)
+		goto done;
+
 	err = -ENOMEM;
-	fl = kmalloc(sizeof(*fl), GFP_KERNEL);
+	fl = kzalloc(sizeof(*fl), GFP_KERNEL);
 	if (fl == NULL)
 		goto done;
-	memset(fl, 0, sizeof(*fl));
 
-	olen = optlen - CMSG_ALIGN(sizeof(*freq));
 	if (olen > 0) {
 		struct msghdr msg;
-		struct flowi flowi;
+		struct flowi6 flowi6;
 		int junk;
 
 		err = -ENOMEM;
@@ -312,9 +389,10 @@ fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int *
 
 		msg.msg_controllen = olen;
 		msg.msg_control = (void*)(fl->opt+1);
-		flowi.oif = 0;
+		memset(&flowi6, 0, sizeof(flowi6));
 
-		err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk);
+		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt,
+					    &junk, &junk, &junk);
 		if (err)
 			goto done;
 		err = -EINVAL;
@@ -326,26 +404,29 @@ fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int *
 		}
 	}
 
+	fl->fl_net = hold_net(net);
 	fl->expires = jiffies;
 	err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
 	if (err)
 		goto done;
 	fl->share = freq->flr_share;
 	addr_type = ipv6_addr_type(&freq->flr_dst);
-	if ((addr_type&IPV6_ADDR_MAPPED)
-	    || addr_type == IPV6_ADDR_ANY)
+	if ((addr_type & IPV6_ADDR_MAPPED) ||
+	    addr_type == IPV6_ADDR_ANY) {
+		err = -EINVAL;
 		goto done;
-	ipv6_addr_copy(&fl->dst, &freq->flr_dst);
+	}
+	fl->dst = freq->flr_dst;
 	atomic_set(&fl->users, 1);
 	switch (fl->share) {
 	case IPV6_FL_S_EXCL:
 	case IPV6_FL_S_ANY:
 		break;
 	case IPV6_FL_S_PROCESS:
-		fl->owner = current->pid;
+		fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
 		break;
 	case IPV6_FL_S_USER:
-		fl->owner = current->euid;
+		fl->owner.uid = current_euid();
 		break;
 	default:
 		err = -EINVAL;
@@ -369,54 +450,78 @@ static int mem_check(struct sock *sk)
 	if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
 		return 0;
 
-	for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next)
+	rcu_read_lock_bh();
+	for_each_sk_fl_rcu(np, sfl)
 		count++;
+	rcu_read_unlock_bh();
 
 	if (room <= 0 ||
 	    ((count >= FL_MAX_PER_SOCK ||
-	     (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4)
-	     && !capable(CAP_NET_ADMIN)))
+	      (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
+	     !capable(CAP_NET_ADMIN)))
 		return -ENOBUFS;
 
 	return 0;
 }
 
-static int ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2)
+static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
+		struct ip6_flowlabel *fl)
 {
-	if (h1 == h2)
-		return 0;
-	if (h1 == NULL || h2 == NULL)
-		return 1;
-	if (h1->hdrlen != h2->hdrlen)
-		return 1;
-	return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1));
+	spin_lock_bh(&ip6_sk_fl_lock);
+	sfl->fl = fl;
+	sfl->next = np->ipv6_fl_list;
+	rcu_assign_pointer(np->ipv6_fl_list, sfl);
+	spin_unlock_bh(&ip6_sk_fl_lock);
 }
 
-static int ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
+int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
+			   int flags)
 {
-	if (o1 == o2)
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_fl_socklist *sfl;
+
+	if (flags & IPV6_FL_F_REMOTE) {
+		freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK;
 		return 0;
-	if (o1 == NULL || o2 == NULL)
-		return 1;
-	if (o1->opt_nflen != o2->opt_nflen)
-		return 1;
-	if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt))
-		return 1;
-	if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt))
-		return 1;
-	if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt))
-		return 1;
-	return 0;
+	}
+
+	if (np->repflow) {
+		freq->flr_label = np->flow_label;
+		return 0;
+	}
+
+	rcu_read_lock_bh();
+
+	for_each_sk_fl_rcu(np, sfl) {
+		if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) {
+			spin_lock_bh(&ip6_fl_lock);
+			freq->flr_label = sfl->fl->label;
+			freq->flr_dst = sfl->fl->dst;
+			freq->flr_share = sfl->fl->share;
+			freq->flr_expires = (sfl->fl->expires - jiffies) / HZ;
+			freq->flr_linger = sfl->fl->linger / HZ;
+
+			spin_unlock_bh(&ip6_fl_lock);
+			rcu_read_unlock_bh();
+			return 0;
+		}
+	}
+	rcu_read_unlock_bh();
+
+	return -ENOENT;
 }
 
 int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 {
-	int err;
+	int uninitialized_var(err);
+	struct net *net = sock_net(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct in6_flowlabel_req freq;
 	struct ipv6_fl_socklist *sfl1=NULL;
-	struct ipv6_fl_socklist *sfl, **sflp;
-	struct ip6_flowlabel *fl;
+	struct ipv6_fl_socklist *sfl;
+	struct ipv6_fl_socklist __rcu **sflp;
+	struct ip6_flowlabel *fl, *fl1 = NULL;
+
 
 	if (optlen < sizeof(freq))
 		return -EINVAL;
@@ -426,34 +531,46 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 
 	switch (freq.flr_action) {
 	case IPV6_FL_A_PUT:
-		write_lock_bh(&ip6_sk_fl_lock);
-		for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) {
+		if (freq.flr_flags & IPV6_FL_F_REFLECT) {
+			if (sk->sk_protocol != IPPROTO_TCP)
+				return -ENOPROTOOPT;
+			if (!np->repflow)
+				return -ESRCH;
+			np->flow_label = 0;
+			np->repflow = 0;
+			return 0;
+		}
+		spin_lock_bh(&ip6_sk_fl_lock);
+		for (sflp = &np->ipv6_fl_list;
+		     (sfl = rcu_dereference(*sflp))!=NULL;
+		     sflp = &sfl->next) {
 			if (sfl->fl->label == freq.flr_label) {
 				if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
 					np->flow_label &= ~IPV6_FLOWLABEL_MASK;
-				*sflp = sfl->next;
-				write_unlock_bh(&ip6_sk_fl_lock);
+				*sflp = rcu_dereference(sfl->next);
+				spin_unlock_bh(&ip6_sk_fl_lock);
 				fl_release(sfl->fl);
-				kfree(sfl);
+				kfree_rcu(sfl, rcu);
 				return 0;
 			}
 		}
-		write_unlock_bh(&ip6_sk_fl_lock);
+		spin_unlock_bh(&ip6_sk_fl_lock);
 		return -ESRCH;
 
 	case IPV6_FL_A_RENEW:
-		read_lock_bh(&ip6_sk_fl_lock);
-		for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+		rcu_read_lock_bh();
+		for_each_sk_fl_rcu(np, sfl) {
 			if (sfl->fl->label == freq.flr_label) {
 				err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
-				read_unlock_bh(&ip6_sk_fl_lock);
+				rcu_read_unlock_bh();
 				return err;
 			}
 		}
-		read_unlock_bh(&ip6_sk_fl_lock);
+		rcu_read_unlock_bh();
 
-		if (freq.flr_share == IPV6_FL_S_NONE && capable(CAP_NET_ADMIN)) {
-			fl = fl_lookup(freq.flr_label);
+		if (freq.flr_share == IPV6_FL_S_NONE &&
+		    ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+			fl = fl_lookup(net, freq.flr_label);
 			if (fl) {
 				err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
 				fl_release(fl);
@@ -463,23 +580,35 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 		return -ESRCH;
 
 	case IPV6_FL_A_GET:
+		if (freq.flr_flags & IPV6_FL_F_REFLECT) {
+			struct net *net = sock_net(sk);
+			if (net->ipv6.sysctl.flowlabel_consistency) {
+				net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n");
+				return -EPERM;
+			}
+
+			if (sk->sk_protocol != IPPROTO_TCP)
+				return -ENOPROTOOPT;
+
+			np->repflow = 1;
+			return 0;
+		}
+
 		if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
 			return -EINVAL;
 
-		fl = fl_create(&freq, optval, optlen, &err);
+		fl = fl_create(net, sk, &freq, optval, optlen, &err);
 		if (fl == NULL)
 			return err;
 		sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
 
 		if (freq.flr_label) {
-			struct ip6_flowlabel *fl1 = NULL;
-
 			err = -EEXIST;
-			read_lock_bh(&ip6_sk_fl_lock);
-			for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+			rcu_read_lock_bh();
+			for_each_sk_fl_rcu(np, sfl) {
 				if (sfl->fl->label == freq.flr_label) {
 					if (freq.flr_flags&IPV6_FL_F_EXCL) {
-						read_unlock_bh(&ip6_sk_fl_lock);
+						rcu_read_unlock_bh();
 						goto done;
 					}
 					fl1 = sfl->fl;
@@ -487,23 +616,22 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 					break;
 				}
 			}
-			read_unlock_bh(&ip6_sk_fl_lock);
+			rcu_read_unlock_bh();
 
 			if (fl1 == NULL)
-				fl1 = fl_lookup(freq.flr_label);
+				fl1 = fl_lookup(net, freq.flr_label);
 			if (fl1) {
+recheck:
 				err = -EEXIST;
 				if (freq.flr_flags&IPV6_FL_F_EXCL)
 					goto release;
 				err = -EPERM;
 				if (fl1->share == IPV6_FL_S_EXCL ||
 				    fl1->share != fl->share ||
-				    fl1->owner != fl->owner)
-					goto release;
-
-				err = -EINVAL;
-				if (!ipv6_addr_equal(&fl1->dst, &fl->dst) ||
-				    ipv6_opt_cmp(fl1->opt, fl->opt))
+				    ((fl1->share == IPV6_FL_S_PROCESS) &&
+				     (fl1->owner.pid == fl->owner.pid)) ||
+				    ((fl1->share == IPV6_FL_S_USER) &&
+				     uid_eq(fl1->owner.uid, fl->owner.uid)))
 					goto release;
 
 				err = -ENOMEM;
@@ -513,11 +641,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 					fl1->linger = fl->linger;
 				if ((long)(fl->expires - fl1->expires) > 0)
 					fl1->expires = fl->expires;
-				write_lock_bh(&ip6_sk_fl_lock);
-				sfl1->fl = fl1;
-				sfl1->next = np->ipv6_fl_list;
-				np->ipv6_fl_list = sfl1;
-				write_unlock_bh(&ip6_sk_fl_lock);
+				fl_link(np, sfl1, fl1);
 				fl_free(fl);
 				return 0;
 
@@ -534,9 +658,9 @@ release:
 		if (sfl1 == NULL || (err = mem_check(sk)) != 0)
 			goto done;
 
-		err = fl_intern(fl, freq.flr_label);
-		if (err)
-			goto done;
+		fl1 = fl_intern(net, fl, freq.flr_label);
+		if (fl1 != NULL)
+			goto recheck;
 
 		if (!freq.flr_label) {
 			if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
@@ -545,9 +669,7 @@ release:
 			}
 		}
 
-		sfl1->fl = fl;
-		sfl1->next = np->ipv6_fl_list;
-		np->ipv6_fl_list = sfl1;
+		fl_link(np, sfl1, fl);
 		return 0;
 
 	default:
@@ -563,6 +685,8 @@ done:
 #ifdef CONFIG_PROC_FS
 
 struct ip6fl_iter_state {
+	struct seq_net_private p;
+	struct pid_namespace *pid_ns;
 	int bucket;
 };
 
@@ -572,25 +696,40 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
 {
 	struct ip6_flowlabel *fl = NULL;
 	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+	struct net *net = seq_file_net(seq);
 
 	for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
-		if (fl_ht[state->bucket]) {
-			fl = fl_ht[state->bucket];
-			break;
+		for_each_fl_rcu(state->bucket, fl) {
+			if (net_eq(fl->fl_net, net))
+				goto out;
 		}
 	}
+	fl = NULL;
+out:
 	return fl;
 }
 
 static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
 {
 	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+	struct net *net = seq_file_net(seq);
 
-	fl = fl->next;
-	while (!fl) {
-		if (++state->bucket <= FL_HASH_MASK)
-			fl = fl_ht[state->bucket];
+	for_each_fl_continue_rcu(fl) {
+		if (net_eq(fl->fl_net, net))
+			goto out;
 	}
+
+try_again:
+	if (++state->bucket <= FL_HASH_MASK) {
+		for_each_fl_rcu(state->bucket, fl) {
+			if (net_eq(fl->fl_net, net))
+				goto out;
+		}
+		goto try_again;
+	}
+	fl = NULL;
+
+out:
 	return fl;
 }
 
@@ -604,8 +743,9 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
 {
-	read_lock_bh(&ip6_fl_lock);
+	rcu_read_lock_bh();
 	return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 }
 
@@ -622,40 +762,38 @@ static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void ip6fl_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
 {
-	read_unlock_bh(&ip6_fl_lock);
+	rcu_read_unlock_bh();
 }
 
-static void ip6fl_fl_seq_show(struct seq_file *seq, struct ip6_flowlabel *fl)
+static int ip6fl_seq_show(struct seq_file *seq, void *v)
 {
-	while(fl) {
+	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+	if (v == SEQ_START_TOKEN)
+		seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
+			   "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
+	else {
+		struct ip6_flowlabel *fl = v;
 		seq_printf(seq,
-			   "%05X %-1d %-6d %-6d %-6ld %-8ld "
-			   "%02x%02x%02x%02x%02x%02x%02x%02x "
-			   "%-4d\n",
-			   (unsigned)ntohl(fl->label),
+			   "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
+			   (unsigned int)ntohl(fl->label),
 			   fl->share,
-			   (unsigned)fl->owner,
+			   ((fl->share == IPV6_FL_S_PROCESS) ?
+			    pid_nr_ns(fl->owner.pid, state->pid_ns) :
+			    ((fl->share == IPV6_FL_S_USER) ?
+			     from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
+			     0)),
 			   atomic_read(&fl->users),
 			   fl->linger/HZ,
 			   (long)(fl->expires - jiffies)/HZ,
-			   NIP6(fl->dst),
+			   &fl->dst,
 			   fl->opt ? fl->opt->opt_nflen : 0);
-		fl = fl->next;
 	}
-}
-
-static int ip6fl_seq_show(struct seq_file *seq, void *v)
-{
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq, "Label S Owner  Users  Linger Expires  "
-			      "Dst                              Opt\n");
-	else
-		ip6fl_fl_seq_show(seq, v);
 	return 0;
 }
 
-static struct seq_operations ip6fl_seq_ops = {
+static const struct seq_operations ip6fl_seq_ops = {
 	.start	=	ip6fl_seq_start,
 	.next	=	ip6fl_seq_next,
 	.stop	=	ip6fl_seq_stop,
@@ -665,47 +803,78 @@ static struct seq_operations ip6fl_seq_ops = {
 static int ip6fl_seq_open(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq;
-	int rc = -ENOMEM;
-	struct ip6fl_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+	struct ip6fl_iter_state *state;
+	int err;
 
-	if (!s)
-		goto out;
+	err = seq_open_net(inode, file, &ip6fl_seq_ops,
+			   sizeof(struct ip6fl_iter_state));
 
-	rc = seq_open(file, &ip6fl_seq_ops);
-	if (rc)
-		goto out_kfree;
+	if (!err) {
+		seq = file->private_data;
+		state = ip6fl_seq_private(seq);
+		rcu_read_lock();
+		state->pid_ns = get_pid_ns(task_active_pid_ns(current));
+		rcu_read_unlock();
+	}
+	return err;
+}
 
-	seq = file->private_data;
-	seq->private = s;
-	memset(s, 0, sizeof(*s));
-out:
-	return rc;
-out_kfree:
-	kfree(s);
-	goto out;
+static int ip6fl_seq_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = file->private_data;
+	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+	put_pid_ns(state->pid_ns);
+	return seq_release_net(inode, file);
 }
 
-static struct file_operations ip6fl_seq_fops = {
+static const struct file_operations ip6fl_seq_fops = {
 	.owner		=	THIS_MODULE,
 	.open		=	ip6fl_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
-	.release	=	seq_release_private,
+	.release	=	ip6fl_seq_release,
 };
-#endif
 
+static int __net_init ip6_flowlabel_proc_init(struct net *net)
+{
+	if (!proc_create("ip6_flowlabel", S_IRUGO, net->proc_net,
+			 &ip6fl_seq_fops))
+		return -ENOMEM;
+	return 0;
+}
 
-void ip6_flowlabel_init(void)
+static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
 {
-#ifdef CONFIG_PROC_FS
-	proc_net_fops_create("ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops);
+	remove_proc_entry("ip6_flowlabel", net->proc_net);
+}
+#else
+static inline int ip6_flowlabel_proc_init(struct net *net)
+{
+	return 0;
+}
+static inline void ip6_flowlabel_proc_fini(struct net *net)
+{
+}
 #endif
+
+static void __net_exit ip6_flowlabel_net_exit(struct net *net)
+{
+	ip6_fl_purge(net);
+	ip6_flowlabel_proc_fini(net);
+}
+
+static struct pernet_operations ip6_flowlabel_net_ops = {
+	.init = ip6_flowlabel_proc_init,
+	.exit = ip6_flowlabel_net_exit,
+};
+
+int ip6_flowlabel_init(void)
+{
+	return register_pernet_subsys(&ip6_flowlabel_net_ops);
 }
 
 void ip6_flowlabel_cleanup(void)
 {
 	del_timer(&ip6_fl_gc_timer);
-#ifdef CONFIG_PROC_FS
-	proc_net_remove("ip6_flowlabel");
-#endif
+	unregister_pernet_subsys(&ip6_flowlabel_net_ops);
 }
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
new file mode 100644
index 00000000000..3873181ed85
--- /dev/null
+++ b/net/ipv6/ip6_gre.c
@@ -0,0 +1,1722 @@
+/*
+ *	GRE over IPv6 protocol decoder.
+ *
+ *	Authors: Dmitry Kozlov (xeb@mail.ru)
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <linux/in6.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/hash.h>
+#include <linux/if_tunnel.h>
+#include <linux/ip6_tunnel.h>
+
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/ip_tunnels.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/addrconf.h>
+#include <net/arp.h>
+#include <net/checksum.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/rtnetlink.h>
+
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#include <net/ip6_tunnel.h>
+
+
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+
+#define HASH_SIZE_SHIFT  5
+#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+
+static int ip6gre_net_id __read_mostly;
+struct ip6gre_net {
+	struct ip6_tnl __rcu *tunnels[4][HASH_SIZE];
+
+	struct net_device *fb_tunnel_dev;
+};
+
+static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
+static struct rtnl_link_ops ip6gre_tap_ops __read_mostly;
+static int ip6gre_tunnel_init(struct net_device *dev);
+static void ip6gre_tunnel_setup(struct net_device *dev);
+static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
+static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
+
+/* Tunnel hash table */
+
+/*
+   4 hash tables:
+
+   3: (remote,local)
+   2: (remote,*)
+   1: (*,local)
+   0: (*,*)
+
+   We require exact key match i.e. if a key is present in packet
+   it will match only tunnel with the same key; if it is not present,
+   it will match only keyless tunnel.
+
+   All keysless packets, if not matched configured keyless tunnels
+   will match fallback tunnel.
+ */
+
+#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
+static u32 HASH_ADDR(const struct in6_addr *addr)
+{
+	u32 hash = ipv6_addr_hash(addr);
+
+	return hash_32(hash, HASH_SIZE_SHIFT);
+}
+
+#define tunnels_r_l	tunnels[3]
+#define tunnels_r	tunnels[2]
+#define tunnels_l	tunnels[1]
+#define tunnels_wc	tunnels[0]
+
+/* Given src, dst and key, find appropriate for input tunnel. */
+
+static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
+		const struct in6_addr *remote, const struct in6_addr *local,
+		__be32 key, __be16 gre_proto)
+{
+	struct net *net = dev_net(dev);
+	int link = dev->ifindex;
+	unsigned int h0 = HASH_ADDR(remote);
+	unsigned int h1 = HASH_KEY(key);
+	struct ip6_tnl *t, *cand = NULL;
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
+		       ARPHRD_ETHER : ARPHRD_IP6GRE;
+	int score, cand_score = 4;
+
+	for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
+		if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+		    !ipv6_addr_equal(remote, &t->parms.raddr) ||
+		    key != t->parms.i_key ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (t->dev->type != ARPHRD_IP6GRE &&
+		    t->dev->type != dev_type)
+			continue;
+
+		score = 0;
+		if (t->parms.link != link)
+			score |= 1;
+		if (t->dev->type != dev_type)
+			score |= 2;
+		if (score == 0)
+			return t;
+
+		if (score < cand_score) {
+			cand = t;
+			cand_score = score;
+		}
+	}
+
+	for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
+		if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
+		    key != t->parms.i_key ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (t->dev->type != ARPHRD_IP6GRE &&
+		    t->dev->type != dev_type)
+			continue;
+
+		score = 0;
+		if (t->parms.link != link)
+			score |= 1;
+		if (t->dev->type != dev_type)
+			score |= 2;
+		if (score == 0)
+			return t;
+
+		if (score < cand_score) {
+			cand = t;
+			cand_score = score;
+		}
+	}
+
+	for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
+		if ((!ipv6_addr_equal(local, &t->parms.laddr) &&
+			  (!ipv6_addr_equal(local, &t->parms.raddr) ||
+				 !ipv6_addr_is_multicast(local))) ||
+		    key != t->parms.i_key ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (t->dev->type != ARPHRD_IP6GRE &&
+		    t->dev->type != dev_type)
+			continue;
+
+		score = 0;
+		if (t->parms.link != link)
+			score |= 1;
+		if (t->dev->type != dev_type)
+			score |= 2;
+		if (score == 0)
+			return t;
+
+		if (score < cand_score) {
+			cand = t;
+			cand_score = score;
+		}
+	}
+
+	for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
+		if (t->parms.i_key != key ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (t->dev->type != ARPHRD_IP6GRE &&
+		    t->dev->type != dev_type)
+			continue;
+
+		score = 0;
+		if (t->parms.link != link)
+			score |= 1;
+		if (t->dev->type != dev_type)
+			score |= 2;
+		if (score == 0)
+			return t;
+
+		if (score < cand_score) {
+			cand = t;
+			cand_score = score;
+		}
+	}
+
+	if (cand != NULL)
+		return cand;
+
+	dev = ign->fb_tunnel_dev;
+	if (dev->flags & IFF_UP)
+		return netdev_priv(dev);
+
+	return NULL;
+}
+
+static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
+		const struct __ip6_tnl_parm *p)
+{
+	const struct in6_addr *remote = &p->raddr;
+	const struct in6_addr *local = &p->laddr;
+	unsigned int h = HASH_KEY(p->i_key);
+	int prio = 0;
+
+	if (!ipv6_addr_any(local))
+		prio |= 1;
+	if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) {
+		prio |= 2;
+		h ^= HASH_ADDR(remote);
+	}
+
+	return &ign->tunnels[prio][h];
+}
+
+static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
+		const struct ip6_tnl *t)
+{
+	return __ip6gre_bucket(ign, &t->parms);
+}
+
+static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+	struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
+
+	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
+	rcu_assign_pointer(*tp, t);
+}
+
+static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+	struct ip6_tnl __rcu **tp;
+	struct ip6_tnl *iter;
+
+	for (tp = ip6gre_bucket(ign, t);
+	     (iter = rtnl_dereference(*tp)) != NULL;
+	     tp = &iter->next) {
+		if (t == iter) {
+			rcu_assign_pointer(*tp, t->next);
+			break;
+		}
+	}
+}
+
+static struct ip6_tnl *ip6gre_tunnel_find(struct net *net,
+					   const struct __ip6_tnl_parm *parms,
+					   int type)
+{
+	const struct in6_addr *remote = &parms->raddr;
+	const struct in6_addr *local = &parms->laddr;
+	__be32 key = parms->i_key;
+	int link = parms->link;
+	struct ip6_tnl *t;
+	struct ip6_tnl __rcu **tp;
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+	for (tp = __ip6gre_bucket(ign, parms);
+	     (t = rtnl_dereference(*tp)) != NULL;
+	     tp = &t->next)
+		if (ipv6_addr_equal(local, &t->parms.laddr) &&
+		    ipv6_addr_equal(remote, &t->parms.raddr) &&
+		    key == t->parms.i_key &&
+		    link == t->parms.link &&
+		    type == t->dev->type)
+			break;
+
+	return t;
+}
+
+static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
+		const struct __ip6_tnl_parm *parms, int create)
+{
+	struct ip6_tnl *t, *nt;
+	struct net_device *dev;
+	char name[IFNAMSIZ];
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+	t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE);
+	if (t || !create)
+		return t;
+
+	if (parms->name[0])
+		strlcpy(name, parms->name, IFNAMSIZ);
+	else
+		strcpy(name, "ip6gre%d");
+
+	dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup);
+	if (!dev)
+		return NULL;
+
+	dev_net_set(dev, net);
+
+	nt = netdev_priv(dev);
+	nt->parms = *parms;
+	dev->rtnl_link_ops = &ip6gre_link_ops;
+
+	nt->dev = dev;
+	nt->net = dev_net(dev);
+	ip6gre_tnl_link_config(nt, 1);
+
+	if (register_netdevice(dev) < 0)
+		goto failed_free;
+
+	/* Can use a lockless transmit, unless we generate output sequences */
+	if (!(nt->parms.o_flags & GRE_SEQ))
+		dev->features |= NETIF_F_LLTX;
+
+	dev_hold(dev);
+	ip6gre_tunnel_link(ign, nt);
+	return nt;
+
+failed_free:
+	free_netdev(dev);
+	return NULL;
+}
+
+static void ip6gre_tunnel_uninit(struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
+
+	ip6gre_tunnel_unlink(ign, t);
+	dev_put(dev);
+}
+
+
+static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		u8 type, u8 code, int offset, __be32 info)
+{
+	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
+	__be16 *p = (__be16 *)(skb->data + offset);
+	int grehlen = offset + 4;
+	struct ip6_tnl *t;
+	__be16 flags;
+
+	flags = p[0];
+	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
+		if (flags&(GRE_VERSION|GRE_ROUTING))
+			return;
+		if (flags&GRE_KEY) {
+			grehlen += 4;
+			if (flags&GRE_CSUM)
+				grehlen += 4;
+		}
+	}
+
+	/* If only 8 bytes returned, keyed message will be dropped here */
+	if (!pskb_may_pull(skb, grehlen))
+		return;
+	ipv6h = (const struct ipv6hdr *)skb->data;
+	p = (__be16 *)(skb->data + offset);
+
+	t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
+				flags & GRE_KEY ?
+				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
+				p[1]);
+	if (t == NULL)
+		return;
+
+	switch (type) {
+		__u32 teli;
+		struct ipv6_tlv_tnl_enc_lim *tel;
+		__u32 mtu;
+	case ICMPV6_DEST_UNREACH:
+		net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
+				     t->parms.name);
+		break;
+	case ICMPV6_TIME_EXCEED:
+		if (code == ICMPV6_EXC_HOPLIMIT) {
+			net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
+					     t->parms.name);
+		}
+		break;
+	case ICMPV6_PARAMPROB:
+		teli = 0;
+		if (code == ICMPV6_HDR_FIELD)
+			teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
+
+		if (teli && teli == info - 2) {
+			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
+			if (tel->encap_limit == 0) {
+				net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
+						     t->parms.name);
+			}
+		} else {
+			net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
+					     t->parms.name);
+		}
+		break;
+	case ICMPV6_PKT_TOOBIG:
+		mtu = info - offset;
+		if (mtu < IPV6_MIN_MTU)
+			mtu = IPV6_MIN_MTU;
+		t->dev->mtu = mtu;
+		break;
+	}
+
+	if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
+		t->err_count++;
+	else
+		t->err_count = 1;
+	t->err_time = jiffies;
+}
+
+static int ip6gre_rcv(struct sk_buff *skb)
+{
+	const struct ipv6hdr *ipv6h;
+	u8     *h;
+	__be16    flags;
+	__sum16   csum = 0;
+	__be32 key = 0;
+	u32    seqno = 0;
+	struct ip6_tnl *tunnel;
+	int    offset = 4;
+	__be16 gre_proto;
+	int err;
+
+	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
+		goto drop;
+
+	ipv6h = ipv6_hdr(skb);
+	h = skb->data;
+	flags = *(__be16 *)h;
+
+	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
+		/* - Version must be 0.
+		   - We do not support routing headers.
+		 */
+		if (flags&(GRE_VERSION|GRE_ROUTING))
+			goto drop;
+
+		if (flags&GRE_CSUM) {
+			csum = skb_checksum_simple_validate(skb);
+			offset += 4;
+		}
+		if (flags&GRE_KEY) {
+			key = *(__be32 *)(h + offset);
+			offset += 4;
+		}
+		if (flags&GRE_SEQ) {
+			seqno = ntohl(*(__be32 *)(h + offset));
+			offset += 4;
+		}
+	}
+
+	gre_proto = *(__be16 *)(h + 2);
+
+	tunnel = ip6gre_tunnel_lookup(skb->dev,
+					  &ipv6h->saddr, &ipv6h->daddr, key,
+					  gre_proto);
+	if (tunnel) {
+		struct pcpu_sw_netstats *tstats;
+
+		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+			goto drop;
+
+		if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) {
+			tunnel->dev->stats.rx_dropped++;
+			goto drop;
+		}
+
+		skb->protocol = gre_proto;
+		/* WCCP version 1 and 2 protocol decoding.
+		 * - Change protocol to IP
+		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+		 */
+		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
+			skb->protocol = htons(ETH_P_IP);
+			if ((*(h + offset) & 0xF0) != 0x40)
+				offset += 4;
+		}
+
+		skb->mac_header = skb->network_header;
+		__pskb_pull(skb, offset);
+		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
+
+		if (((flags&GRE_CSUM) && csum) ||
+		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
+			tunnel->dev->stats.rx_crc_errors++;
+			tunnel->dev->stats.rx_errors++;
+			goto drop;
+		}
+		if (tunnel->parms.i_flags&GRE_SEQ) {
+			if (!(flags&GRE_SEQ) ||
+			    (tunnel->i_seqno &&
+					(s32)(seqno - tunnel->i_seqno) < 0)) {
+				tunnel->dev->stats.rx_fifo_errors++;
+				tunnel->dev->stats.rx_errors++;
+				goto drop;
+			}
+			tunnel->i_seqno = seqno + 1;
+		}
+
+		/* Warning: All skb pointers will be invalidated! */
+		if (tunnel->dev->type == ARPHRD_ETHER) {
+			if (!pskb_may_pull(skb, ETH_HLEN)) {
+				tunnel->dev->stats.rx_length_errors++;
+				tunnel->dev->stats.rx_errors++;
+				goto drop;
+			}
+
+			ipv6h = ipv6_hdr(skb);
+			skb->protocol = eth_type_trans(skb, tunnel->dev);
+			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+		}
+
+		__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
+
+		skb_reset_network_header(skb);
+
+		err = IP6_ECN_decapsulate(ipv6h, skb);
+		if (unlikely(err)) {
+			if (log_ecn_error)
+				net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
+						     &ipv6h->saddr,
+						     ipv6_get_dsfield(ipv6h));
+			if (err > 1) {
+				++tunnel->dev->stats.rx_frame_errors;
+				++tunnel->dev->stats.rx_errors;
+				goto drop;
+			}
+		}
+
+		tstats = this_cpu_ptr(tunnel->dev->tstats);
+		u64_stats_update_begin(&tstats->syncp);
+		tstats->rx_packets++;
+		tstats->rx_bytes += skb->len;
+		u64_stats_update_end(&tstats->syncp);
+
+		netif_rx(skb);
+
+		return 0;
+	}
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
+struct ipv6_tel_txoption {
+	struct ipv6_txoptions ops;
+	__u8 dst_opt[8];
+};
+
+static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
+{
+	memset(opt, 0, sizeof(struct ipv6_tel_txoption));
+
+	opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
+	opt->dst_opt[3] = 1;
+	opt->dst_opt[4] = encap_limit;
+	opt->dst_opt[5] = IPV6_TLV_PADN;
+	opt->dst_opt[6] = 1;
+
+	opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
+	opt->ops.opt_nflen = 8;
+}
+
+static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
+			 struct net_device *dev,
+			 __u8 dsfield,
+			 struct flowi6 *fl6,
+			 int encap_limit,
+			 __u32 *pmtu)
+{
+	struct ip6_tnl *tunnel = netdev_priv(dev);
+	struct net *net = tunnel->net;
+	struct net_device *tdev;    /* Device to other host */
+	struct ipv6hdr  *ipv6h;     /* Our new IP header */
+	unsigned int max_headroom = 0; /* The extra header space needed */
+	int    gre_hlen;
+	struct ipv6_tel_txoption opt;
+	int    mtu;
+	struct dst_entry *dst = NULL, *ndst = NULL;
+	struct net_device_stats *stats = &tunnel->dev->stats;
+	int err = -1;
+	u8 proto;
+	struct sk_buff *new_skb;
+
+	if (dev->type == ARPHRD_ETHER)
+		IPCB(skb)->flags = 0;
+
+	if (dev->header_ops && dev->type == ARPHRD_IP6GRE) {
+		gre_hlen = 0;
+		ipv6h = (struct ipv6hdr *)skb->data;
+		fl6->daddr = ipv6h->daddr;
+	} else {
+		gre_hlen = tunnel->hlen;
+		fl6->daddr = tunnel->parms.raddr;
+	}
+
+	if (!fl6->flowi6_mark)
+		dst = ip6_tnl_dst_check(tunnel);
+
+	if (!dst) {
+		ndst = ip6_route_output(net, NULL, fl6);
+
+		if (ndst->error)
+			goto tx_err_link_failure;
+		ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
+		if (IS_ERR(ndst)) {
+			err = PTR_ERR(ndst);
+			ndst = NULL;
+			goto tx_err_link_failure;
+		}
+		dst = ndst;
+	}
+
+	tdev = dst->dev;
+
+	if (tdev == dev) {
+		stats->collisions++;
+		net_warn_ratelimited("%s: Local routing loop detected!\n",
+				     tunnel->parms.name);
+		goto tx_err_dst_release;
+	}
+
+	mtu = dst_mtu(dst) - sizeof(*ipv6h);
+	if (encap_limit >= 0) {
+		max_headroom += 8;
+		mtu -= 8;
+	}
+	if (mtu < IPV6_MIN_MTU)
+		mtu = IPV6_MIN_MTU;
+	if (skb_dst(skb))
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+	if (skb->len > mtu) {
+		*pmtu = mtu;
+		err = -EMSGSIZE;
+		goto tx_err_dst_release;
+	}
+
+	if (tunnel->err_count > 0) {
+		if (time_before(jiffies,
+				tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) {
+			tunnel->err_count--;
+
+			dst_link_failure(skb);
+		} else
+			tunnel->err_count = 0;
+	}
+
+	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
+
+	max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
+
+	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
+	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
+		new_skb = skb_realloc_headroom(skb, max_headroom);
+		if (max_headroom > dev->needed_headroom)
+			dev->needed_headroom = max_headroom;
+		if (!new_skb)
+			goto tx_err_dst_release;
+
+		if (skb->sk)
+			skb_set_owner_w(new_skb, skb->sk);
+		consume_skb(skb);
+		skb = new_skb;
+	}
+
+	if (fl6->flowi6_mark) {
+		skb_dst_set(skb, dst);
+		ndst = NULL;
+	} else {
+		skb_dst_set_noref(skb, dst);
+	}
+
+	proto = NEXTHDR_GRE;
+	if (encap_limit >= 0) {
+		init_tel_txopt(&opt, encap_limit);
+		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
+	}
+
+	if (likely(!skb->encapsulation)) {
+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
+	}
+
+	skb_push(skb, gre_hlen);
+	skb_reset_network_header(skb);
+	skb_set_transport_header(skb, sizeof(*ipv6h));
+
+	/*
+	 *	Push down and install the IP header.
+	 */
+	ipv6h = ipv6_hdr(skb);
+	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
+	ipv6h->hop_limit = tunnel->parms.hop_limit;
+	ipv6h->nexthdr = proto;
+	ipv6h->saddr = fl6->saddr;
+	ipv6h->daddr = fl6->daddr;
+
+	((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
+	((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ?
+				   htons(ETH_P_TEB) : skb->protocol;
+
+	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
+		__be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
+
+		if (tunnel->parms.o_flags&GRE_SEQ) {
+			++tunnel->o_seqno;
+			*ptr = htonl(tunnel->o_seqno);
+			ptr--;
+		}
+		if (tunnel->parms.o_flags&GRE_KEY) {
+			*ptr = tunnel->parms.o_key;
+			ptr--;
+		}
+		if (tunnel->parms.o_flags&GRE_CSUM) {
+			*ptr = 0;
+			*(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1),
+				skb->len - sizeof(struct ipv6hdr));
+		}
+	}
+
+	ip6tunnel_xmit(skb, dev);
+	if (ndst)
+		ip6_tnl_dst_store(tunnel, ndst);
+	return 0;
+tx_err_link_failure:
+	stats->tx_carrier_errors++;
+	dst_link_failure(skb);
+tx_err_dst_release:
+	dst_release(ndst);
+	return err;
+}
+
+static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	const struct iphdr  *iph = ip_hdr(skb);
+	int encap_limit = -1;
+	struct flowi6 fl6;
+	__u8 dsfield;
+	__u32 mtu;
+	int err;
+
+	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		encap_limit = t->parms.encap_limit;
+
+	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+	fl6.flowi6_proto = IPPROTO_IPIP;
+
+	dsfield = ipv4_get_dsfield(iph);
+
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+		fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+					  & IPV6_TCLASS_MASK;
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+		fl6.flowi6_mark = skb->mark;
+
+	err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+	if (err != 0) {
+		/* XXX: send ICMP error even if DF is not set. */
+		if (err == -EMSGSIZE)
+			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+				  htonl(mtu));
+		return -1;
+	}
+
+	return 0;
+}
+
+static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	int encap_limit = -1;
+	__u16 offset;
+	struct flowi6 fl6;
+	__u8 dsfield;
+	__u32 mtu;
+	int err;
+
+	if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
+		return -1;
+
+	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+	if (offset > 0) {
+		struct ipv6_tlv_tnl_enc_lim *tel;
+		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
+		if (tel->encap_limit == 0) {
+			icmpv6_send(skb, ICMPV6_PARAMPROB,
+				    ICMPV6_HDR_FIELD, offset + 2);
+			return -1;
+		}
+		encap_limit = tel->encap_limit - 1;
+	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		encap_limit = t->parms.encap_limit;
+
+	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+	fl6.flowi6_proto = IPPROTO_IPV6;
+
+	dsfield = ipv6_get_dsfield(ipv6h);
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+		fl6.flowlabel |= ip6_flowlabel(ipv6h);
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+		fl6.flowi6_mark = skb->mark;
+
+	err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+	if (err != 0) {
+		if (err == -EMSGSIZE)
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
+ *   @t: the outgoing tunnel device
+ *   @hdr: IPv6 header from the incoming packet
+ *
+ * Description:
+ *   Avoid trivial tunneling loop by checking that tunnel exit-point
+ *   doesn't match source of incoming packet.
+ *
+ * Return:
+ *   1 if conflict,
+ *   0 else
+ **/
+
+static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t,
+	const struct ipv6hdr *hdr)
+{
+	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
+}
+
+static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	int encap_limit = -1;
+	struct flowi6 fl6;
+	__u32 mtu;
+	int err;
+
+	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		encap_limit = t->parms.encap_limit;
+
+	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+	fl6.flowi6_proto = skb->protocol;
+
+	err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu);
+
+	return err;
+}
+
+static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
+	struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net_device_stats *stats = &t->dev->stats;
+	int ret;
+
+	if (!ip6_tnl_xmit_ctl(t))
+		goto tx_err;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		ret = ip6gre_xmit_ipv4(skb, dev);
+		break;
+	case htons(ETH_P_IPV6):
+		ret = ip6gre_xmit_ipv6(skb, dev);
+		break;
+	default:
+		ret = ip6gre_xmit_other(skb, dev);
+		break;
+	}
+
+	if (ret < 0)
+		goto tx_err;
+
+	return NETDEV_TX_OK;
+
+tx_err:
+	stats->tx_errors++;
+	stats->tx_dropped++;
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
+{
+	struct net_device *dev = t->dev;
+	struct __ip6_tnl_parm *p = &t->parms;
+	struct flowi6 *fl6 = &t->fl.u.ip6;
+	int addend = sizeof(struct ipv6hdr) + 4;
+
+	if (dev->type != ARPHRD_ETHER) {
+		memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+		memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
+	}
+
+	/* Set up flowi template */
+	fl6->saddr = p->laddr;
+	fl6->daddr = p->raddr;
+	fl6->flowi6_oif = p->link;
+	fl6->flowlabel = 0;
+
+	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
+		fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
+	if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
+		fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
+
+	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
+	p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
+
+	if (p->flags&IP6_TNL_F_CAP_XMIT &&
+			p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER)
+		dev->flags |= IFF_POINTOPOINT;
+	else
+		dev->flags &= ~IFF_POINTOPOINT;
+
+	dev->iflink = p->link;
+
+	/* Precalculate GRE options length */
+	if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
+		if (t->parms.o_flags&GRE_CSUM)
+			addend += 4;
+		if (t->parms.o_flags&GRE_KEY)
+			addend += 4;
+		if (t->parms.o_flags&GRE_SEQ)
+			addend += 4;
+	}
+	t->hlen = addend;
+
+	if (p->flags & IP6_TNL_F_CAP_XMIT) {
+		int strict = (ipv6_addr_type(&p->raddr) &
+			      (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
+
+		struct rt6_info *rt = rt6_lookup(t->net,
+						 &p->raddr, &p->laddr,
+						 p->link, strict);
+
+		if (rt == NULL)
+			return;
+
+		if (rt->dst.dev) {
+			dev->hard_header_len = rt->dst.dev->hard_header_len + addend;
+
+			if (set_mtu) {
+				dev->mtu = rt->dst.dev->mtu - addend;
+				if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+					dev->mtu -= 8;
+
+				if (dev->mtu < IPV6_MIN_MTU)
+					dev->mtu = IPV6_MIN_MTU;
+			}
+		}
+		ip6_rt_put(rt);
+	}
+}
+
+static int ip6gre_tnl_change(struct ip6_tnl *t,
+	const struct __ip6_tnl_parm *p, int set_mtu)
+{
+	t->parms.laddr = p->laddr;
+	t->parms.raddr = p->raddr;
+	t->parms.flags = p->flags;
+	t->parms.hop_limit = p->hop_limit;
+	t->parms.encap_limit = p->encap_limit;
+	t->parms.flowinfo = p->flowinfo;
+	t->parms.link = p->link;
+	t->parms.proto = p->proto;
+	t->parms.i_key = p->i_key;
+	t->parms.o_key = p->o_key;
+	t->parms.i_flags = p->i_flags;
+	t->parms.o_flags = p->o_flags;
+	ip6_tnl_dst_reset(t);
+	ip6gre_tnl_link_config(t, set_mtu);
+	return 0;
+}
+
+static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
+	const struct ip6_tnl_parm2 *u)
+{
+	p->laddr = u->laddr;
+	p->raddr = u->raddr;
+	p->flags = u->flags;
+	p->hop_limit = u->hop_limit;
+	p->encap_limit = u->encap_limit;
+	p->flowinfo = u->flowinfo;
+	p->link = u->link;
+	p->i_key = u->i_key;
+	p->o_key = u->o_key;
+	p->i_flags = u->i_flags;
+	p->o_flags = u->o_flags;
+	memcpy(p->name, u->name, sizeof(u->name));
+}
+
+static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
+	const struct __ip6_tnl_parm *p)
+{
+	u->proto = IPPROTO_GRE;
+	u->laddr = p->laddr;
+	u->raddr = p->raddr;
+	u->flags = p->flags;
+	u->hop_limit = p->hop_limit;
+	u->encap_limit = p->encap_limit;
+	u->flowinfo = p->flowinfo;
+	u->link = p->link;
+	u->i_key = p->i_key;
+	u->o_key = p->o_key;
+	u->i_flags = p->i_flags;
+	u->o_flags = p->o_flags;
+	memcpy(u->name, p->name, sizeof(u->name));
+}
+
+static int ip6gre_tunnel_ioctl(struct net_device *dev,
+	struct ifreq *ifr, int cmd)
+{
+	int err = 0;
+	struct ip6_tnl_parm2 p;
+	struct __ip6_tnl_parm p1;
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net *net = t->net;
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+	switch (cmd) {
+	case SIOCGETTUNNEL:
+		if (dev == ign->fb_tunnel_dev) {
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+				err = -EFAULT;
+				break;
+			}
+			ip6gre_tnl_parm_from_user(&p1, &p);
+			t = ip6gre_tunnel_locate(net, &p1, 0);
+			if (t == NULL)
+				t = netdev_priv(dev);
+		}
+		memset(&p, 0, sizeof(p));
+		ip6gre_tnl_parm_to_user(&p, &t->parms);
+		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+			err = -EFAULT;
+		break;
+
+	case SIOCADDTUNNEL:
+	case SIOCCHGTUNNEL:
+		err = -EPERM;
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+			goto done;
+
+		err = -EFAULT;
+		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+			goto done;
+
+		err = -EINVAL;
+		if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))
+			goto done;
+
+		if (!(p.i_flags&GRE_KEY))
+			p.i_key = 0;
+		if (!(p.o_flags&GRE_KEY))
+			p.o_key = 0;
+
+		ip6gre_tnl_parm_from_user(&p1, &p);
+		t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL);
+
+		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+			if (t != NULL) {
+				if (t->dev != dev) {
+					err = -EEXIST;
+					break;
+				}
+			} else {
+				t = netdev_priv(dev);
+
+				ip6gre_tunnel_unlink(ign, t);
+				synchronize_net();
+				ip6gre_tnl_change(t, &p1, 1);
+				ip6gre_tunnel_link(ign, t);
+				netdev_state_change(dev);
+			}
+		}
+
+		if (t) {
+			err = 0;
+
+			memset(&p, 0, sizeof(p));
+			ip6gre_tnl_parm_to_user(&p, &t->parms);
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+				err = -EFAULT;
+		} else
+			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+		break;
+
+	case SIOCDELTUNNEL:
+		err = -EPERM;
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+			goto done;
+
+		if (dev == ign->fb_tunnel_dev) {
+			err = -EFAULT;
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+				goto done;
+			err = -ENOENT;
+			ip6gre_tnl_parm_from_user(&p1, &p);
+			t = ip6gre_tunnel_locate(net, &p1, 0);
+			if (t == NULL)
+				goto done;
+			err = -EPERM;
+			if (t == netdev_priv(ign->fb_tunnel_dev))
+				goto done;
+			dev = t->dev;
+		}
+		unregister_netdevice(dev);
+		err = 0;
+		break;
+
+	default:
+		err = -EINVAL;
+	}
+
+done:
+	return err;
+}
+
+static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+	if (new_mtu < 68 ||
+	    new_mtu > 0xFFF8 - dev->hard_header_len)
+		return -EINVAL;
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
+			unsigned short type,
+			const void *daddr, const void *saddr, unsigned int len)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
+	__be16 *p = (__be16 *)(ipv6h+1);
+
+	ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel);
+	ipv6h->hop_limit = t->parms.hop_limit;
+	ipv6h->nexthdr = NEXTHDR_GRE;
+	ipv6h->saddr = t->parms.laddr;
+	ipv6h->daddr = t->parms.raddr;
+
+	p[0]		= t->parms.o_flags;
+	p[1]		= htons(type);
+
+	/*
+	 *	Set the source hardware address.
+	 */
+
+	if (saddr)
+		memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr));
+	if (daddr)
+		memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr));
+	if (!ipv6_addr_any(&ipv6h->daddr))
+		return t->hlen;
+
+	return -t->hlen;
+}
+
+static const struct header_ops ip6gre_header_ops = {
+	.create	= ip6gre_header,
+};
+
+static const struct net_device_ops ip6gre_netdev_ops = {
+	.ndo_init		= ip6gre_tunnel_init,
+	.ndo_uninit		= ip6gre_tunnel_uninit,
+	.ndo_start_xmit		= ip6gre_tunnel_xmit,
+	.ndo_do_ioctl		= ip6gre_tunnel_ioctl,
+	.ndo_change_mtu		= ip6gre_tunnel_change_mtu,
+	.ndo_get_stats64	= ip_tunnel_get_stats64,
+};
+
+static void ip6gre_dev_free(struct net_device *dev)
+{
+	free_percpu(dev->tstats);
+	free_netdev(dev);
+}
+
+static void ip6gre_tunnel_setup(struct net_device *dev)
+{
+	struct ip6_tnl *t;
+
+	dev->netdev_ops = &ip6gre_netdev_ops;
+	dev->destructor = ip6gre_dev_free;
+
+	dev->type = ARPHRD_IP6GRE;
+	dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4;
+	dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4;
+	t = netdev_priv(dev);
+	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		dev->mtu -= 8;
+	dev->flags |= IFF_NOARP;
+	dev->iflink = 0;
+	dev->addr_len = sizeof(struct in6_addr);
+	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+}
+
+static int ip6gre_tunnel_init(struct net_device *dev)
+{
+	struct ip6_tnl *tunnel;
+	int i;
+
+	tunnel = netdev_priv(dev);
+
+	tunnel->dev = dev;
+	tunnel->net = dev_net(dev);
+	strcpy(tunnel->parms.name, dev->name);
+
+	memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
+	memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
+
+	if (ipv6_addr_any(&tunnel->parms.raddr))
+		dev->header_ops = &ip6gre_header_ops;
+
+	dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+	if (!dev->tstats)
+		return -ENOMEM;
+
+	for_each_possible_cpu(i) {
+		struct pcpu_sw_netstats *ip6gre_tunnel_stats;
+		ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i);
+		u64_stats_init(&ip6gre_tunnel_stats->syncp);
+	}
+
+
+	return 0;
+}
+
+static void ip6gre_fb_tunnel_init(struct net_device *dev)
+{
+	struct ip6_tnl *tunnel = netdev_priv(dev);
+
+	tunnel->dev = dev;
+	tunnel->net = dev_net(dev);
+	strcpy(tunnel->parms.name, dev->name);
+
+	tunnel->hlen		= sizeof(struct ipv6hdr) + 4;
+
+	dev_hold(dev);
+}
+
+
+static struct inet6_protocol ip6gre_protocol __read_mostly = {
+	.handler     = ip6gre_rcv,
+	.err_handler = ip6gre_err,
+	.flags       = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
+{
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+	struct net_device *dev, *aux;
+	int prio;
+
+	for_each_netdev_safe(net, dev, aux)
+		if (dev->rtnl_link_ops == &ip6gre_link_ops ||
+		    dev->rtnl_link_ops == &ip6gre_tap_ops)
+			unregister_netdevice_queue(dev, head);
+
+	for (prio = 0; prio < 4; prio++) {
+		int h;
+		for (h = 0; h < HASH_SIZE; h++) {
+			struct ip6_tnl *t;
+
+			t = rtnl_dereference(ign->tunnels[prio][h]);
+
+			while (t != NULL) {
+				/* If dev is in the same netns, it has already
+				 * been added to the list by the previous loop.
+				 */
+				if (!net_eq(dev_net(t->dev), net))
+					unregister_netdevice_queue(t->dev,
+								   head);
+				t = rtnl_dereference(t->next);
+			}
+		}
+	}
+}
+
+static int __net_init ip6gre_init_net(struct net *net)
+{
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+	int err;
+
+	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
+					   ip6gre_tunnel_setup);
+	if (!ign->fb_tunnel_dev) {
+		err = -ENOMEM;
+		goto err_alloc_dev;
+	}
+	dev_net_set(ign->fb_tunnel_dev, net);
+	/* FB netdevice is special: we have one, and only one per netns.
+	 * Allowing to move it to another netns is clearly unsafe.
+	 */
+	ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+
+
+	ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
+	ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
+
+	err = register_netdev(ign->fb_tunnel_dev);
+	if (err)
+		goto err_reg_dev;
+
+	rcu_assign_pointer(ign->tunnels_wc[0],
+			   netdev_priv(ign->fb_tunnel_dev));
+	return 0;
+
+err_reg_dev:
+	ip6gre_dev_free(ign->fb_tunnel_dev);
+err_alloc_dev:
+	return err;
+}
+
+static void __net_exit ip6gre_exit_net(struct net *net)
+{
+	LIST_HEAD(list);
+
+	rtnl_lock();
+	ip6gre_destroy_tunnels(net, &list);
+	unregister_netdevice_many(&list);
+	rtnl_unlock();
+}
+
+static struct pernet_operations ip6gre_net_ops = {
+	.init = ip6gre_init_net,
+	.exit = ip6gre_exit_net,
+	.id   = &ip6gre_net_id,
+	.size = sizeof(struct ip6gre_net),
+};
+
+static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	__be16 flags;
+
+	if (!data)
+		return 0;
+
+	flags = 0;
+	if (data[IFLA_GRE_IFLAGS])
+		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+	if (data[IFLA_GRE_OFLAGS])
+		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+	if (flags & (GRE_VERSION|GRE_ROUTING))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	struct in6_addr daddr;
+
+	if (tb[IFLA_ADDRESS]) {
+		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+			return -EINVAL;
+		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+			return -EADDRNOTAVAIL;
+	}
+
+	if (!data)
+		goto out;
+
+	if (data[IFLA_GRE_REMOTE]) {
+		nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
+		if (ipv6_addr_any(&daddr))
+			return -EINVAL;
+	}
+
+out:
+	return ip6gre_tunnel_validate(tb, data);
+}
+
+
+static void ip6gre_netlink_parms(struct nlattr *data[],
+				struct __ip6_tnl_parm *parms)
+{
+	memset(parms, 0, sizeof(*parms));
+
+	if (!data)
+		return;
+
+	if (data[IFLA_GRE_LINK])
+		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
+
+	if (data[IFLA_GRE_IFLAGS])
+		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
+
+	if (data[IFLA_GRE_OFLAGS])
+		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
+
+	if (data[IFLA_GRE_IKEY])
+		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
+
+	if (data[IFLA_GRE_OKEY])
+		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
+
+	if (data[IFLA_GRE_LOCAL])
+		nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr));
+
+	if (data[IFLA_GRE_REMOTE])
+		nla_memcpy(&parms->raddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
+
+	if (data[IFLA_GRE_TTL])
+		parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]);
+
+	if (data[IFLA_GRE_ENCAP_LIMIT])
+		parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);
+
+	if (data[IFLA_GRE_FLOWINFO])
+		parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]);
+
+	if (data[IFLA_GRE_FLAGS])
+		parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
+}
+
+static int ip6gre_tap_init(struct net_device *dev)
+{
+	struct ip6_tnl *tunnel;
+
+	tunnel = netdev_priv(dev);
+
+	tunnel->dev = dev;
+	tunnel->net = dev_net(dev);
+	strcpy(tunnel->parms.name, dev->name);
+
+	ip6gre_tnl_link_config(tunnel, 1);
+
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	if (!dev->tstats)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static const struct net_device_ops ip6gre_tap_netdev_ops = {
+	.ndo_init = ip6gre_tap_init,
+	.ndo_uninit = ip6gre_tunnel_uninit,
+	.ndo_start_xmit = ip6gre_tunnel_xmit,
+	.ndo_set_mac_address = eth_mac_addr,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_change_mtu = ip6gre_tunnel_change_mtu,
+	.ndo_get_stats64 = ip_tunnel_get_stats64,
+};
+
+static void ip6gre_tap_setup(struct net_device *dev)
+{
+
+	ether_setup(dev);
+
+	dev->netdev_ops = &ip6gre_tap_netdev_ops;
+	dev->destructor = ip6gre_dev_free;
+
+	dev->iflink = 0;
+	dev->features |= NETIF_F_NETNS_LOCAL;
+}
+
+static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
+	struct nlattr *tb[], struct nlattr *data[])
+{
+	struct ip6_tnl *nt;
+	struct net *net = dev_net(dev);
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+	int err;
+
+	nt = netdev_priv(dev);
+	ip6gre_netlink_parms(data, &nt->parms);
+
+	if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+		return -EEXIST;
+
+	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
+		eth_hw_addr_random(dev);
+
+	nt->dev = dev;
+	nt->net = dev_net(dev);
+	ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
+
+	/* Can use a lockless transmit, unless we generate output sequences */
+	if (!(nt->parms.o_flags & GRE_SEQ))
+		dev->features |= NETIF_F_LLTX;
+
+	err = register_netdevice(dev);
+	if (err)
+		goto out;
+
+	dev_hold(dev);
+	ip6gre_tunnel_link(ign, nt);
+
+out:
+	return err;
+}
+
+static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
+			    struct nlattr *data[])
+{
+	struct ip6_tnl *t, *nt = netdev_priv(dev);
+	struct net *net = nt->net;
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+	struct __ip6_tnl_parm p;
+
+	if (dev == ign->fb_tunnel_dev)
+		return -EINVAL;
+
+	ip6gre_netlink_parms(data, &p);
+
+	t = ip6gre_tunnel_locate(net, &p, 0);
+
+	if (t) {
+		if (t->dev != dev)
+			return -EEXIST;
+	} else {
+		t = nt;
+
+		ip6gre_tunnel_unlink(ign, t);
+		ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
+		ip6gre_tunnel_link(ign, t);
+		netdev_state_change(dev);
+	}
+
+	return 0;
+}
+
+static void ip6gre_dellink(struct net_device *dev, struct list_head *head)
+{
+	struct net *net = dev_net(dev);
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+	if (dev != ign->fb_tunnel_dev)
+		unregister_netdevice_queue(dev, head);
+}
+
+static size_t ip6gre_get_size(const struct net_device *dev)
+{
+	return
+		/* IFLA_GRE_LINK */
+		nla_total_size(4) +
+		/* IFLA_GRE_IFLAGS */
+		nla_total_size(2) +
+		/* IFLA_GRE_OFLAGS */
+		nla_total_size(2) +
+		/* IFLA_GRE_IKEY */
+		nla_total_size(4) +
+		/* IFLA_GRE_OKEY */
+		nla_total_size(4) +
+		/* IFLA_GRE_LOCAL */
+		nla_total_size(sizeof(struct in6_addr)) +
+		/* IFLA_GRE_REMOTE */
+		nla_total_size(sizeof(struct in6_addr)) +
+		/* IFLA_GRE_TTL */
+		nla_total_size(1) +
+		/* IFLA_GRE_TOS */
+		nla_total_size(1) +
+		/* IFLA_GRE_ENCAP_LIMIT */
+		nla_total_size(1) +
+		/* IFLA_GRE_FLOWINFO */
+		nla_total_size(4) +
+		/* IFLA_GRE_FLAGS */
+		nla_total_size(4) +
+		0;
+}
+
+static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct __ip6_tnl_parm *p = &t->parms;
+
+	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
+	    nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
+	    nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
+	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
+	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
+	    nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->laddr) ||
+	    nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->raddr) ||
+	    nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
+	    /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/
+	    nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
+	    nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
+	    nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
+	[IFLA_GRE_LINK]        = { .type = NLA_U32 },
+	[IFLA_GRE_IFLAGS]      = { .type = NLA_U16 },
+	[IFLA_GRE_OFLAGS]      = { .type = NLA_U16 },
+	[IFLA_GRE_IKEY]        = { .type = NLA_U32 },
+	[IFLA_GRE_OKEY]        = { .type = NLA_U32 },
+	[IFLA_GRE_LOCAL]       = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
+	[IFLA_GRE_REMOTE]      = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) },
+	[IFLA_GRE_TTL]         = { .type = NLA_U8 },
+	[IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
+	[IFLA_GRE_FLOWINFO]    = { .type = NLA_U32 },
+	[IFLA_GRE_FLAGS]       = { .type = NLA_U32 },
+};
+
+static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
+	.kind		= "ip6gre",
+	.maxtype	= IFLA_GRE_MAX,
+	.policy		= ip6gre_policy,
+	.priv_size	= sizeof(struct ip6_tnl),
+	.setup		= ip6gre_tunnel_setup,
+	.validate	= ip6gre_tunnel_validate,
+	.newlink	= ip6gre_newlink,
+	.changelink	= ip6gre_changelink,
+	.dellink	= ip6gre_dellink,
+	.get_size	= ip6gre_get_size,
+	.fill_info	= ip6gre_fill_info,
+};
+
+static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
+	.kind		= "ip6gretap",
+	.maxtype	= IFLA_GRE_MAX,
+	.policy		= ip6gre_policy,
+	.priv_size	= sizeof(struct ip6_tnl),
+	.setup		= ip6gre_tap_setup,
+	.validate	= ip6gre_tap_validate,
+	.newlink	= ip6gre_newlink,
+	.changelink	= ip6gre_changelink,
+	.get_size	= ip6gre_get_size,
+	.fill_info	= ip6gre_fill_info,
+};
+
+/*
+ *	And now the modules code and kernel interface.
+ */
+
+static int __init ip6gre_init(void)
+{
+	int err;
+
+	pr_info("GRE over IPv6 tunneling driver\n");
+
+	err = register_pernet_device(&ip6gre_net_ops);
+	if (err < 0)
+		return err;
+
+	err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE);
+	if (err < 0) {
+		pr_info("%s: can't add protocol\n", __func__);
+		goto add_proto_failed;
+	}
+
+	err = rtnl_link_register(&ip6gre_link_ops);
+	if (err < 0)
+		goto rtnl_link_failed;
+
+	err = rtnl_link_register(&ip6gre_tap_ops);
+	if (err < 0)
+		goto tap_ops_failed;
+
+out:
+	return err;
+
+tap_ops_failed:
+	rtnl_link_unregister(&ip6gre_link_ops);
+rtnl_link_failed:
+	inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
+add_proto_failed:
+	unregister_pernet_device(&ip6gre_net_ops);
+	goto out;
+}
+
+static void __exit ip6gre_fini(void)
+{
+	rtnl_link_unregister(&ip6gre_tap_ops);
+	rtnl_link_unregister(&ip6gre_link_ops);
+	inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
+	unregister_pernet_device(&ip6gre_net_ops);
+}
+
+module_init(ip6gre_init);
+module_exit(ip6gre_fini);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
+MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
+MODULE_ALIAS_RTNL_LINK("ip6gre");
+MODULE_ALIAS_NETDEV("ip6gre0");
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
new file mode 100644
index 00000000000..4578e23834f
--- /dev/null
+++ b/net/ipv6/ip6_icmp.c
@@ -0,0 +1,47 @@
+#include <linux/export.h>
+#include <linux/icmpv6.h>
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+
+#include <net/ipv6.h>
+
+#if IS_ENABLED(CONFIG_IPV6)
+
+static ip6_icmp_send_t __rcu *ip6_icmp_send;
+
+int inet6_register_icmp_sender(ip6_icmp_send_t *fn)
+{
+	return (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, NULL, fn) == NULL) ?
+	        0 : -EBUSY;
+}
+EXPORT_SYMBOL(inet6_register_icmp_sender);
+
+int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn)
+{
+	int ret;
+
+	ret = (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, fn, NULL) == fn) ?
+	      0 : -EINVAL;
+
+	synchronize_net();
+
+	return ret;
+}
+EXPORT_SYMBOL(inet6_unregister_icmp_sender);
+
+void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+{
+	ip6_icmp_send_t *send;
+
+	rcu_read_lock();
+	send = rcu_dereference(ip6_icmp_send);
+
+	if (!send)
+		goto out;
+	send(skb, type, code, info);
+out:
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(icmpv6_send);
+#endif
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 6e348042693..51d54dc376f 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -1,13 +1,11 @@
 /*
  *	IPv6 input
- *	Linux INET6 implementation 
+ *	Linux INET6 implementation
  *
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *	Ian P. Morris		<I.P.Morris@soton.ac.uk>
  *
- *	$Id: ip6_input.c,v 1.19 2000/12/13 18:31:50 davem Exp $
- *
  *	Based in linux/net/ipv4/ip_input.c
  *
  *	This program is free software; you can redistribute it and/or
@@ -25,11 +23,12 @@
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
-#include <linux/sched.h>
 #include <linux/net.h>
 #include <linux/netdevice.h>
 #include <linux/in6.h>
 #include <linux/icmpv6.h>
+#include <linux/mroute6.h>
+#include <linux/slab.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
@@ -45,12 +44,19 @@
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
 #include <net/xfrm.h>
+#include <net/inet_ecn.h>
 
 
-
-static inline int ip6_rcv_finish( struct sk_buff *skb) 
+int ip6_rcv_finish(struct sk_buff *skb)
 {
-	if (skb->dst == NULL)
+	if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
+		const struct inet6_protocol *ipprot;
+
+		ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
+		if (ipprot && ipprot->early_demux)
+			ipprot->early_demux(skb);
+	}
+	if (!skb_dst(skb))
 		ip6_route_input(skb);
 
 	return dst_input(skb);
@@ -58,76 +64,133 @@ static inline int ip6_rcv_finish( struct sk_buff *skb)
 
 int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
 {
-	struct ipv6hdr *hdr;
+	const struct ipv6hdr *hdr;
 	u32 		pkt_len;
+	struct inet6_dev *idev;
+	struct net *net = dev_net(skb->dev);
 
-	if (skb->pkt_type == PACKET_OTHERHOST)
-		goto drop;
+	if (skb->pkt_type == PACKET_OTHERHOST) {
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
 
-	IP6_INC_STATS_BH(IPSTATS_MIB_INRECEIVES);
+	rcu_read_lock();
+
+	idev = __in6_dev_get(skb->dev);
 
-	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
-		IP6_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
-		goto out;
+	IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_IN, skb->len);
+
+	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
+	    !idev || unlikely(idev->cnf.disable_ipv6)) {
+		IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS);
+		goto drop;
 	}
 
+	memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
+
 	/*
 	 * Store incoming device index. When the packet will
 	 * be queued, we cannot refer to skb->dev anymore.
 	 *
 	 * BTW, when we send a packet for our own local address on a
 	 * non-loopback interface (e.g. ethX), it is being delivered
-	 * via the loopback interface (lo) here; skb->dev = &loopback_dev.
+	 * via the loopback interface (lo) here; skb->dev = loopback_dev.
 	 * It, however, should be considered as if it is being
 	 * arrived via the sending interface (ethX), because of the
 	 * nature of scoping architecture. --yoshfuji
 	 */
-	IP6CB(skb)->iif = skb->dst ? ((struct rt6_info *)skb->dst)->rt6i_idev->dev->ifindex : dev->ifindex;
+	IP6CB(skb)->iif = skb_dst(skb) ? ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex;
 
-	if (skb->len < sizeof(struct ipv6hdr))
+	if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
 		goto err;
 
-	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) {
-		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
-		goto drop;
-	}
-
-	hdr = skb->nh.ipv6h;
+	hdr = ipv6_hdr(skb);
 
 	if (hdr->version != 6)
 		goto err;
 
+	IP6_ADD_STATS_BH(dev_net(dev), idev,
+			 IPSTATS_MIB_NOECTPKTS +
+				(ipv6_get_dsfield(hdr) & INET_ECN_MASK),
+			 max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
+	/*
+	 * RFC4291 2.5.3
+	 * A packet received on an interface with a destination address
+	 * of loopback must be dropped.
+	 */
+	if (!(dev->flags & IFF_LOOPBACK) &&
+	    ipv6_addr_loopback(&hdr->daddr))
+		goto err;
+
+	/* RFC4291 Errata ID: 3480
+	 * Interface-Local scope spans only a single interface on a
+	 * node and is useful only for loopback transmission of
+	 * multicast.  Packets with interface-local scope received
+	 * from another node must be discarded.
+	 */
+	if (!(skb->pkt_type == PACKET_LOOPBACK ||
+	      dev->flags & IFF_LOOPBACK) &&
+	    ipv6_addr_is_multicast(&hdr->daddr) &&
+	    IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1)
+		goto err;
+
+	/* RFC4291 2.7
+	 * Nodes must not originate a packet to a multicast address whose scope
+	 * field contains the reserved value 0; if such a packet is received, it
+	 * must be silently dropped.
+	 */
+	if (ipv6_addr_is_multicast(&hdr->daddr) &&
+	    IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 0)
+		goto err;
+
+	/*
+	 * RFC4291 2.7
+	 * Multicast addresses must not be used as source addresses in IPv6
+	 * packets or appear in any Routing header.
+	 */
+	if (ipv6_addr_is_multicast(&hdr->saddr))
+		goto err;
+
+	skb->transport_header = skb->network_header + sizeof(*hdr);
+	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+
 	pkt_len = ntohs(hdr->payload_len);
 
 	/* pkt_len may be zero if Jumbo payload option is present */
 	if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
-		if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
-			goto truncated;
+		if (pkt_len + sizeof(struct ipv6hdr) > skb->len) {
+			IP6_INC_STATS_BH(net,
+					 idev, IPSTATS_MIB_INTRUNCATEDPKTS);
+			goto drop;
+		}
 		if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
-			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+			IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
 			goto drop;
 		}
-		hdr = skb->nh.ipv6h;
+		hdr = ipv6_hdr(skb);
 	}
 
 	if (hdr->nexthdr == NEXTHDR_HOP) {
-		skb->h.raw = (u8*)(hdr+1);
-		if (ipv6_parse_hopopts(skb, offsetof(struct ipv6hdr, nexthdr)) < 0) {
-			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
-			return 0;
+		if (ipv6_parse_hopopts(skb) < 0) {
+			IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
+			rcu_read_unlock();
+			return NET_RX_DROP;
 		}
-		hdr = skb->nh.ipv6h;
 	}
 
-	return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish);
-truncated:
-	IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
+	rcu_read_unlock();
+
+	/* Must drop socket now because of tproxy. */
+	skb_orphan(skb);
+
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, dev, NULL,
+		       ip6_rcv_finish);
 err:
-	IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+	IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
 drop:
+	rcu_read_unlock();
 	kfree_skb(skb);
-out:
-	return 0;
+	return NET_RX_DROP;
 }
 
 /*
@@ -135,82 +198,76 @@ out:
  */
 
 
-static inline int ip6_input_finish(struct sk_buff *skb)
+static int ip6_input_finish(struct sk_buff *skb)
 {
-	struct inet6_protocol *ipprot;
-	struct sock *raw_sk;
+	struct net *net = dev_net(skb_dst(skb)->dev);
+	const struct inet6_protocol *ipprot;
+	struct inet6_dev *idev;
 	unsigned int nhoff;
 	int nexthdr;
-	u8 hash;
-
-	skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
+	bool raw;
 
 	/*
 	 *	Parse extension headers
 	 */
 
-	nexthdr = skb->nh.ipv6h->nexthdr;
-	nhoff = offsetof(struct ipv6hdr, nexthdr);
-
-	/* Skip hop-by-hop options, they are already parsed. */
-	if (nexthdr == NEXTHDR_HOP) {
-		nhoff = sizeof(struct ipv6hdr);
-		nexthdr = skb->h.raw[0];
-		skb->h.raw += (skb->h.raw[1]+1)<<3;
-	}
-
 	rcu_read_lock();
 resubmit:
-	if (!pskb_pull(skb, skb->h.raw - skb->data))
+	idev = ip6_dst_idev(skb_dst(skb));
+	if (!pskb_pull(skb, skb_transport_offset(skb)))
 		goto discard;
-	nexthdr = skb->nh.raw[nhoff];
+	nhoff = IP6CB(skb)->nhoff;
+	nexthdr = skb_network_header(skb)[nhoff];
 
-	raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
-	if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
-		raw_sk = NULL;
-
-	hash = nexthdr & (MAX_INET_PROTOS - 1);
-	if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
+	raw = raw6_local_deliver(skb, nexthdr);
+	if ((ipprot = rcu_dereference(inet6_protos[nexthdr])) != NULL) {
 		int ret;
-		
+
 		if (ipprot->flags & INET6_PROTO_FINAL) {
-			struct ipv6hdr *hdr;	
+			const struct ipv6hdr *hdr;
 
-			skb_postpull_rcsum(skb, skb->nh.raw,
-					   skb->h.raw - skb->nh.raw);
-			hdr = skb->nh.ipv6h;
+			/* Free reference early: we don't need it any more,
+			   and it may hold ip_conntrack module loaded
+			   indefinitely. */
+			nf_reset(skb);
+
+			skb_postpull_rcsum(skb, skb_network_header(skb),
+					   skb_network_header_len(skb));
+			hdr = ipv6_hdr(skb);
 			if (ipv6_addr_is_multicast(&hdr->daddr) &&
 			    !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
 			    &hdr->saddr) &&
-			    !ipv6_is_mld(skb, nexthdr))
+			    !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb)))
 				goto discard;
 		}
 		if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
-		    !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 
+		    !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
 			goto discard;
-		
-		ret = ipprot->handler(&skb, &nhoff);
+
+		ret = ipprot->handler(skb);
 		if (ret > 0)
 			goto resubmit;
 		else if (ret == 0)
-			IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
+			IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS);
 	} else {
-		if (!raw_sk) {
+		if (!raw) {
 			if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-				IP6_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS);
+				IP6_INC_STATS_BH(net, idev,
+						 IPSTATS_MIB_INUNKNOWNPROTOS);
 				icmpv6_send(skb, ICMPV6_PARAMPROB,
-				            ICMPV6_UNK_NEXTHDR, nhoff,
-				            skb->dev);
+					    ICMPV6_UNK_NEXTHDR, nhoff);
 			}
-		} else
-			IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
-		kfree_skb(skb);
+			kfree_skb(skb);
+		} else {
+			IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS);
+			consume_skb(skb);
+		}
 	}
 	rcu_read_unlock();
 	return 0;
 
 discard:
-	IP6_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+	IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS);
 	rcu_read_unlock();
 	kfree_skb(skb);
 	return 0;
@@ -219,52 +276,86 @@ discard:
 
 int ip6_input(struct sk_buff *skb)
 {
-	return NF_HOOK(PF_INET6,NF_IP6_LOCAL_IN, skb, skb->dev, NULL, ip6_input_finish);
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
+		       ip6_input_finish);
 }
 
 int ip6_mc_input(struct sk_buff *skb)
 {
-	struct ipv6hdr *hdr;
-	int deliver;
+	const struct ipv6hdr *hdr;
+	bool deliver;
 
-	IP6_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS);
+	IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev),
+			 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST,
+			 skb->len);
 
-	hdr = skb->nh.ipv6h;
-	deliver = likely(!(skb->dev->flags & (IFF_PROMISC|IFF_ALLMULTI))) ||
-	    ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
+	hdr = ipv6_hdr(skb);
+	deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
 
+#ifdef CONFIG_IPV6_MROUTE
 	/*
-	 *	IPv6 multicast router mode isnt currently supported.
+	 *      IPv6 multicast router mode is now supported ;)
 	 */
-#if 0
-	if (ipv6_config.multicast_route) {
-		int addr_type;
-
-		addr_type = ipv6_addr_type(&hdr->daddr);
-
-		if (!(addr_type & (IPV6_ADDR_LOOPBACK | IPV6_ADDR_LINKLOCAL))) {
-			struct sk_buff *skb2;
-			struct dst_entry *dst;
-
-			dst = skb->dst;
-			
-			if (deliver) {
-				skb2 = skb_clone(skb, GFP_ATOMIC);
-				dst_output(skb2);
-			} else {
-				dst_output(skb);
-				return 0;
+	if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding &&
+	    !(ipv6_addr_type(&hdr->daddr) &
+	      (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) &&
+	    likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
+		/*
+		 * Okay, we try to forward - split and duplicate
+		 * packets.
+		 */
+		struct sk_buff *skb2;
+		struct inet6_skb_parm *opt = IP6CB(skb);
+
+		/* Check for MLD */
+		if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
+			/* Check if this is a mld message */
+			u8 nexthdr = hdr->nexthdr;
+			__be16 frag_off;
+			int offset;
+
+			/* Check if the value of Router Alert
+			 * is for MLD (0x0000).
+			 */
+			if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) {
+				deliver = false;
+
+				if (!ipv6_ext_hdr(nexthdr)) {
+					/* BUG */
+					goto out;
+				}
+				offset = ipv6_skip_exthdr(skb, sizeof(*hdr),
+							  &nexthdr, &frag_off);
+				if (offset < 0)
+					goto out;
+
+				if (!ipv6_is_mld(skb, nexthdr, offset))
+					goto out;
+
+				deliver = true;
 			}
+			/* unknown RA - process it normally */
+		}
+
+		if (deliver)
+			skb2 = skb_clone(skb, GFP_ATOMIC);
+		else {
+			skb2 = skb;
+			skb = NULL;
+		}
+
+		if (skb2) {
+			ip6_mr_input(skb2);
 		}
 	}
+out:
 #endif
-
-	if (likely(deliver)) {
+	if (likely(deliver))
 		ip6_input(skb);
-		return 0;
+	else {
+		/* discard */
+		kfree_skb(skb);
 	}
-	/* discard */
-	kfree_skb(skb);
 
 	return 0;
 }
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
new file mode 100644
index 00000000000..65eda2a8af4
--- /dev/null
+++ b/net/ipv6/ip6_offload.c
@@ -0,0 +1,337 @@
+/*
+ *	IPV6 GSO/GRO offload support
+ *	Linux INET6 implementation
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/printk.h>
+
+#include <net/protocol.h>
+#include <net/ipv6.h>
+
+#include "ip6_offload.h"
+
+static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
+{
+	const struct net_offload *ops = NULL;
+
+	for (;;) {
+		struct ipv6_opt_hdr *opth;
+		int len;
+
+		if (proto != NEXTHDR_HOP) {
+			ops = rcu_dereference(inet6_offloads[proto]);
+
+			if (unlikely(!ops))
+				break;
+
+			if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+				break;
+		}
+
+		if (unlikely(!pskb_may_pull(skb, 8)))
+			break;
+
+		opth = (void *)skb->data;
+		len = ipv6_optlen(opth);
+
+		if (unlikely(!pskb_may_pull(skb, len)))
+			break;
+
+		proto = opth->nexthdr;
+		__skb_pull(skb, len);
+	}
+
+	return proto;
+}
+
+static int ipv6_gso_send_check(struct sk_buff *skb)
+{
+	const struct ipv6hdr *ipv6h;
+	const struct net_offload *ops;
+	int err = -EINVAL;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+		goto out;
+
+	ipv6h = ipv6_hdr(skb);
+	__skb_pull(skb, sizeof(*ipv6h));
+	err = -EPROTONOSUPPORT;
+
+	ops = rcu_dereference(inet6_offloads[
+		ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
+
+	if (likely(ops && ops->callbacks.gso_send_check)) {
+		skb_reset_transport_header(skb);
+		err = ops->callbacks.gso_send_check(skb);
+	}
+
+out:
+	return err;
+}
+
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
+	netdev_features_t features)
+{
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	struct ipv6hdr *ipv6h;
+	const struct net_offload *ops;
+	int proto;
+	struct frag_hdr *fptr;
+	unsigned int unfrag_ip6hlen;
+	u8 *prevhdr;
+	int offset = 0;
+	bool encap, udpfrag;
+	int nhoff;
+
+	if (unlikely(skb_shinfo(skb)->gso_type &
+		     ~(SKB_GSO_UDP |
+		       SKB_GSO_DODGY |
+		       SKB_GSO_TCP_ECN |
+		       SKB_GSO_GRE |
+		       SKB_GSO_GRE_CSUM |
+		       SKB_GSO_IPIP |
+		       SKB_GSO_SIT |
+		       SKB_GSO_UDP_TUNNEL |
+		       SKB_GSO_UDP_TUNNEL_CSUM |
+		       SKB_GSO_MPLS |
+		       SKB_GSO_TCPV6 |
+		       0)))
+		goto out;
+
+	skb_reset_network_header(skb);
+	nhoff = skb_network_header(skb) - skb_mac_header(skb);
+	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+		goto out;
+
+	encap = SKB_GSO_CB(skb)->encap_level > 0;
+	if (encap)
+		features = skb->dev->hw_enc_features & netif_skb_features(skb);
+	SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h);
+
+	ipv6h = ipv6_hdr(skb);
+	__skb_pull(skb, sizeof(*ipv6h));
+	segs = ERR_PTR(-EPROTONOSUPPORT);
+
+	proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
+
+	if (skb->encapsulation &&
+	    skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP))
+		udpfrag = proto == IPPROTO_UDP && encap;
+	else
+		udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
+
+	ops = rcu_dereference(inet6_offloads[proto]);
+	if (likely(ops && ops->callbacks.gso_segment)) {
+		skb_reset_transport_header(skb);
+		segs = ops->callbacks.gso_segment(skb, features);
+	}
+
+	if (IS_ERR(segs))
+		goto out;
+
+	for (skb = segs; skb; skb = skb->next) {
+		ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
+		ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h));
+		skb->network_header = (u8 *)ipv6h - skb->head;
+
+		if (udpfrag) {
+			unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+			fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen);
+			fptr->frag_off = htons(offset);
+			if (skb->next != NULL)
+				fptr->frag_off |= htons(IP6_MF);
+			offset += (ntohs(ipv6h->payload_len) -
+				   sizeof(struct frag_hdr));
+		}
+		if (encap)
+			skb_reset_inner_headers(skb);
+	}
+
+out:
+	return segs;
+}
+
+/* Return the total length of all the extension hdrs, following the same
+ * logic in ipv6_gso_pull_exthdrs() when parsing ext-hdrs.
+ */
+static int ipv6_exthdrs_len(struct ipv6hdr *iph,
+			    const struct net_offload **opps)
+{
+	struct ipv6_opt_hdr *opth = (void *)iph;
+	int len = 0, proto, optlen = sizeof(*iph);
+
+	proto = iph->nexthdr;
+	for (;;) {
+		if (proto != NEXTHDR_HOP) {
+			*opps = rcu_dereference(inet6_offloads[proto]);
+			if (unlikely(!(*opps)))
+				break;
+			if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR))
+				break;
+		}
+		opth = (void *)opth + optlen;
+		optlen = ipv6_optlen(opth);
+		len += optlen;
+		proto = opth->nexthdr;
+	}
+	return len;
+}
+
+static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
+					 struct sk_buff *skb)
+{
+	const struct net_offload *ops;
+	struct sk_buff **pp = NULL;
+	struct sk_buff *p;
+	struct ipv6hdr *iph;
+	unsigned int nlen;
+	unsigned int hlen;
+	unsigned int off;
+	u16 flush = 1;
+	int proto;
+
+	off = skb_gro_offset(skb);
+	hlen = off + sizeof(*iph);
+	iph = skb_gro_header_fast(skb, off);
+	if (skb_gro_header_hard(skb, hlen)) {
+		iph = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!iph))
+			goto out;
+	}
+
+	skb_set_network_header(skb, off);
+	skb_gro_pull(skb, sizeof(*iph));
+	skb_set_transport_header(skb, skb_gro_offset(skb));
+
+	flush += ntohs(iph->payload_len) != skb_gro_len(skb);
+
+	rcu_read_lock();
+	proto = iph->nexthdr;
+	ops = rcu_dereference(inet6_offloads[proto]);
+	if (!ops || !ops->callbacks.gro_receive) {
+		__pskb_pull(skb, skb_gro_offset(skb));
+		proto = ipv6_gso_pull_exthdrs(skb, proto);
+		skb_gro_pull(skb, -skb_transport_offset(skb));
+		skb_reset_transport_header(skb);
+		__skb_push(skb, skb_gro_offset(skb));
+
+		ops = rcu_dereference(inet6_offloads[proto]);
+		if (!ops || !ops->callbacks.gro_receive)
+			goto out_unlock;
+
+		iph = ipv6_hdr(skb);
+	}
+
+	NAPI_GRO_CB(skb)->proto = proto;
+
+	flush--;
+	nlen = skb_network_header_len(skb);
+
+	for (p = *head; p; p = p->next) {
+		const struct ipv6hdr *iph2;
+		__be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */
+
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		iph2 = (struct ipv6hdr *)(p->data + off);
+		first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ;
+
+		/* All fields must match except length and Traffic Class.
+		 * XXX skbs on the gro_list have all been parsed and pulled
+		 * already so we don't need to compare nlen
+		 * (nlen != (sizeof(*iph2) + ipv6_exthdrs_len(iph2, &ops)))
+		 * memcmp() alone below is suffcient, right?
+		 */
+		 if ((first_word & htonl(0xF00FFFFF)) ||
+		    memcmp(&iph->nexthdr, &iph2->nexthdr,
+			   nlen - offsetof(struct ipv6hdr, nexthdr))) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+		/* flush if Traffic Class fields are different */
+		NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
+		NAPI_GRO_CB(p)->flush |= flush;
+	}
+
+	NAPI_GRO_CB(skb)->flush |= flush;
+
+	skb_gro_postpull_rcsum(skb, iph, nlen);
+
+	pp = ops->callbacks.gro_receive(head, skb);
+
+out_unlock:
+	rcu_read_unlock();
+
+out:
+	NAPI_GRO_CB(skb)->flush |= flush;
+
+	return pp;
+}
+
+static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	const struct net_offload *ops;
+	struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
+	int err = -ENOSYS;
+
+	iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
+
+	rcu_read_lock();
+
+	nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops);
+	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+		goto out_unlock;
+
+	err = ops->callbacks.gro_complete(skb, nhoff);
+
+out_unlock:
+	rcu_read_unlock();
+
+	return err;
+}
+
+static struct packet_offload ipv6_packet_offload __read_mostly = {
+	.type = cpu_to_be16(ETH_P_IPV6),
+	.callbacks = {
+		.gso_send_check = ipv6_gso_send_check,
+		.gso_segment = ipv6_gso_segment,
+		.gro_receive = ipv6_gro_receive,
+		.gro_complete = ipv6_gro_complete,
+	},
+};
+
+static const struct net_offload sit_offload = {
+	.callbacks = {
+		.gso_send_check = ipv6_gso_send_check,
+		.gso_segment	= ipv6_gso_segment,
+	},
+};
+
+static int __init ipv6_offload_init(void)
+{
+
+	if (tcpv6_offload_init() < 0)
+		pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
+	if (udp_offload_init() < 0)
+		pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
+	if (ipv6_exthdrs_offload_init() < 0)
+		pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
+
+	dev_add_offload(&ipv6_packet_offload);
+
+	inet_add_offload(&sit_offload, IPPROTO_IPV6);
+
+	return 0;
+}
+
+fs_initcall(ipv6_offload_init);
diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h
new file mode 100644
index 00000000000..2e155c651b3
--- /dev/null
+++ b/net/ipv6/ip6_offload.h
@@ -0,0 +1,18 @@
+/*
+ *	IPV6 GSO/GRO offload support
+ *	Linux INET6 implementation
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __ip6_offload_h
+#define __ip6_offload_h
+
+int ipv6_exthdrs_offload_init(void);
+int udp_offload_init(void);
+int tcpv6_offload_init(void);
+
+#endif
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 614296a920c..45702b8cd14 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1,11 +1,9 @@
 /*
  *	IPv6 output functions
- *	Linux INET6 implementation 
+ *	Linux INET6 implementation
  *
  *	Authors:
- *	Pedro Roque		<roque@di.fc.ul.pt>	
- *
- *	$Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
+ *	Pedro Roque		<roque@di.fc.ul.pt>
  *
  *	Based on linux/net/ipv4/ip_output.c
  *
@@ -28,9 +26,8 @@
  *				for datagram xmit
  */
 
-#include <linux/config.h>
 #include <linux/errno.h>
-#include <linux/types.h>
+#include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/socket.h>
 #include <linux/net.h>
@@ -39,6 +36,8 @@
 #include <linux/in6.h>
 #include <linux/tcp.h>
 #include <linux/route.h>
+#include <linux/module.h>
+#include <linux/slab.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
@@ -55,123 +54,119 @@
 #include <net/icmp.h>
 #include <net/xfrm.h>
 #include <net/checksum.h>
+#include <linux/mroute6.h>
 
-static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
-
-static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
-{
-	static u32 ipv6_fragmentation_id = 1;
-	static DEFINE_SPINLOCK(ip6_id_lock);
-
-	spin_lock_bh(&ip6_id_lock);
-	fhdr->identification = htonl(ipv6_fragmentation_id);
-	if (++ipv6_fragmentation_id == 0)
-		ipv6_fragmentation_id = 1;
-	spin_unlock_bh(&ip6_id_lock);
-}
-
-static inline int ip6_output_finish(struct sk_buff *skb)
-{
-
-	struct dst_entry *dst = skb->dst;
-	struct hh_cache *hh = dst->hh;
-
-	if (hh) {
-		int hh_alen;
-
-		read_lock_bh(&hh->hh_lock);
-		hh_alen = HH_DATA_ALIGN(hh->hh_len);
-		memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
-		read_unlock_bh(&hh->hh_lock);
-	        skb_push(skb, hh->hh_len);
-		return hh->hh_output(skb);
-	} else if (dst->neighbour)
-		return dst->neighbour->output(skb);
-
-	IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
-	kfree_skb(skb);
-	return -EINVAL;
-
-}
-
-/* dev_loopback_xmit for use with netfilter. */
-static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
-{
-	newskb->mac.raw = newskb->data;
-	__skb_pull(newskb, newskb->nh.raw - newskb->data);
-	newskb->pkt_type = PACKET_LOOPBACK;
-	newskb->ip_summed = CHECKSUM_UNNECESSARY;
-	BUG_TRAP(newskb->dst);
-
-	netif_rx(newskb);
-	return 0;
-}
-
-
-static int ip6_output2(struct sk_buff *skb)
+static int ip6_finish_output2(struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	struct net_device *dev = dst->dev;
+	struct neighbour *neigh;
+	struct in6_addr *nexthop;
+	int ret;
 
 	skb->protocol = htons(ETH_P_IPV6);
 	skb->dev = dev;
 
-	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
-		struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
+	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
+		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 
-		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
-		    ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
-				&skb->nh.ipv6h->saddr)) {
+		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
+		    ((mroute6_socket(dev_net(dev), skb) &&
+		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
+		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
+					 &ipv6_hdr(skb)->saddr))) {
 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 
 			/* Do not check for IFF_ALLMULTI; multicast routing
 			   is not supported in any case.
 			 */
 			if (newskb)
-				NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
-					newskb->dev,
-					ip6_dev_loopback_xmit);
+				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+					newskb, NULL, newskb->dev,
+					dev_loopback_xmit);
 
-			if (skb->nh.ipv6h->hop_limit == 0) {
-				IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+			if (ipv6_hdr(skb)->hop_limit == 0) {
+				IP6_INC_STATS(dev_net(dev), idev,
+					      IPSTATS_MIB_OUTDISCARDS);
 				kfree_skb(skb);
 				return 0;
 			}
 		}
 
-		IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
+				skb->len);
+
+		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
+		    IPV6_ADDR_SCOPE_NODELOCAL &&
+		    !(dev->flags & IFF_LOOPBACK)) {
+			kfree_skb(skb);
+			return 0;
+		}
+	}
+
+	rcu_read_lock_bh();
+	nexthop = rt6_nexthop((struct rt6_info *)dst);
+	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
+	if (unlikely(!neigh))
+		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
+	if (!IS_ERR(neigh)) {
+		ret = dst_neigh_output(dst, neigh, skb);
+		rcu_read_unlock_bh();
+		return ret;
 	}
+	rcu_read_unlock_bh();
 
-	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
+	IP6_INC_STATS(dev_net(dst->dev),
+		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+	kfree_skb(skb);
+	return -EINVAL;
 }
 
-int ip6_output(struct sk_buff *skb)
+static int ip6_finish_output(struct sk_buff *skb)
 {
-	if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->ufo_size) ||
-				dst_allfrag(skb->dst))
-		return ip6_fragment(skb, ip6_output2);
+	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
+	    dst_allfrag(skb_dst(skb)) ||
+	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
+		return ip6_fragment(skb, ip6_finish_output2);
 	else
-		return ip6_output2(skb);
+		return ip6_finish_output2(skb);
+}
+
+int ip6_output(struct sock *sk, struct sk_buff *skb)
+{
+	struct net_device *dev = skb_dst(skb)->dev;
+	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+	if (unlikely(idev->cnf.disable_ipv6)) {
+		IP6_INC_STATS(dev_net(dev), idev,
+			      IPSTATS_MIB_OUTDISCARDS);
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
+			    ip6_finish_output,
+			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
 /*
- *	xmit an sk_buff (used by TCP)
+ *	xmit an sk_buff (used by TCP, SCTP and DCCP)
  */
 
-int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
-	     struct ipv6_txoptions *opt, int ipfragok)
+int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
+	     struct ipv6_txoptions *opt, int tclass)
 {
-	struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
-	struct in6_addr *first_hop = &fl->fl6_dst;
-	struct dst_entry *dst = skb->dst;
+	struct net *net = sock_net(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct in6_addr *first_hop = &fl6->daddr;
+	struct dst_entry *dst = skb_dst(skb);
 	struct ipv6hdr *hdr;
-	u8  proto = fl->proto;
+	u8  proto = fl6->flowi6_proto;
 	int seg_len = skb->len;
-	int hlimit, tclass;
+	int hlimit = -1;
 	u32 mtu;
 
 	if (opt) {
-		int head_room;
+		unsigned int head_room;
 
 		/* First: exthdrs may take lots of space (~8K for now)
 		   MAX_HEADER is not enough.
@@ -182,14 +177,15 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 
 		if (skb_headroom(skb) < head_room) {
 			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
-			kfree_skb(skb);
-			skb = skb2;
-			if (skb == NULL) {	
-				IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+			if (skb2 == NULL) {
+				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+					      IPSTATS_MIB_OUTDISCARDS);
+				kfree_skb(skb);
 				return -ENOBUFS;
 			}
-			if (sk)
-				skb_set_owner_w(skb, sk);
+			consume_skb(skb);
+			skb = skb2;
+			skb_set_owner_w(skb, sk);
 		}
 		if (opt->opt_flen)
 			ipv6_push_frag_opts(skb, opt, &proto);
@@ -197,85 +193,47 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
 	}
 
-	hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
+	skb_push(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	hdr = ipv6_hdr(skb);
 
 	/*
 	 *	Fill in the IPv6 header
 	 */
-
-	hlimit = -1;
 	if (np)
 		hlimit = np->hop_limit;
 	if (hlimit < 0)
-		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
-	if (hlimit < 0)
-		hlimit = ipv6_get_hoplimit(dst->dev);
-
-	tclass = -1;
-	if (np)
-		tclass = np->tclass;
-	if (tclass < 0)
-		tclass = 0;
+		hlimit = ip6_dst_hoplimit(dst);
 
-	*(u32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
+	ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
 
 	hdr->payload_len = htons(seg_len);
 	hdr->nexthdr = proto;
 	hdr->hop_limit = hlimit;
 
-	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
-	ipv6_addr_copy(&hdr->daddr, first_hop);
+	hdr->saddr = fl6->saddr;
+	hdr->daddr = *first_hop;
+
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->priority = sk->sk_priority;
+	skb->mark = sk->sk_mark;
 
 	mtu = dst_mtu(dst);
-	if ((skb->len <= mtu) || ipfragok) {
-		IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
-		return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
-				dst_output);
+	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
+		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
+			      IPSTATS_MIB_OUT, skb->len);
+		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
+			       dst->dev, dst_output);
 	}
 
-	if (net_ratelimit())
-		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
 	skb->dev = dst->dev;
-	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
-	IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
+	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
 	kfree_skb(skb);
 	return -EMSGSIZE;
 }
 
-/*
- *	To avoid extra problems ND packets are send through this
- *	routine. It's code duplication but I really want to avoid
- *	extra checks since ipv6_build_header is used by TCP (which
- *	is for us performance critical)
- */
-
-int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
-	       struct in6_addr *saddr, struct in6_addr *daddr,
-	       int proto, int len)
-{
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct ipv6hdr *hdr;
-	int totlen;
-
-	skb->protocol = htons(ETH_P_IPV6);
-	skb->dev = dev;
-
-	totlen = len + sizeof(struct ipv6hdr);
-
-	hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
-	skb->nh.ipv6h = hdr;
-
-	*(u32*)hdr = htonl(0x60000000);
-
-	hdr->payload_len = htons(len);
-	hdr->nexthdr = proto;
-	hdr->hop_limit = np->hop_limit;
-
-	ipv6_addr_copy(&hdr->saddr, saddr);
-	ipv6_addr_copy(&hdr->daddr, daddr);
-
-	return 0;
-}
+EXPORT_SYMBOL(ip6_xmit);
 
 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 {
@@ -306,26 +264,126 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 	return 0;
 }
 
+static int ip6_forward_proxy_check(struct sk_buff *skb)
+{
+	struct ipv6hdr *hdr = ipv6_hdr(skb);
+	u8 nexthdr = hdr->nexthdr;
+	__be16 frag_off;
+	int offset;
+
+	if (ipv6_ext_hdr(nexthdr)) {
+		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
+		if (offset < 0)
+			return 0;
+	} else
+		offset = sizeof(struct ipv6hdr);
+
+	if (nexthdr == IPPROTO_ICMPV6) {
+		struct icmp6hdr *icmp6;
+
+		if (!pskb_may_pull(skb, (skb_network_header(skb) +
+					 offset + 1 - skb->data)))
+			return 0;
+
+		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
+
+		switch (icmp6->icmp6_type) {
+		case NDISC_ROUTER_SOLICITATION:
+		case NDISC_ROUTER_ADVERTISEMENT:
+		case NDISC_NEIGHBOUR_SOLICITATION:
+		case NDISC_NEIGHBOUR_ADVERTISEMENT:
+		case NDISC_REDIRECT:
+			/* For reaction involving unicast neighbor discovery
+			 * message destined to the proxied address, pass it to
+			 * input function.
+			 */
+			return 1;
+		default:
+			break;
+		}
+	}
+
+	/*
+	 * The proxying router can't forward traffic sent to a link-local
+	 * address, so signal the sender and discard the packet. This
+	 * behavior is clarified by the MIPv6 specification.
+	 */
+	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
+		dst_link_failure(skb);
+		return -1;
+	}
+
+	return 0;
+}
+
 static inline int ip6_forward_finish(struct sk_buff *skb)
 {
 	return dst_output(skb);
 }
 
+static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+{
+	unsigned int mtu;
+	struct inet6_dev *idev;
+
+	if (dst_metric_locked(dst, RTAX_MTU)) {
+		mtu = dst_metric_raw(dst, RTAX_MTU);
+		if (mtu)
+			return mtu;
+	}
+
+	mtu = IPV6_MIN_MTU;
+	rcu_read_lock();
+	idev = __in6_dev_get(dst->dev);
+	if (idev)
+		mtu = idev->cnf.mtu6;
+	rcu_read_unlock();
+
+	return mtu;
+}
+
+static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
+{
+	if (skb->len <= mtu)
+		return false;
+
+	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
+	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
+		return true;
+
+	if (skb->ignore_df)
+		return false;
+
+	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+		return false;
+
+	return true;
+}
+
 int ip6_forward(struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
-	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	struct dst_entry *dst = skb_dst(skb);
+	struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct inet6_skb_parm *opt = IP6CB(skb);
-	
-	if (ipv6_devconf.forwarding == 0)
+	struct net *net = dev_net(dst->dev);
+	u32 mtu;
+
+	if (net->ipv6.devconf_all->forwarding == 0)
 		goto error;
 
+	if (skb->pkt_type != PACKET_HOST)
+		goto drop;
+
+	if (skb_warn_if_lro(skb))
+		goto drop;
+
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
-		IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
+		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+				 IPSTATS_MIB_INDISCARDS);
 		goto drop;
 	}
 
-	skb->ip_summed = CHECKSUM_NONE;
+	skb_forward_csum(skb);
 
 	/*
 	 *	We DO NOT make any processing on
@@ -340,9 +398,8 @@ int ip6_forward(struct sk_buff *skb)
 	 *	cannot be fragmented, because there is no warranty
 	 *	that different fragments will go along one path. --ANK
 	 */
-	if (opt->ra) {
-		u8 *ptr = skb->nh.raw + opt->ra;
-		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
+	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
+		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
 			return 0;
 	}
 
@@ -352,26 +409,42 @@ int ip6_forward(struct sk_buff *skb)
 	if (hdr->hop_limit <= 1) {
 		/* Force OUTPUT device used as source address */
 		skb->dev = dst->dev;
-		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
-			    0, skb->dev);
+		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
+		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+				 IPSTATS_MIB_INHDRERRORS);
 
 		kfree_skb(skb);
 		return -ETIMEDOUT;
 	}
 
+	/* XXX: idev->cnf.proxy_ndp? */
+	if (net->ipv6.devconf_all->proxy_ndp &&
+	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
+		int proxied = ip6_forward_proxy_check(skb);
+		if (proxied > 0)
+			return ip6_input(skb);
+		else if (proxied < 0) {
+			IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+					 IPSTATS_MIB_INDISCARDS);
+			goto drop;
+		}
+	}
+
 	if (!xfrm6_route_forward(skb)) {
-		IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
+		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+				 IPSTATS_MIB_INDISCARDS);
 		goto drop;
 	}
-	dst = skb->dst;
+	dst = skb_dst(skb);
 
 	/* IPv6 specs say nothing about it, but it is clear that we cannot
 	   send redirects to source routed frames.
+	   We don't send redirects to frames decapsulated from IPsec.
 	 */
-	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
+	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
 		struct in6_addr *target = NULL;
+		struct inet_peer *peer;
 		struct rt6_info *rt;
-		struct neighbour *n = dst->neighbour;
 
 		/*
 		 *	incoming and outgoing devices are the same
@@ -379,48 +452,69 @@ int ip6_forward(struct sk_buff *skb)
 		 */
 
 		rt = (struct rt6_info *) dst;
-		if ((rt->rt6i_flags & RTF_GATEWAY))
-			target = (struct in6_addr*)&n->primary_key;
+		if (rt->rt6i_flags & RTF_GATEWAY)
+			target = &rt->rt6i_gateway;
 		else
 			target = &hdr->daddr;
 
+		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+
 		/* Limit redirects both by destination (here)
 		   and by source (inside ndisc_send_redirect)
 		 */
-		if (xrlim_allow(dst, 1*HZ))
-			ndisc_send_redirect(skb, n, target);
-	} else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
-						|IPV6_ADDR_LINKLOCAL)) {
+		if (inet_peer_xrlim_allow(peer, 1*HZ))
+			ndisc_send_redirect(skb, target);
+		if (peer)
+			inet_putpeer(peer);
+	} else {
+		int addrtype = ipv6_addr_type(&hdr->saddr);
+
 		/* This check is security critical. */
-		goto error;
+		if (addrtype == IPV6_ADDR_ANY ||
+		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
+			goto error;
+		if (addrtype & IPV6_ADDR_LINKLOCAL) {
+			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
+				    ICMPV6_NOT_NEIGHBOUR, 0);
+			goto error;
+		}
 	}
 
-	if (skb->len > dst_mtu(dst)) {
+	mtu = ip6_dst_mtu_forward(dst);
+	if (mtu < IPV6_MIN_MTU)
+		mtu = IPV6_MIN_MTU;
+
+	if (ip6_pkt_too_big(skb, mtu)) {
 		/* Again, force OUTPUT device used as source address */
 		skb->dev = dst->dev;
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
-		IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
-		IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+				 IPSTATS_MIB_INTOOBIGERRORS);
+		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+				 IPSTATS_MIB_FRAGFAILS);
 		kfree_skb(skb);
 		return -EMSGSIZE;
 	}
 
 	if (skb_cow(skb, dst->dev->hard_header_len)) {
-		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+				 IPSTATS_MIB_OUTDISCARDS);
 		goto drop;
 	}
 
-	hdr = skb->nh.ipv6h;
+	hdr = ipv6_hdr(skb);
 
 	/* Mangling hops number delayed to point after skb COW */
- 
+
 	hdr->hop_limit--;
 
-	IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
-	return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
+	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
+		       ip6_forward_finish);
 
 error:
-	IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
 drop:
 	kfree_skb(skb);
 	return -EINVAL;
@@ -431,122 +525,123 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->pkt_type = from->pkt_type;
 	to->priority = from->priority;
 	to->protocol = from->protocol;
-	dst_release(to->dst);
-	to->dst = dst_clone(from->dst);
+	skb_dst_drop(to);
+	skb_dst_set(to, dst_clone(skb_dst(from)));
 	to->dev = from->dev;
+	to->mark = from->mark;
 
 #ifdef CONFIG_NET_SCHED
 	to->tc_index = from->tc_index;
 #endif
-#ifdef CONFIG_NETFILTER
-	to->nfmark = from->nfmark;
-	/* Connection association is same as pre-frag packet */
-	to->nfct = from->nfct;
-	nf_conntrack_get(to->nfct);
-	to->nfctinfo = from->nfctinfo;
-#ifdef CONFIG_BRIDGE_NETFILTER
-	nf_bridge_put(to->nf_bridge);
-	to->nf_bridge = from->nf_bridge;
-	nf_bridge_get(to->nf_bridge);
-#endif
-#endif
+	nf_copy(to, from);
+	skb_copy_secmark(to, from);
 }
 
-int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
+static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
 {
-	u16 offset = sizeof(struct ipv6hdr);
-	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
-	unsigned int packet_len = skb->tail - skb->nh.raw;
-	int found_rhdr = 0;
-	*nexthdr = &skb->nh.ipv6h->nexthdr;
-
-	while (offset + 1 <= packet_len) {
-
-		switch (**nexthdr) {
-
-		case NEXTHDR_HOP:
-		case NEXTHDR_ROUTING:
-		case NEXTHDR_DEST:
-			if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
-			if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
-			offset += ipv6_optlen(exthdr);
-			*nexthdr = &exthdr->nexthdr;
-			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
-			break;
-		default :
-			return offset;
-		}
-	}
+	static u32 ip6_idents_hashrnd __read_mostly;
+	u32 hash, id;
 
-	return offset;
+	net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
+
+	hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
+	hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash);
+
+	id = ip_idents_reserve(hash, 1);
+	fhdr->identification = htonl(id);
 }
 
-static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
+int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
-	struct net_device *dev;
 	struct sk_buff *frag;
-	struct rt6_info *rt = (struct rt6_info*)skb->dst;
+	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
+	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 	struct ipv6hdr *tmp_hdr;
 	struct frag_hdr *fh;
 	unsigned int mtu, hlen, left, len;
-	u32 frag_id = 0;
+	int hroom, troom;
+	__be32 frag_id = 0;
 	int ptr, offset = 0, err=0;
 	u8 *prevhdr, nexthdr = 0;
+	struct net *net = dev_net(skb_dst(skb)->dev);
 
-	dev = rt->u.dst.dev;
 	hlen = ip6_find_1stfragopt(skb, &prevhdr);
 	nexthdr = *prevhdr;
 
-	mtu = dst_mtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
+	mtu = ip6_skb_dst_mtu(skb);
+
+	/* We must not fragment if the socket is set to force MTU discovery
+	 * or if the skb it not generated by a local socket.
+	 */
+	if (unlikely(!skb->ignore_df && skb->len > mtu) ||
+		     (IP6CB(skb)->frag_max_size &&
+		      IP6CB(skb)->frag_max_size > mtu)) {
+		if (skb->sk && dst_allfrag(skb_dst(skb)))
+			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
+
+		skb->dev = skb_dst(skb)->dev;
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+			      IPSTATS_MIB_FRAGFAILS);
+		kfree_skb(skb);
+		return -EMSGSIZE;
+	}
+
+	if (np && np->frag_size < mtu) {
+		if (np->frag_size)
+			mtu = np->frag_size;
+	}
+	mtu -= hlen + sizeof(struct frag_hdr);
 
-	if (skb_shinfo(skb)->frag_list) {
+	if (skb_has_frag_list(skb)) {
 		int first_len = skb_pagelen(skb);
+		struct sk_buff *frag2;
 
 		if (first_len - hlen > mtu ||
 		    ((first_len - hlen) & 7) ||
 		    skb_cloned(skb))
 			goto slow_path;
 
-		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
+		skb_walk_frags(skb, frag) {
 			/* Correct geometry. */
 			if (frag->len > mtu ||
 			    ((frag->len & 7) && frag->next) ||
 			    skb_headroom(frag) < hlen)
-			    goto slow_path;
+				goto slow_path_clean;
 
 			/* Partially cloned skb? */
 			if (skb_shared(frag))
-				goto slow_path;
+				goto slow_path_clean;
 
 			BUG_ON(frag->sk);
 			if (skb->sk) {
-				sock_hold(skb->sk);
 				frag->sk = skb->sk;
 				frag->destructor = sock_wfree;
-				skb->truesize -= frag->truesize;
 			}
+			skb->truesize -= frag->truesize;
 		}
 
 		err = 0;
 		offset = 0;
 		frag = skb_shinfo(skb)->frag_list;
-		skb_shinfo(skb)->frag_list = NULL;
+		skb_frag_list_init(skb);
 		/* BUILD HEADER */
 
-		tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
+		*prevhdr = NEXTHDR_FRAGMENT;
+		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 		if (!tmp_hdr) {
-			IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+				      IPSTATS_MIB_FRAGFAILS);
 			return -ENOMEM;
 		}
 
-		*prevhdr = NEXTHDR_FRAGMENT;
-		memcpy(tmp_hdr, skb->nh.raw, hlen);
 		__skb_pull(skb, hlen);
 		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
-		skb->nh.raw = __skb_push(skb, hlen);
-		memcpy(skb->nh.raw, tmp_hdr, hlen);
+		__skb_push(skb, hlen);
+		skb_reset_network_header(skb);
+		memcpy(skb_network_header(skb), tmp_hdr, hlen);
 
-		ipv6_select_ident(skb, fh);
+		ipv6_select_ident(fh, rt);
 		fh->nexthdr = nexthdr;
 		fh->reserved = 0;
 		fh->frag_off = htons(IP6_MF);
@@ -555,18 +650,22 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		first_len = skb_pagelen(skb);
 		skb->data_len = first_len - skb_headlen(skb);
 		skb->len = first_len;
-		skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
- 
+		ipv6_hdr(skb)->payload_len = htons(first_len -
+						   sizeof(struct ipv6hdr));
+
+		dst_hold(&rt->dst);
 
 		for (;;) {
 			/* Prepare header of the next frame,
 			 * before previous one went down. */
 			if (frag) {
 				frag->ip_summed = CHECKSUM_NONE;
-				frag->h.raw = frag->data;
+				skb_reset_transport_header(frag);
 				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
-				frag->nh.raw = __skb_push(frag, hlen);
-				memcpy(frag->nh.raw, tmp_hdr, hlen);
+				__skb_push(frag, hlen);
+				skb_reset_network_header(frag);
+				memcpy(skb_network_header(frag), tmp_hdr,
+				       hlen);
 				offset += skb->len - hlen - sizeof(struct frag_hdr);
 				fh->nexthdr = nexthdr;
 				fh->reserved = 0;
@@ -574,11 +673,17 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 				if (frag->next != NULL)
 					fh->frag_off |= htons(IP6_MF);
 				fh->identification = frag_id;
-				frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+				ipv6_hdr(frag)->payload_len =
+						htons(frag->len -
+						      sizeof(struct ipv6hdr));
 				ip6_copy_metadata(frag, skb);
 			}
-			
+
 			err = output(skb);
+			if(!err)
+				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
+					      IPSTATS_MIB_FRAGCREATES);
+
 			if (err || !frag)
 				break;
 
@@ -587,11 +692,12 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 			skb->next = NULL;
 		}
 
-		if (tmp_hdr)
-			kfree(tmp_hdr);
+		kfree(tmp_hdr);
 
 		if (err == 0) {
-			IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
+			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
+				      IPSTATS_MIB_FRAGOKS);
+			ip6_rt_put(rt);
 			return 0;
 		}
 
@@ -601,11 +707,26 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 			frag = skb;
 		}
 
-		IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
+			      IPSTATS_MIB_FRAGFAILS);
+		ip6_rt_put(rt);
 		return err;
+
+slow_path_clean:
+		skb_walk_frags(skb, frag2) {
+			if (frag2 == frag)
+				break;
+			frag2->sk = NULL;
+			frag2->destructor = NULL;
+			skb->truesize += frag2->truesize;
+		}
 	}
 
 slow_path:
+	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
+	    skb_checksum_help(skb))
+		goto fail;
+
 	left = skb->len - hlen;		/* Space per frame */
 	ptr = hlen;			/* Where to start from */
 
@@ -614,6 +735,8 @@ slow_path:
 	 */
 
 	*prevhdr = NEXTHDR_FRAGMENT;
+	hroom = LL_RESERVED_SPACE(rt->dst.dev);
+	troom = rt->dst.dev->needed_tailroom;
 
 	/*
 	 *	Keep copying data until we run out.
@@ -623,7 +746,7 @@ slow_path:
 		/* IF: it doesn't fit, use 'mtu' - the data space left */
 		if (len > mtu)
 			len = mtu;
-		/* IF: we are not sending upto and including the packet end
+		/* IF: we are not sending up to and including the packet end
 		   then align the next start on an eight byte boundary */
 		if (len < left)	{
 			len &= ~7;
@@ -632,9 +755,11 @@ slow_path:
 		 *	Allocate buffer.
 		 */
 
-		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
+		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
+				      hroom + troom, GFP_ATOMIC)) == NULL) {
 			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
-			IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+				      IPSTATS_MIB_FRAGFAILS);
 			err = -ENOMEM;
 			goto fail;
 		}
@@ -644,11 +769,12 @@ slow_path:
 		 */
 
 		ip6_copy_metadata(frag, skb);
-		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
+		skb_reserve(frag, hroom);
 		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
-		frag->nh.raw = frag->data;
-		fh = (struct frag_hdr*)(frag->data + hlen);
-		frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
+		skb_reset_network_header(frag);
+		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
+		frag->transport_header = (frag->network_header + hlen +
+					  sizeof(struct frag_hdr));
 
 		/*
 		 *	Charge the memory for the fragment to any owner
@@ -660,7 +786,7 @@ slow_path:
 		/*
 		 *	Copy the packet header into the new buffer.
 		 */
-		memcpy(frag->nh.raw, skb->data, hlen);
+		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 
 		/*
 		 *	Build fragment header.
@@ -668,7 +794,7 @@ slow_path:
 		fh->nexthdr = nexthdr;
 		fh->reserved = 0;
 		if (!frag_id) {
-			ipv6_select_ident(skb, fh);
+			ipv6_select_ident(fh, rt);
 			frag_id = fh->identification;
 		} else
 			fh->identification = frag_id;
@@ -676,14 +802,15 @@ slow_path:
 		/*
 		 *	Copy a block of the IP datagram.
 		 */
-		if (skb_copy_bits(skb, ptr, frag->h.raw, len))
+		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
 			BUG();
 		left -= len;
 
 		fh->frag_off = htons(offset);
 		if (left > 0)
 			fh->frag_off |= htons(IP6_MF);
-		frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+		ipv6_hdr(frag)->payload_len = htons(frag->len -
+						    sizeof(struct ipv6hdr));
 
 		ptr += len;
 		offset += len;
@@ -691,92 +818,242 @@ slow_path:
 		/*
 		 *	Put this fragment into the sending queue.
 		 */
-
-		IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
-
 		err = output(frag);
 		if (err)
 			goto fail;
+
+		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+			      IPSTATS_MIB_FRAGCREATES);
 	}
-	kfree_skb(skb);
-	IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
+	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+		      IPSTATS_MIB_FRAGOKS);
+	consume_skb(skb);
 	return err;
 
 fail:
-	kfree_skb(skb); 
-	IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+		      IPSTATS_MIB_FRAGFAILS);
+	kfree_skb(skb);
 	return err;
 }
 
-int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
+static inline int ip6_rt_check(const struct rt6key *rt_key,
+			       const struct in6_addr *fl_addr,
+			       const struct in6_addr *addr_cache)
 {
-	int err = 0;
+	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
+		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
+}
 
-	*dst = NULL;
-	if (sk) {
-		struct ipv6_pinfo *np = inet6_sk(sk);
-	
-		*dst = sk_dst_check(sk, np->dst_cookie);
-		if (*dst) {
-			struct rt6_info *rt = (struct rt6_info*)*dst;
-	
-				/* Yes, checking route validity in not connected
-				   case is not very simple. Take into account,
-				   that we do not support routing by source, TOS,
-				   and MSG_DONTROUTE 		--ANK (980726)
-	
-				   1. If route was host route, check that
-				      cached destination is current.
-				      If it is network route, we still may
-				      check its validity using saved pointer
-				      to the last used address: daddr_cache.
-				      We do not want to save whole address now,
-				      (because main consumer of this service
-				       is tcp, which has not this problem),
-				      so that the last trick works only on connected
-				      sockets.
-				   2. oif also should be the same.
-				 */
-	
-			if (((rt->rt6i_dst.plen != 128 ||
-			      !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
-			     && (np->daddr_cache == NULL ||
-				 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
-			    || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
-				dst_release(*dst);
-				*dst = NULL;
-			}
-		}
+static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
+					  struct dst_entry *dst,
+					  const struct flowi6 *fl6)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct rt6_info *rt;
+
+	if (!dst)
+		goto out;
+
+	if (dst->ops->family != AF_INET6) {
+		dst_release(dst);
+		return NULL;
+	}
+
+	rt = (struct rt6_info *)dst;
+	/* Yes, checking route validity in not connected
+	 * case is not very simple. Take into account,
+	 * that we do not support routing by source, TOS,
+	 * and MSG_DONTROUTE 		--ANK (980726)
+	 *
+	 * 1. ip6_rt_check(): If route was host route,
+	 *    check that cached destination is current.
+	 *    If it is network route, we still may
+	 *    check its validity using saved pointer
+	 *    to the last used address: daddr_cache.
+	 *    We do not want to save whole address now,
+	 *    (because main consumer of this service
+	 *    is tcp, which has not this problem),
+	 *    so that the last trick works only on connected
+	 *    sockets.
+	 * 2. oif also should be the same.
+	 */
+	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
+#ifdef CONFIG_IPV6_SUBTREES
+	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
+#endif
+	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
+		dst_release(dst);
+		dst = NULL;
 	}
 
+out:
+	return dst;
+}
+
+static int ip6_dst_lookup_tail(struct sock *sk,
+			       struct dst_entry **dst, struct flowi6 *fl6)
+{
+	struct net *net = sock_net(sk);
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	struct neighbour *n;
+	struct rt6_info *rt;
+#endif
+	int err;
+
 	if (*dst == NULL)
-		*dst = ip6_route_output(sk, fl);
+		*dst = ip6_route_output(net, sk, fl6);
 
 	if ((err = (*dst)->error))
 		goto out_err_release;
 
-	if (ipv6_addr_any(&fl->fl6_src)) {
-		err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
-
+	if (ipv6_addr_any(&fl6->saddr)) {
+		struct rt6_info *rt = (struct rt6_info *) *dst;
+		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
+					  sk ? inet6_sk(sk)->srcprefs : 0,
+					  &fl6->saddr);
 		if (err)
 			goto out_err_release;
 	}
 
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	/*
+	 * Here if the dst entry we've looked up
+	 * has a neighbour entry that is in the INCOMPLETE
+	 * state and the src address from the flow is
+	 * marked as OPTIMISTIC, we release the found
+	 * dst entry and replace it instead with the
+	 * dst entry of the nexthop router
+	 */
+	rt = (struct rt6_info *) *dst;
+	rcu_read_lock_bh();
+	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
+	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
+	rcu_read_unlock_bh();
+
+	if (err) {
+		struct inet6_ifaddr *ifp;
+		struct flowi6 fl_gw6;
+		int redirect;
+
+		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
+				      (*dst)->dev, 1);
+
+		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
+		if (ifp)
+			in6_ifa_put(ifp);
+
+		if (redirect) {
+			/*
+			 * We need to get the dst entry for the
+			 * default router instead
+			 */
+			dst_release(*dst);
+			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
+			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
+			*dst = ip6_route_output(net, sk, &fl_gw6);
+			if ((err = (*dst)->error))
+				goto out_err_release;
+		}
+	}
+#endif
+
 	return 0;
 
 out_err_release:
+	if (err == -ENETUNREACH)
+		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
 	dst_release(*dst);
 	*dst = NULL;
 	return err;
 }
-inline int ip6_ufo_append_data(struct sock *sk,
+
+/**
+ *	ip6_dst_lookup - perform route lookup on flow
+ *	@sk: socket which provides route info
+ *	@dst: pointer to dst_entry * for result
+ *	@fl6: flow to lookup
+ *
+ *	This function performs a route lookup on the given flow.
+ *
+ *	It returns zero on success, or a standard errno code on error.
+ */
+int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
+{
+	*dst = NULL;
+	return ip6_dst_lookup_tail(sk, dst, fl6);
+}
+EXPORT_SYMBOL_GPL(ip6_dst_lookup);
+
+/**
+ *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
+ *	@sk: socket which provides route info
+ *	@fl6: flow to lookup
+ *	@final_dst: final destination address for ipsec lookup
+ *
+ *	This function performs a route lookup on the given flow.
+ *
+ *	It returns a valid dst pointer on success, or a pointer encoded
+ *	error code.
+ */
+struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+				      const struct in6_addr *final_dst)
+{
+	struct dst_entry *dst = NULL;
+	int err;
+
+	err = ip6_dst_lookup_tail(sk, &dst, fl6);
+	if (err)
+		return ERR_PTR(err);
+	if (final_dst)
+		fl6->daddr = *final_dst;
+
+	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+}
+EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
+
+/**
+ *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
+ *	@sk: socket which provides the dst cache and route info
+ *	@fl6: flow to lookup
+ *	@final_dst: final destination address for ipsec lookup
+ *
+ *	This function performs a route lookup on the given flow with the
+ *	possibility of using the cached route in the socket if it is valid.
+ *	It will take the socket dst lock when operating on the dst cache.
+ *	As a result, this function can only be used in process context.
+ *
+ *	It returns a valid dst pointer on success, or a pointer encoded
+ *	error code.
+ */
+struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+					 const struct in6_addr *final_dst)
+{
+	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
+	int err;
+
+	dst = ip6_sk_dst_check(sk, dst, fl6);
+
+	err = ip6_dst_lookup_tail(sk, &dst, fl6);
+	if (err)
+		return ERR_PTR(err);
+	if (final_dst)
+		fl6->daddr = *final_dst;
+
+	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
+
+static inline int ip6_ufo_append_data(struct sock *sk,
 			int getfrag(void *from, char *to, int offset, int len,
 			int odd, struct sk_buff *skb),
 			void *from, int length, int hh_len, int fragheaderlen,
-			int transhdrlen, int mtu,unsigned int flags)
+			int transhdrlen, int mtu,unsigned int flags,
+			struct rt6_info *rt)
 
 {
 	struct sk_buff *skb;
+	struct frag_hdr fhdr;
 	int err;
 
 	/* There is support for UDP large send offload by network
@@ -788,7 +1065,7 @@ inline int ip6_ufo_append_data(struct sock *sk,
 			hh_len + fragheaderlen + transhdrlen + 20,
 			(flags & MSG_DONTWAIT), &err);
 		if (skb == NULL)
-			return -ENOMEM;
+			return err;
 
 		/* reserve space for Hardware header */
 		skb_reserve(skb, hh_len);
@@ -797,112 +1074,207 @@ inline int ip6_ufo_append_data(struct sock *sk,
 		skb_put(skb,fragheaderlen + transhdrlen);
 
 		/* initialize network header pointer */
-		skb->nh.raw = skb->data;
+		skb_reset_network_header(skb);
 
 		/* initialize protocol header pointer */
-		skb->h.raw = skb->data + fragheaderlen;
+		skb->transport_header = skb->network_header + fragheaderlen;
 
-		skb->ip_summed = CHECKSUM_HW;
+		skb->protocol = htons(ETH_P_IPV6);
 		skb->csum = 0;
-		sk->sk_sndmsg_off = 0;
-	}
 
-	err = skb_append_datato_frags(sk,skb, getfrag, from,
-				      (length - transhdrlen));
-	if (!err) {
-		struct frag_hdr fhdr;
-
-		/* specify the length of each IP datagram fragment*/
-		skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen) - 
-						sizeof(struct frag_hdr);
-		ipv6_select_ident(skb, &fhdr);
-		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
 		__skb_queue_tail(&sk->sk_write_queue, skb);
-
-		return 0;
+	} else if (skb_is_gso(skb)) {
+		goto append;
 	}
-	/* There is not enough support do UPD LSO,
-	 * so follow normal path
+
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	/* Specify the length of each IPv6 datagram fragment.
+	 * It has to be a multiple of 8.
 	 */
-	kfree_skb(skb);
+	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
+				     sizeof(struct frag_hdr)) & ~7;
+	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+	ipv6_select_ident(&fhdr, rt);
+	skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
+
+append:
+	return skb_append_datato_frags(sk, skb, getfrag, from,
+				       (length - transhdrlen));
+}
 
-	return err;
+static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
+					       gfp_t gfp)
+{
+	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
+}
+
+static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
+						gfp_t gfp)
+{
+	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
+}
+
+static void ip6_append_data_mtu(unsigned int *mtu,
+				int *maxfraglen,
+				unsigned int fragheaderlen,
+				struct sk_buff *skb,
+				struct rt6_info *rt,
+				unsigned int orig_mtu)
+{
+	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
+		if (skb == NULL) {
+			/* first fragment, reserve header_len */
+			*mtu = orig_mtu - rt->dst.header_len;
+
+		} else {
+			/*
+			 * this fragment is not first, the headers
+			 * space is regarded as data space.
+			 */
+			*mtu = orig_mtu;
+		}
+		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
+			      + fragheaderlen - sizeof(struct frag_hdr);
+	}
 }
 
 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	int offset, int len, int odd, struct sk_buff *skb),
 	void *from, int length, int transhdrlen,
-	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
-	struct rt6_info *rt, unsigned int flags)
+	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
+	struct rt6_info *rt, unsigned int flags, int dontfrag)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct sk_buff *skb;
-	unsigned int maxfraglen, fragheaderlen;
+	struct inet_cork *cork;
+	struct sk_buff *skb, *skb_prev = NULL;
+	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
 	int exthdrlen;
+	int dst_exthdrlen;
 	int hh_len;
-	int mtu;
 	int copy;
 	int err;
 	int offset = 0;
-	int csummode = CHECKSUM_NONE;
+	__u8 tx_flags = 0;
 
 	if (flags&MSG_PROBE)
 		return 0;
+	cork = &inet->cork.base;
 	if (skb_queue_empty(&sk->sk_write_queue)) {
 		/*
 		 * setup for corking
 		 */
 		if (opt) {
-			if (np->cork.opt == NULL) {
-				np->cork.opt = kmalloc(opt->tot_len,
-						       sk->sk_allocation);
-				if (unlikely(np->cork.opt == NULL))
-					return -ENOBUFS;
-			} else if (np->cork.opt->tot_len < opt->tot_len) {
-				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
+			if (WARN_ON(np->cork.opt))
 				return -EINVAL;
-			}
-			memcpy(np->cork.opt, opt, opt->tot_len);
-			inet->cork.flags |= IPCORK_OPT;
+
+			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
+			if (unlikely(np->cork.opt == NULL))
+				return -ENOBUFS;
+
+			np->cork.opt->tot_len = opt->tot_len;
+			np->cork.opt->opt_flen = opt->opt_flen;
+			np->cork.opt->opt_nflen = opt->opt_nflen;
+
+			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
+							    sk->sk_allocation);
+			if (opt->dst0opt && !np->cork.opt->dst0opt)
+				return -ENOBUFS;
+
+			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
+							    sk->sk_allocation);
+			if (opt->dst1opt && !np->cork.opt->dst1opt)
+				return -ENOBUFS;
+
+			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
+							   sk->sk_allocation);
+			if (opt->hopopt && !np->cork.opt->hopopt)
+				return -ENOBUFS;
+
+			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
+							    sk->sk_allocation);
+			if (opt->srcrt && !np->cork.opt->srcrt)
+				return -ENOBUFS;
+
 			/* need source address above miyazawa*/
 		}
-		dst_hold(&rt->u.dst);
-		np->cork.rt = rt;
-		inet->cork.fl = *fl;
+		dst_hold(&rt->dst);
+		cork->dst = &rt->dst;
+		inet->cork.fl.u.ip6 = *fl6;
 		np->cork.hop_limit = hlimit;
 		np->cork.tclass = tclass;
-		inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
-		if (dst_allfrag(rt->u.dst.path))
-			inet->cork.flags |= IPCORK_ALLFRAG;
-		inet->cork.length = 0;
-		sk->sk_sndmsg_page = NULL;
-		sk->sk_sndmsg_off = 0;
-		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
+		if (rt->dst.flags & DST_XFRM_TUNNEL)
+			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
+		else
+			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+		if (np->frag_size < mtu) {
+			if (np->frag_size)
+				mtu = np->frag_size;
+		}
+		cork->fragsize = mtu;
+		if (dst_allfrag(rt->dst.path))
+			cork->flags |= IPCORK_ALLFRAG;
+		cork->length = 0;
+		exthdrlen = (opt ? opt->opt_flen : 0);
 		length += exthdrlen;
 		transhdrlen += exthdrlen;
+		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
 	} else {
-		rt = np->cork.rt;
-		fl = &inet->cork.fl;
-		if (inet->cork.flags & IPCORK_OPT)
-			opt = np->cork.opt;
+		rt = (struct rt6_info *)cork->dst;
+		fl6 = &inet->cork.fl.u.ip6;
+		opt = np->cork.opt;
 		transhdrlen = 0;
 		exthdrlen = 0;
-		mtu = inet->cork.fragsize;
+		dst_exthdrlen = 0;
+		mtu = cork->fragsize;
 	}
+	orig_mtu = mtu;
 
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 
-	fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
-	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
+	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
+			(opt ? opt->opt_nflen : 0);
+	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
+		     sizeof(struct frag_hdr);
 
 	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
-		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
-			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
+		unsigned int maxnonfragsize, headersize;
+
+		headersize = sizeof(struct ipv6hdr) +
+			     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
+			     (dst_allfrag(&rt->dst) ?
+			      sizeof(struct frag_hdr) : 0) +
+			     rt->rt6i_nfheader_len;
+
+		if (ip6_sk_ignore_df(sk))
+			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
+		else
+			maxnonfragsize = mtu;
+
+		/* dontfrag active */
+		if ((cork->length + length > mtu - headersize) && dontfrag &&
+		    (sk->sk_protocol == IPPROTO_UDP ||
+		     sk->sk_protocol == IPPROTO_RAW)) {
+			ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
+						   sizeof(struct ipv6hdr));
+			goto emsgsize;
+		}
+
+		if (cork->length + length > maxnonfragsize - headersize) {
+emsgsize:
+			ipv6_local_error(sk, EMSGSIZE, fl6,
+					 mtu - headersize +
+					 sizeof(struct ipv6hdr));
 			return -EMSGSIZE;
 		}
 	}
 
+	/* For UDP, check if TX timestamp is enabled */
+	if (sk->sk_type == SOCK_DGRAM)
+		sock_tx_timestamp(sk, &tx_flags);
+
 	/*
 	 * Let's try using as much space as possible.
 	 * Use MTU if total length of the message fits into the MTU.
@@ -910,32 +1282,35 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	 * fragment alignment (= 8-15 octects, in total).
 	 *
 	 * Note that we may need to "move" the data from the tail of
-	 * of the buffer to the new fragment when we split 
+	 * of the buffer to the new fragment when we split
 	 * the message.
 	 *
-	 * FIXME: It may be fragmented into multiple chunks 
+	 * FIXME: It may be fragmented into multiple chunks
 	 *        at once if non-fragmentable extension headers
 	 *        are too large.
-	 * --yoshfuji 
+	 * --yoshfuji
 	 */
 
-	inet->cork.length += length;
-	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
-	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
-
-		if(ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
-				fragheaderlen, transhdrlen, mtu, flags))
+	skb = skb_peek_tail(&sk->sk_write_queue);
+	cork->length += length;
+	if (((length > mtu) ||
+	     (skb && skb_is_gso(skb))) &&
+	    (sk->sk_protocol == IPPROTO_UDP) &&
+	    (rt->dst.dev->features & NETIF_F_UFO)) {
+		err = ip6_ufo_append_data(sk, getfrag, from, length,
+					  hh_len, fragheaderlen,
+					  transhdrlen, mtu, flags, rt);
+		if (err)
 			goto error;
-
 		return 0;
 	}
 
-	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+	if (!skb)
 		goto alloc_new_skb;
 
 	while (length > 0) {
 		/* Check if the remaining data fits into current packet. */
-		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
+		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
 		if (copy < length)
 			copy = maxfraglen - skb->len;
 
@@ -945,42 +1320,50 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 			unsigned int fraglen;
 			unsigned int fraggap;
 			unsigned int alloclen;
-			struct sk_buff *skb_prev;
 alloc_new_skb:
-			skb_prev = skb;
-
 			/* There's no room in the current skb */
-			if (skb_prev)
-				fraggap = skb_prev->len - maxfraglen;
+			if (skb)
+				fraggap = skb->len - maxfraglen;
 			else
 				fraggap = 0;
+			/* update mtu and maxfraglen if necessary */
+			if (skb == NULL || skb_prev == NULL)
+				ip6_append_data_mtu(&mtu, &maxfraglen,
+						    fragheaderlen, skb, rt,
+						    orig_mtu);
+
+			skb_prev = skb;
 
 			/*
 			 * If remaining data exceeds the mtu,
 			 * we know we need more fragment(s).
 			 */
 			datalen = length + fraggap;
-			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
-				datalen = maxfraglen - fragheaderlen;
 
-			fraglen = datalen + fragheaderlen;
+			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
+				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
 			if ((flags & MSG_MORE) &&
-			    !(rt->u.dst.dev->features&NETIF_F_SG))
+			    !(rt->dst.dev->features&NETIF_F_SG))
 				alloclen = mtu;
 			else
 				alloclen = datalen + fragheaderlen;
 
-			/*
-			 * The last fragment gets additional space at tail.
-			 * Note: we overallocate on fragments with MSG_MODE
-			 * because we have no idea if we're the last one.
-			 */
-			if (datalen == length + fraggap)
-				alloclen += rt->u.dst.trailer_len;
+			alloclen += dst_exthdrlen;
+
+			if (datalen != length + fraggap) {
+				/*
+				 * this is not the last fragment, the trailer
+				 * space is regarded as data space.
+				 */
+				datalen += rt->dst.trailer_len;
+			}
+
+			alloclen += rt->dst.trailer_len;
+			fraglen = datalen + fragheaderlen;
 
 			/*
 			 * We just reserve space for fragment header.
-			 * Note: this may be overallocation if the message 
+			 * Note: this may be overallocation if the message
 			 * (without MSG_MORE) fits into the MTU.
 			 */
 			alloclen += sizeof(struct frag_hdr);
@@ -998,25 +1381,36 @@ alloc_new_skb:
 							   sk->sk_allocation);
 				if (unlikely(skb == NULL))
 					err = -ENOBUFS;
+				else {
+					/* Only the initial fragment
+					 * is time stamped.
+					 */
+					tx_flags = 0;
+				}
 			}
 			if (skb == NULL)
 				goto error;
 			/*
 			 *	Fill in the control structures
 			 */
-			skb->ip_summed = csummode;
+			skb->protocol = htons(ETH_P_IPV6);
+			skb->ip_summed = CHECKSUM_NONE;
 			skb->csum = 0;
-			/* reserve for fragmentation */
-			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
+			/* reserve for fragmentation and ipsec header */
+			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
+				    dst_exthdrlen);
+
+			if (sk->sk_type == SOCK_DGRAM)
+				skb_shinfo(skb)->tx_flags = tx_flags;
 
 			/*
 			 *	Find where to start putting bytes
 			 */
 			data = skb_put(skb, fraglen);
-			skb->nh.raw = data + exthdrlen;
+			skb_set_network_header(skb, exthdrlen);
 			data += fragheaderlen;
-			skb->h.raw = data + exthdrlen;
-
+			skb->transport_header = (skb->network_header +
+						 fragheaderlen);
 			if (fraggap) {
 				skb->csum = skb_copy_and_csum_bits(
 					skb_prev, maxfraglen,
@@ -1024,9 +1418,10 @@ alloc_new_skb:
 				skb_prev->csum = csum_sub(skb_prev->csum,
 							  skb->csum);
 				data += fraggap;
-				skb_trim(skb_prev, maxfraglen);
+				pskb_trim_unique(skb_prev, maxfraglen);
 			}
 			copy = datalen - transhdrlen - fraggap;
+
 			if (copy < 0) {
 				err = -EINVAL;
 				kfree_skb(skb);
@@ -1041,7 +1436,7 @@ alloc_new_skb:
 			length -= datalen - fraggap;
 			transhdrlen = 0;
 			exthdrlen = 0;
-			csummode = CHECKSUM_NONE;
+			dst_exthdrlen = 0;
 
 			/*
 			 * Put the packet on the pending queue
@@ -1053,7 +1448,7 @@ alloc_new_skb:
 		if (copy > length)
 			copy = length;
 
-		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
+		if (!(rt->dst.dev->features&NETIF_F_SG)) {
 			unsigned int off;
 
 			off = skb->len;
@@ -1065,60 +1460,69 @@ alloc_new_skb:
 			}
 		} else {
 			int i = skb_shinfo(skb)->nr_frags;
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
-			struct page *page = sk->sk_sndmsg_page;
-			int off = sk->sk_sndmsg_off;
-			unsigned int left;
-
-			if (page && (left = PAGE_SIZE - off) > 0) {
-				if (copy >= left)
-					copy = left;
-				if (page != frag->page) {
-					if (i == MAX_SKB_FRAGS) {
-						err = -EMSGSIZE;
-						goto error;
-					}
-					get_page(page);
-					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
-					frag = &skb_shinfo(skb)->frags[i];
-				}
-			} else if(i < MAX_SKB_FRAGS) {
-				if (copy > PAGE_SIZE)
-					copy = PAGE_SIZE;
-				page = alloc_pages(sk->sk_allocation, 0);
-				if (page == NULL) {
-					err = -ENOMEM;
-					goto error;
-				}
-				sk->sk_sndmsg_page = page;
-				sk->sk_sndmsg_off = 0;
+			struct page_frag *pfrag = sk_page_frag(sk);
 
-				skb_fill_page_desc(skb, i, page, 0, 0);
-				frag = &skb_shinfo(skb)->frags[i];
-				skb->truesize += PAGE_SIZE;
-				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
-			} else {
-				err = -EMSGSIZE;
-				goto error;
-			}
-			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
-				err = -EFAULT;
+			err = -ENOMEM;
+			if (!sk_page_frag_refill(sk, pfrag))
 				goto error;
+
+			if (!skb_can_coalesce(skb, i, pfrag->page,
+					      pfrag->offset)) {
+				err = -EMSGSIZE;
+				if (i == MAX_SKB_FRAGS)
+					goto error;
+
+				__skb_fill_page_desc(skb, i, pfrag->page,
+						     pfrag->offset, 0);
+				skb_shinfo(skb)->nr_frags = ++i;
+				get_page(pfrag->page);
 			}
-			sk->sk_sndmsg_off += copy;
-			frag->size += copy;
+			copy = min_t(int, copy, pfrag->size - pfrag->offset);
+			if (getfrag(from,
+				    page_address(pfrag->page) + pfrag->offset,
+				    offset, copy, skb->len, skb) < 0)
+				goto error_efault;
+
+			pfrag->offset += copy;
+			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
 			skb->len += copy;
 			skb->data_len += copy;
+			skb->truesize += copy;
+			atomic_add(copy, &sk->sk_wmem_alloc);
 		}
 		offset += copy;
 		length -= copy;
 	}
+
 	return 0;
+
+error_efault:
+	err = -EFAULT;
 error:
-	inet->cork.length -= length;
-	IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+	cork->length -= length;
+	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
 	return err;
 }
+EXPORT_SYMBOL_GPL(ip6_append_data);
+
+static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
+{
+	if (np->cork.opt) {
+		kfree(np->cork.opt->dst0opt);
+		kfree(np->cork.opt->dst1opt);
+		kfree(np->cork.opt->hopopt);
+		kfree(np->cork.opt->srcrt);
+		kfree(np->cork.opt);
+		np->cork.opt = NULL;
+	}
+
+	if (inet->cork.base.dst) {
+		dst_release(inet->cork.base.dst);
+		inet->cork.base.dst = NULL;
+		inet->cork.base.flags &= ~IPCORK_ALLFRAG;
+	}
+	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
+}
 
 int ip6_push_pending_frames(struct sock *sk)
 {
@@ -1127,11 +1531,12 @@ int ip6_push_pending_frames(struct sock *sk)
 	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct net *net = sock_net(sk);
 	struct ipv6hdr *hdr;
 	struct ipv6_txoptions *opt = np->cork.opt;
-	struct rt6_info *rt = np->cork.rt;
-	struct flowi *fl = &inet->cork.fl;
-	unsigned char proto = fl->proto;
+	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
+	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
+	unsigned char proto = fl6->flowi6_proto;
 	int err = 0;
 
 	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
@@ -1139,89 +1544,79 @@ int ip6_push_pending_frames(struct sock *sk)
 	tail_skb = &(skb_shinfo(skb)->frag_list);
 
 	/* move skb->data to ip header from ext header */
-	if (skb->data < skb->nh.raw)
-		__skb_pull(skb, skb->nh.raw - skb->data);
+	if (skb->data < skb_network_header(skb))
+		__skb_pull(skb, skb_network_offset(skb));
 	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
-		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
+		__skb_pull(tmp_skb, skb_network_header_len(skb));
 		*tail_skb = tmp_skb;
 		tail_skb = &(tmp_skb->next);
 		skb->len += tmp_skb->len;
 		skb->data_len += tmp_skb->len;
 		skb->truesize += tmp_skb->truesize;
-		__sock_put(tmp_skb->sk);
 		tmp_skb->destructor = NULL;
 		tmp_skb->sk = NULL;
 	}
 
-	ipv6_addr_copy(final_dst, &fl->fl6_dst);
-	__skb_pull(skb, skb->h.raw - skb->nh.raw);
+	/* Allow local fragmentation. */
+	skb->ignore_df = ip6_sk_ignore_df(sk);
+
+	*final_dst = fl6->daddr;
+	__skb_pull(skb, skb_network_header_len(skb));
 	if (opt && opt->opt_flen)
 		ipv6_push_frag_opts(skb, opt, &proto);
 	if (opt && opt->opt_nflen)
 		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
 
-	skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
-	
-	*(u32*)hdr = fl->fl6_flowlabel |
-		     htonl(0x60000000 | ((int)np->cork.tclass << 20));
+	skb_push(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	hdr = ipv6_hdr(skb);
 
-	if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
-		hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
-	else
-		hdr->payload_len = 0;
+	ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
 	hdr->hop_limit = np->cork.hop_limit;
 	hdr->nexthdr = proto;
-	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
-	ipv6_addr_copy(&hdr->daddr, final_dst);
+	hdr->saddr = fl6->saddr;
+	hdr->daddr = *final_dst;
 
-	skb->dst = dst_clone(&rt->u.dst);
-	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);	
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
+	skb->priority = sk->sk_priority;
+	skb->mark = sk->sk_mark;
+
+	skb_dst_set(skb, dst_clone(&rt->dst));
+	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
+	if (proto == IPPROTO_ICMPV6) {
+		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+
+		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
+		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
+	}
+
+	err = ip6_local_out(skb);
 	if (err) {
 		if (err > 0)
-			err = np->recverr ? net_xmit_errno(err) : 0;
+			err = net_xmit_errno(err);
 		if (err)
 			goto error;
 	}
 
 out:
-	inet->cork.flags &= ~IPCORK_OPT;
-	if (np->cork.opt) {
-		kfree(np->cork.opt);
-		np->cork.opt = NULL;
-	}
-	if (np->cork.rt) {
-		dst_release(&np->cork.rt->u.dst);
-		np->cork.rt = NULL;
-		inet->cork.flags &= ~IPCORK_ALLFRAG;
-	}
-	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
+	ip6_cork_release(inet, np);
 	return err;
 error:
+	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
 	goto out;
 }
+EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
 
 void ip6_flush_pending_frames(struct sock *sk)
 {
-	struct inet_sock *inet = inet_sk(sk);
-	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff *skb;
 
 	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
-		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+		if (skb_dst(skb))
+			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
+				      IPSTATS_MIB_OUTDISCARDS);
 		kfree_skb(skb);
 	}
 
-	inet->cork.flags &= ~IPCORK_OPT;
-
-	if (np->cork.opt) {
-		kfree(np->cork.opt);
-		np->cork.opt = NULL;
-	}
-	if (np->cork.rt) {
-		dst_release(&np->cork.rt->u.dst);
-		np->cork.rt = NULL;
-		inet->cork.flags &= ~IPCORK_ALLFRAG;
-	}
-	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
+	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
 }
+EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index cf94372d1af..afa08245836 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1,14 +1,13 @@
 /*
- *	IPv6 over IPv6 tunnel device
+ *	IPv6 tunneling device
  *	Linux INET6 implementation
  *
  *	Authors:
- *	Ville Nuorvala		<vnuorval@tcs.hut.fi>	
- *
- *	$Id$
+ *	Ville Nuorvala		<vnuorval@tcs.hut.fi>
+ *	Yasuyuki Kozakai	<kozakai@linux-ipv6.org>
  *
  *      Based on:
- *      linux/net/ipv6/sit.c
+ *      linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
  *
  *      RFC 2473
  *
@@ -19,15 +18,17 @@
  *
  */
 
-#include <linux/config.h>
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/sockios.h>
+#include <linux/icmp.h>
 #include <linux/if.h>
 #include <linux/in.h>
 #include <linux/ip.h>
-#include <linux/if_tunnel.h>
 #include <linux/net.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
@@ -37,61 +38,105 @@
 #include <linux/route.h>
 #include <linux/rtnetlink.h>
 #include <linux/netfilter_ipv6.h>
+#include <linux/slab.h>
+#include <linux/hash.h>
+#include <linux/etherdevice.h>
 
 #include <asm/uaccess.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 
+#include <net/icmp.h>
 #include <net/ip.h>
+#include <net/ip_tunnels.h>
 #include <net/ipv6.h>
-#include <net/protocol.h>
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
 #include <net/ip6_tunnel.h>
 #include <net/xfrm.h>
 #include <net/dsfield.h>
 #include <net/inet_ecn.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
 
 MODULE_AUTHOR("Ville Nuorvala");
-MODULE_DESCRIPTION("IPv6-in-IPv6 tunnel");
+MODULE_DESCRIPTION("IPv6 tunneling device");
 MODULE_LICENSE("GPL");
-
-#define IPV6_TLV_TEL_DST_SIZE 8
+MODULE_ALIAS_RTNL_LINK("ip6tnl");
+MODULE_ALIAS_NETDEV("ip6tnl0");
 
 #ifdef IP6_TNL_DEBUG
-#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __FUNCTION__)
+#define IP6_TNL_TRACE(x...) pr_debug("%s:" x "\n", __func__)
 #else
 #define IP6_TNL_TRACE(x...) do {;} while(0)
 #endif
 
-#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
-
-#define HASH_SIZE  32
+#define HASH_SIZE_SHIFT  5
+#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
 
-#define HASH(addr) (((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
-	             (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
-                    (HASH_SIZE - 1))
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
-static int ip6ip6_fb_tnl_dev_init(struct net_device *dev);
-static int ip6ip6_tnl_dev_init(struct net_device *dev);
-static void ip6ip6_tnl_dev_setup(struct net_device *dev);
+static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
+{
+	u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
 
-/* the IPv6 tunnel fallback device */
-static struct net_device *ip6ip6_fb_tnl_dev;
+	return hash_32(hash, HASH_SIZE_SHIFT);
+}
 
+static int ip6_tnl_dev_init(struct net_device *dev);
+static void ip6_tnl_dev_setup(struct net_device *dev);
+static struct rtnl_link_ops ip6_link_ops __read_mostly;
+
+static int ip6_tnl_net_id __read_mostly;
+struct ip6_tnl_net {
+	/* the IPv6 tunnel fallback device */
+	struct net_device *fb_tnl_dev;
+	/* lists for storing tunnels in use */
+	struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+	struct ip6_tnl __rcu *tnls_wc[1];
+	struct ip6_tnl __rcu **tnls[2];
+};
 
-/* lists for storing tunnels in use */
-static struct ip6_tnl *tnls_r_l[HASH_SIZE];
-static struct ip6_tnl *tnls_wc[1];
-static struct ip6_tnl **tnls[2] = { tnls_wc, tnls_r_l };
+static struct net_device_stats *ip6_get_stats(struct net_device *dev)
+{
+	struct pcpu_sw_netstats tmp, sum = { 0 };
+	int i;
+
+	for_each_possible_cpu(i) {
+		unsigned int start;
+		const struct pcpu_sw_netstats *tstats =
+						   per_cpu_ptr(dev->tstats, i);
+
+		do {
+			start = u64_stats_fetch_begin_irq(&tstats->syncp);
+			tmp.rx_packets = tstats->rx_packets;
+			tmp.rx_bytes = tstats->rx_bytes;
+			tmp.tx_packets = tstats->tx_packets;
+			tmp.tx_bytes =  tstats->tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
+
+		sum.rx_packets += tmp.rx_packets;
+		sum.rx_bytes   += tmp.rx_bytes;
+		sum.tx_packets += tmp.tx_packets;
+		sum.tx_bytes   += tmp.tx_bytes;
+	}
+	dev->stats.rx_packets = sum.rx_packets;
+	dev->stats.rx_bytes   = sum.rx_bytes;
+	dev->stats.tx_packets = sum.tx_packets;
+	dev->stats.tx_bytes   = sum.tx_bytes;
+	return &dev->stats;
+}
 
-/* lock for the tunnel lists */
-static DEFINE_RWLOCK(ip6ip6_lock);
+/*
+ * Locking : hash tables are protected by RCU and RTNL
+ */
 
-static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
+struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
 {
 	struct dst_entry *dst = t->dst_cache;
 
-	if (dst && dst->obsolete && 
+	if (dst && dst->obsolete &&
 	    dst->ops->check(dst, t->dst_cookie) == NULL) {
 		t->dst_cache = NULL;
 		dst_release(dst);
@@ -100,223 +145,251 @@ static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
 
 	return dst;
 }
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_check);
 
-static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
+void ip6_tnl_dst_reset(struct ip6_tnl *t)
 {
 	dst_release(t->dst_cache);
 	t->dst_cache = NULL;
 }
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
 
-static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
+void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *) dst;
 	t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
 	dst_release(t->dst_cache);
 	t->dst_cache = dst;
 }
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_store);
 
 /**
- * ip6ip6_tnl_lookup - fetch tunnel matching the end-point addresses
- *   @remote: the address of the tunnel exit-point 
- *   @local: the address of the tunnel entry-point 
+ * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
+ *   @remote: the address of the tunnel exit-point
+ *   @local: the address of the tunnel entry-point
  *
- * Return:  
+ * Return:
  *   tunnel matching given end-points if found,
- *   else fallback tunnel if its device is up, 
+ *   else fallback tunnel if its device is up,
  *   else %NULL
  **/
 
+#define for_each_ip6_tunnel_rcu(start) \
+	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+
 static struct ip6_tnl *
-ip6ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
+ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
 {
-	unsigned h0 = HASH(remote);
-	unsigned h1 = HASH(local);
+	unsigned int hash = HASH(remote, local);
 	struct ip6_tnl *t;
+	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 
-	for (t = tnls_r_l[h0 ^ h1]; t; t = t->next) {
+	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
 		if (ipv6_addr_equal(local, &t->parms.laddr) &&
 		    ipv6_addr_equal(remote, &t->parms.raddr) &&
 		    (t->dev->flags & IFF_UP))
 			return t;
 	}
-	if ((t = tnls_wc[0]) != NULL && (t->dev->flags & IFF_UP))
+	t = rcu_dereference(ip6n->tnls_wc[0]);
+	if (t && (t->dev->flags & IFF_UP))
 		return t;
 
 	return NULL;
 }
 
 /**
- * ip6ip6_bucket - get head of list matching given tunnel parameters
- *   @p: parameters containing tunnel end-points 
+ * ip6_tnl_bucket - get head of list matching given tunnel parameters
+ *   @p: parameters containing tunnel end-points
  *
  * Description:
- *   ip6ip6_bucket() returns the head of the list matching the 
+ *   ip6_tnl_bucket() returns the head of the list matching the
  *   &struct in6_addr entries laddr and raddr in @p.
  *
- * Return: head of IPv6 tunnel list 
+ * Return: head of IPv6 tunnel list
  **/
 
-static struct ip6_tnl **
-ip6ip6_bucket(struct ip6_tnl_parm *p)
+static struct ip6_tnl __rcu **
+ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
 {
-	struct in6_addr *remote = &p->raddr;
-	struct in6_addr *local = &p->laddr;
-	unsigned h = 0;
+	const struct in6_addr *remote = &p->raddr;
+	const struct in6_addr *local = &p->laddr;
+	unsigned int h = 0;
 	int prio = 0;
 
 	if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
 		prio = 1;
-		h = HASH(remote) ^ HASH(local);
+		h = HASH(remote, local);
 	}
-	return &tnls[prio][h];
+	return &ip6n->tnls[prio][h];
 }
 
 /**
- * ip6ip6_tnl_link - add tunnel to hash table
+ * ip6_tnl_link - add tunnel to hash table
  *   @t: tunnel to be added
  **/
 
 static void
-ip6ip6_tnl_link(struct ip6_tnl *t)
+ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
 {
-	struct ip6_tnl **tp = ip6ip6_bucket(&t->parms);
+	struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
 
-	t->next = *tp;
-	write_lock_bh(&ip6ip6_lock);
-	*tp = t;
-	write_unlock_bh(&ip6ip6_lock);
+	rcu_assign_pointer(t->next , rtnl_dereference(*tp));
+	rcu_assign_pointer(*tp, t);
 }
 
 /**
- * ip6ip6_tnl_unlink - remove tunnel from hash table
+ * ip6_tnl_unlink - remove tunnel from hash table
  *   @t: tunnel to be removed
  **/
 
 static void
-ip6ip6_tnl_unlink(struct ip6_tnl *t)
+ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
 {
-	struct ip6_tnl **tp;
-
-	for (tp = ip6ip6_bucket(&t->parms); *tp; tp = &(*tp)->next) {
-		if (t == *tp) {
-			write_lock_bh(&ip6ip6_lock);
-			*tp = t->next;
-			write_unlock_bh(&ip6ip6_lock);
+	struct ip6_tnl __rcu **tp;
+	struct ip6_tnl *iter;
+
+	for (tp = ip6_tnl_bucket(ip6n, &t->parms);
+	     (iter = rtnl_dereference(*tp)) != NULL;
+	     tp = &iter->next) {
+		if (t == iter) {
+			rcu_assign_pointer(*tp, t->next);
 			break;
 		}
 	}
 }
 
+static void ip6_dev_free(struct net_device *dev)
+{
+	free_percpu(dev->tstats);
+	free_netdev(dev);
+}
+
+static int ip6_tnl_create2(struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net *net = dev_net(dev);
+	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+	int err;
+
+	t = netdev_priv(dev);
+	err = ip6_tnl_dev_init(dev);
+	if (err < 0)
+		goto out;
+
+	err = register_netdevice(dev);
+	if (err < 0)
+		goto out;
+
+	strcpy(t->parms.name, dev->name);
+	dev->rtnl_link_ops = &ip6_link_ops;
+
+	dev_hold(dev);
+	ip6_tnl_link(ip6n, t);
+	return 0;
+
+out:
+	return err;
+}
+
 /**
- * ip6_tnl_create() - create a new tunnel
+ * ip6_tnl_create - create a new tunnel
  *   @p: tunnel parameters
  *   @pt: pointer to new tunnel
  *
  * Description:
  *   Create tunnel matching given parameters.
- * 
- * Return: 
- *   0 on success
+ *
+ * Return:
+ *   created tunnel or NULL
  **/
 
-static int
-ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt)
+static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
 {
 	struct net_device *dev;
 	struct ip6_tnl *t;
 	char name[IFNAMSIZ];
 	int err;
 
-	if (p->name[0]) {
+	if (p->name[0])
 		strlcpy(name, p->name, IFNAMSIZ);
-	} else {
-		int i;
-		for (i = 1; i < IP6_TNL_MAX; i++) {
-			sprintf(name, "ip6tnl%d", i);
-			if (__dev_get_by_name(name) == NULL)
-				break;
-		}
-		if (i == IP6_TNL_MAX) 
-			return -ENOBUFS;
-	}
-	dev = alloc_netdev(sizeof (*t), name, ip6ip6_tnl_dev_setup);
+	else
+		sprintf(name, "ip6tnl%%d");
+
+	dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
 	if (dev == NULL)
-		return -ENOMEM;
+		goto failed;
+
+	dev_net_set(dev, net);
 
-	t = dev->priv;
-	dev->init = ip6ip6_tnl_dev_init;
+	t = netdev_priv(dev);
 	t->parms = *p;
+	t->net = dev_net(dev);
+	err = ip6_tnl_create2(dev);
+	if (err < 0)
+		goto failed_free;
 
-	if ((err = register_netdevice(dev)) < 0) {
-		free_netdev(dev);
-		return err;
-	}
-	dev_hold(dev);
+	return t;
 
-	ip6ip6_tnl_link(t);
-	*pt = t;
-	return 0;
+failed_free:
+	ip6_dev_free(dev);
+failed:
+	return NULL;
 }
 
 /**
- * ip6ip6_tnl_locate - find or create tunnel matching given parameters
- *   @p: tunnel parameters 
+ * ip6_tnl_locate - find or create tunnel matching given parameters
+ *   @p: tunnel parameters
  *   @create: != 0 if allowed to create new tunnel if no match found
  *
  * Description:
- *   ip6ip6_tnl_locate() first tries to locate an existing tunnel
+ *   ip6_tnl_locate() first tries to locate an existing tunnel
  *   based on @parms. If this is unsuccessful, but @create is set a new
  *   tunnel device is created and registered for use.
  *
  * Return:
- *   0 if tunnel located or created,
- *   -EINVAL if parameters incorrect,
- *   -ENODEV if no matching tunnel available
+ *   matching tunnel or NULL
  **/
 
-static int
-ip6ip6_tnl_locate(struct ip6_tnl_parm *p, struct ip6_tnl **pt, int create)
+static struct ip6_tnl *ip6_tnl_locate(struct net *net,
+		struct __ip6_tnl_parm *p, int create)
 {
-	struct in6_addr *remote = &p->raddr;
-	struct in6_addr *local = &p->laddr;
+	const struct in6_addr *remote = &p->raddr;
+	const struct in6_addr *local = &p->laddr;
+	struct ip6_tnl __rcu **tp;
 	struct ip6_tnl *t;
+	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 
-	if (p->proto != IPPROTO_IPV6)
-		return -EINVAL;
-
-	for (t = *ip6ip6_bucket(p); t; t = t->next) {
+	for (tp = ip6_tnl_bucket(ip6n, p);
+	     (t = rtnl_dereference(*tp)) != NULL;
+	     tp = &t->next) {
 		if (ipv6_addr_equal(local, &t->parms.laddr) &&
-		    ipv6_addr_equal(remote, &t->parms.raddr)) {
-			*pt = t;
-			return (create ? -EEXIST : 0);
-		}
+		    ipv6_addr_equal(remote, &t->parms.raddr))
+			return t;
 	}
 	if (!create)
-		return -ENODEV;
-	
-	return ip6_tnl_create(p, pt);
+		return NULL;
+	return ip6_tnl_create(net, p);
 }
 
 /**
- * ip6ip6_tnl_dev_uninit - tunnel device uninitializer
+ * ip6_tnl_dev_uninit - tunnel device uninitializer
  *   @dev: the device to be destroyed
- *   
+ *
  * Description:
- *   ip6ip6_tnl_dev_uninit() removes tunnel from its list
+ *   ip6_tnl_dev_uninit() removes tunnel from its list
  **/
 
 static void
-ip6ip6_tnl_dev_uninit(struct net_device *dev)
+ip6_tnl_dev_uninit(struct net_device *dev)
 {
-	struct ip6_tnl *t = dev->priv;
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net *net = t->net;
+	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 
-	if (dev == ip6ip6_fb_tnl_dev) {
-		write_lock_bh(&ip6ip6_lock);
-		tnls_wc[0] = NULL;
-		write_unlock_bh(&ip6ip6_lock);
-	} else {
-		ip6ip6_tnl_unlink(t);
-	}
+	if (dev == ip6n->fb_tnl_dev)
+		RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
+	else
+		ip6_tnl_unlink(ip6n, t);
 	ip6_tnl_dst_reset(t);
 	dev_put(dev);
 }
@@ -325,15 +398,14 @@ ip6ip6_tnl_dev_uninit(struct net_device *dev)
  * parse_tvl_tnl_enc_lim - handle encapsulation limit option
  *   @skb: received socket buffer
  *
- * Return: 
- *   0 if none was found, 
+ * Return:
+ *   0 if none was found,
  *   else index to encapsulation limit
  **/
 
-static __u16
-parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
+__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
 {
-	struct ipv6hdr *ipv6h = (struct ipv6hdr *) raw;
+	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
 	__u8 nexthdr = ipv6h->nexthdr;
 	__u16 off = sizeof (*ipv6h);
 
@@ -381,83 +453,83 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
 
 /**
- * ip6ip6_err - tunnel error handler
+ * ip6_tnl_err - tunnel error handler
  *
  * Description:
- *   ip6ip6_err() should handle errors in the tunnel according
+ *   ip6_tnl_err() should handle errors in the tunnel according
  *   to the specifications in RFC 2473.
  **/
 
-static void 
-ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-	   int type, int code, int offset, __u32 info)
+static int
+ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
+	    u8 *type, u8 *code, int *msg, __u32 *info, int offset)
 {
-	struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
+	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
 	struct ip6_tnl *t;
 	int rel_msg = 0;
-	int rel_type = ICMPV6_DEST_UNREACH;
-	int rel_code = ICMPV6_ADDR_UNREACH;
+	u8 rel_type = ICMPV6_DEST_UNREACH;
+	u8 rel_code = ICMPV6_ADDR_UNREACH;
 	__u32 rel_info = 0;
 	__u16 len;
+	int err = -ENOENT;
 
-	/* If the packet doesn't contain the original IPv6 header we are 
-	   in trouble since we might need the source address for further 
+	/* If the packet doesn't contain the original IPv6 header we are
+	   in trouble since we might need the source address for further
 	   processing of the error. */
 
-	read_lock(&ip6ip6_lock);
-	if ((t = ip6ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
+	rcu_read_lock();
+	if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr,
+					&ipv6h->saddr)) == NULL)
 		goto out;
 
-	switch (type) {
+	if (t->parms.proto != ipproto && t->parms.proto != 0)
+		goto out;
+
+	err = 0;
+
+	switch (*type) {
 		__u32 teli;
 		struct ipv6_tlv_tnl_enc_lim *tel;
 		__u32 mtu;
 	case ICMPV6_DEST_UNREACH:
-		if (net_ratelimit())
-			printk(KERN_WARNING
-			       "%s: Path to destination invalid "
-			       "or inactive!\n", t->parms.name);
+		net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
+				     t->parms.name);
 		rel_msg = 1;
 		break;
 	case ICMPV6_TIME_EXCEED:
-		if (code == ICMPV6_EXC_HOPLIMIT) {
-			if (net_ratelimit())
-				printk(KERN_WARNING
-				       "%s: Too small hop limit or "
-				       "routing loop in tunnel!\n", 
-				       t->parms.name);
+		if ((*code) == ICMPV6_EXC_HOPLIMIT) {
+			net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
+					     t->parms.name);
 			rel_msg = 1;
 		}
 		break;
 	case ICMPV6_PARAMPROB:
-		/* ignore if parameter problem not caused by a tunnel
-		   encapsulation limit sub-option */
-		if (code != ICMPV6_HDR_FIELD) {
-			break;
-		}
-		teli = parse_tlv_tnl_enc_lim(skb, skb->data);
+		teli = 0;
+		if ((*code) == ICMPV6_HDR_FIELD)
+			teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
 
-		if (teli && teli == ntohl(info) - 2) {
+		if (teli && teli == *info - 2) {
 			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
 			if (tel->encap_limit == 0) {
-				if (net_ratelimit())
-					printk(KERN_WARNING
-					       "%s: Too small encapsulation "
-					       "limit or routing loop in "
-					       "tunnel!\n", t->parms.name);
+				net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
+						     t->parms.name);
 				rel_msg = 1;
 			}
+		} else {
+			net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
+					     t->parms.name);
 		}
 		break;
 	case ICMPV6_PKT_TOOBIG:
-		mtu = ntohl(info) - offset;
+		mtu = *info - offset;
 		if (mtu < IPV6_MIN_MTU)
 			mtu = IPV6_MIN_MTU;
 		t->dev->mtu = mtu;
 
-		if ((len = sizeof (*ipv6h) + ipv6h->payload_len) > mtu) {
+		if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) {
 			rel_type = ICMPV6_PKT_TOOBIG;
 			rel_code = 0;
 			rel_info = mtu;
@@ -465,377 +537,675 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		}
 		break;
 	}
-	if (rel_msg &&  pskb_may_pull(skb, offset + sizeof (*ipv6h))) {
+
+	*type = rel_type;
+	*code = rel_code;
+	*info = rel_info;
+	*msg = rel_msg;
+
+out:
+	rcu_read_unlock();
+	return err;
+}
+
+static int
+ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+	   u8 type, u8 code, int offset, __be32 info)
+{
+	int rel_msg = 0;
+	u8 rel_type = type;
+	u8 rel_code = code;
+	__u32 rel_info = ntohl(info);
+	int err;
+	struct sk_buff *skb2;
+	const struct iphdr *eiph;
+	struct rtable *rt;
+	struct flowi4 fl4;
+
+	err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
+			  &rel_msg, &rel_info, offset);
+	if (err < 0)
+		return err;
+
+	if (rel_msg == 0)
+		return 0;
+
+	switch (rel_type) {
+	case ICMPV6_DEST_UNREACH:
+		if (rel_code != ICMPV6_ADDR_UNREACH)
+			return 0;
+		rel_type = ICMP_DEST_UNREACH;
+		rel_code = ICMP_HOST_UNREACH;
+		break;
+	case ICMPV6_PKT_TOOBIG:
+		if (rel_code != 0)
+			return 0;
+		rel_type = ICMP_DEST_UNREACH;
+		rel_code = ICMP_FRAG_NEEDED;
+		break;
+	case NDISC_REDIRECT:
+		rel_type = ICMP_REDIRECT;
+		rel_code = ICMP_REDIR_HOST;
+	default:
+		return 0;
+	}
+
+	if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
+		return 0;
+
+	skb2 = skb_clone(skb, GFP_ATOMIC);
+	if (!skb2)
+		return 0;
+
+	skb_dst_drop(skb2);
+
+	skb_pull(skb2, offset);
+	skb_reset_network_header(skb2);
+	eiph = ip_hdr(skb2);
+
+	/* Try to guess incoming interface */
+	rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
+				   eiph->saddr, 0,
+				   0, 0,
+				   IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+	if (IS_ERR(rt))
+		goto out;
+
+	skb2->dev = rt->dst.dev;
+
+	/* route "incoming" packet */
+	if (rt->rt_flags & RTCF_LOCAL) {
+		ip_rt_put(rt);
+		rt = NULL;
+		rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
+					   eiph->daddr, eiph->saddr,
+					   0, 0,
+					   IPPROTO_IPIP,
+					   RT_TOS(eiph->tos), 0);
+		if (IS_ERR(rt) ||
+		    rt->dst.dev->type != ARPHRD_TUNNEL) {
+			if (!IS_ERR(rt))
+				ip_rt_put(rt);
+			goto out;
+		}
+		skb_dst_set(skb2, &rt->dst);
+	} else {
+		ip_rt_put(rt);
+		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
+				   skb2->dev) ||
+		    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
+			goto out;
+	}
+
+	/* change mtu on this route */
+	if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
+		if (rel_info > dst_mtu(skb_dst(skb2)))
+			goto out;
+
+		skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
+	}
+	if (rel_type == ICMP_REDIRECT)
+		skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
+
+	icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
+
+out:
+	kfree_skb(skb2);
+	return 0;
+}
+
+static int
+ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+	   u8 type, u8 code, int offset, __be32 info)
+{
+	int rel_msg = 0;
+	u8 rel_type = type;
+	u8 rel_code = code;
+	__u32 rel_info = ntohl(info);
+	int err;
+
+	err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
+			  &rel_msg, &rel_info, offset);
+	if (err < 0)
+		return err;
+
+	if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
 		struct rt6_info *rt;
 		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+
 		if (!skb2)
-			goto out;
+			return 0;
 
-		dst_release(skb2->dst);
-		skb2->dst = NULL;
+		skb_dst_drop(skb2);
 		skb_pull(skb2, offset);
-		skb2->nh.raw = skb2->data;
+		skb_reset_network_header(skb2);
 
 		/* Try to guess incoming interface */
-		rt = rt6_lookup(&skb2->nh.ipv6h->saddr, NULL, 0, 0);
+		rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
+				NULL, 0, 0);
 
-		if (rt && rt->rt6i_dev)
-			skb2->dev = rt->rt6i_dev;
+		if (rt && rt->dst.dev)
+			skb2->dev = rt->dst.dev;
 
-		icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
+		icmpv6_send(skb2, rel_type, rel_code, rel_info);
 
-		if (rt)
-			dst_release(&rt->u.dst);
+		ip6_rt_put(rt);
 
 		kfree_skb(skb2);
 	}
-out:
-	read_unlock(&ip6ip6_lock);
+
+	return 0;
+}
+
+static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
+				       const struct ipv6hdr *ipv6h,
+				       struct sk_buff *skb)
+{
+	__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
+
+	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
+
+	return IP6_ECN_decapsulate(ipv6h, skb);
 }
 
-static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph,
-					  struct sk_buff *skb)
+static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
+				       const struct ipv6hdr *ipv6h,
+				       struct sk_buff *skb)
 {
-	struct ipv6hdr *inner_iph = skb->nh.ipv6h;
+	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+		ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
+
+	return IP6_ECN_decapsulate(ipv6h, skb);
+}
 
-	if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
-		IP6_ECN_set_ce(inner_iph);
+__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
+			     const struct in6_addr *laddr,
+			     const struct in6_addr *raddr)
+{
+	struct __ip6_tnl_parm *p = &t->parms;
+	int ltype = ipv6_addr_type(laddr);
+	int rtype = ipv6_addr_type(raddr);
+	__u32 flags = 0;
+
+	if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
+		flags = IP6_TNL_F_CAP_PER_PACKET;
+	} else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
+		   rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
+		   !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
+		   (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
+		if (ltype&IPV6_ADDR_UNICAST)
+			flags |= IP6_TNL_F_CAP_XMIT;
+		if (rtype&IPV6_ADDR_UNICAST)
+			flags |= IP6_TNL_F_CAP_RCV;
+	}
+	return flags;
 }
+EXPORT_SYMBOL(ip6_tnl_get_cap);
+
+/* called with rcu_read_lock() */
+int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
+				  const struct in6_addr *laddr,
+				  const struct in6_addr *raddr)
+{
+	struct __ip6_tnl_parm *p = &t->parms;
+	int ret = 0;
+	struct net *net = t->net;
+
+	if ((p->flags & IP6_TNL_F_CAP_RCV) ||
+	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
+	     (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
+		struct net_device *ldev = NULL;
+
+		if (p->link)
+			ldev = dev_get_by_index_rcu(net, p->link);
+
+		if ((ipv6_addr_is_multicast(laddr) ||
+		     likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+		    likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
+			ret = 1;
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
 
 /**
- * ip6ip6_rcv - decapsulate IPv6 packet and retransmit it locally
+ * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
  *   @skb: received socket buffer
+ *   @protocol: ethernet protocol ID
+ *   @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
  *
  * Return: 0
  **/
 
-static int 
-ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
+		       __u8 ipproto,
+		       int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
+						   const struct ipv6hdr *ipv6h,
+						   struct sk_buff *skb))
 {
-	struct sk_buff *skb = *pskb;
-	struct ipv6hdr *ipv6h;
 	struct ip6_tnl *t;
+	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	int err;
 
-	if (!pskb_may_pull(skb, sizeof (*ipv6h)))
-		goto discard;
+	rcu_read_lock();
 
-	ipv6h = skb->nh.ipv6h;
+	if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
+					&ipv6h->daddr)) != NULL) {
+		struct pcpu_sw_netstats *tstats;
 
-	read_lock(&ip6ip6_lock);
+		if (t->parms.proto != ipproto && t->parms.proto != 0) {
+			rcu_read_unlock();
+			goto discard;
+		}
 
-	if ((t = ip6ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
 		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-			kfree_skb(skb);
-			return 0;
+			rcu_read_unlock();
+			goto discard;
 		}
 
-		if (!(t->parms.flags & IP6_TNL_F_CAP_RCV)) {
-			t->stat.rx_dropped++;
-			read_unlock(&ip6ip6_lock);
+		if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
+			t->dev->stats.rx_dropped++;
+			rcu_read_unlock();
 			goto discard;
 		}
-		secpath_reset(skb);
-		skb->mac.raw = skb->nh.raw;
-		skb->nh.raw = skb->data;
-		skb->protocol = htons(ETH_P_IPV6);
-		skb->pkt_type = PACKET_HOST;
+		skb->mac_header = skb->network_header;
+		skb_reset_network_header(skb);
+		skb->protocol = htons(protocol);
 		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
-		skb->dev = t->dev;
-		dst_release(skb->dst);
-		skb->dst = NULL;
-		if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
-			ipv6_copy_dscp(ipv6h, skb->nh.ipv6h);
-		ip6ip6_ecn_decapsulate(ipv6h, skb);
-		t->stat.rx_packets++;
-		t->stat.rx_bytes += skb->len;
+
+		__skb_tunnel_rx(skb, t->dev, t->net);
+
+		err = dscp_ecn_decapsulate(t, ipv6h, skb);
+		if (unlikely(err)) {
+			if (log_ecn_error)
+				net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
+						     &ipv6h->saddr,
+						     ipv6_get_dsfield(ipv6h));
+			if (err > 1) {
+				++t->dev->stats.rx_frame_errors;
+				++t->dev->stats.rx_errors;
+				rcu_read_unlock();
+				goto discard;
+			}
+		}
+
+		tstats = this_cpu_ptr(t->dev->tstats);
+		u64_stats_update_begin(&tstats->syncp);
+		tstats->rx_packets++;
+		tstats->rx_bytes += skb->len;
+		u64_stats_update_end(&tstats->syncp);
+
 		netif_rx(skb);
-		read_unlock(&ip6ip6_lock);
+
+		rcu_read_unlock();
 		return 0;
 	}
-	read_unlock(&ip6ip6_lock);
-	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
-discard:
+	rcu_read_unlock();
 	return 1;
+
+discard:
+	kfree_skb(skb);
+	return 0;
 }
 
-static inline struct ipv6_txoptions *create_tel(__u8 encap_limit)
+static int ip4ip6_rcv(struct sk_buff *skb)
 {
-	struct ipv6_tlv_tnl_enc_lim *tel;
-	struct ipv6_txoptions *opt;
-	__u8 *raw;
+	return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
+			   ip4ip6_dscp_ecn_decapsulate);
+}
 
-	int opt_len = sizeof(*opt) + 8;
+static int ip6ip6_rcv(struct sk_buff *skb)
+{
+	return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
+			   ip6ip6_dscp_ecn_decapsulate);
+}
 
-	if (!(opt = kmalloc(opt_len, GFP_ATOMIC))) {
-		return NULL;
-	}
-	memset(opt, 0, opt_len);
-	opt->tot_len = opt_len;
-	opt->dst0opt = (struct ipv6_opt_hdr *) (opt + 1);
-	opt->opt_nflen = 8;
+struct ipv6_tel_txoption {
+	struct ipv6_txoptions ops;
+	__u8 dst_opt[8];
+};
 
-	tel = (struct ipv6_tlv_tnl_enc_lim *) (opt->dst0opt + 1);
-	tel->type = IPV6_TLV_TNL_ENCAP_LIMIT;
-	tel->length = 1;
-	tel->encap_limit = encap_limit;
+static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
+{
+	memset(opt, 0, sizeof(struct ipv6_tel_txoption));
 
-	raw = (__u8 *) opt->dst0opt;
-	raw[5] = IPV6_TLV_PADN;
-	raw[6] = 1;
+	opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
+	opt->dst_opt[3] = 1;
+	opt->dst_opt[4] = encap_limit;
+	opt->dst_opt[5] = IPV6_TLV_PADN;
+	opt->dst_opt[6] = 1;
 
-	return opt;
+	opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
+	opt->ops.opt_nflen = 8;
 }
 
 /**
- * ip6ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
+ * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
  *   @t: the outgoing tunnel device
- *   @hdr: IPv6 header from the incoming packet 
+ *   @hdr: IPv6 header from the incoming packet
  *
  * Description:
- *   Avoid trivial tunneling loop by checking that tunnel exit-point 
+ *   Avoid trivial tunneling loop by checking that tunnel exit-point
  *   doesn't match source of incoming packet.
  *
- * Return: 
+ * Return:
  *   1 if conflict,
  *   0 else
  **/
 
-static inline int
-ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
+static inline bool
+ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
 {
 	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
 }
 
+int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
+{
+	struct __ip6_tnl_parm *p = &t->parms;
+	int ret = 0;
+	struct net *net = t->net;
+
+	if (p->flags & IP6_TNL_F_CAP_XMIT) {
+		struct net_device *ldev = NULL;
+
+		rcu_read_lock();
+		if (p->link)
+			ldev = dev_get_by_index_rcu(net, p->link);
+
+		if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0)))
+			pr_warn("%s xmit: Local address not yet configured!\n",
+				p->name);
+		else if (!ipv6_addr_is_multicast(&p->raddr) &&
+			 unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0)))
+			pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
+				p->name);
+		else
+			ret = 1;
+		rcu_read_unlock();
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
+
 /**
- * ip6ip6_tnl_xmit - encapsulate packet and send 
+ * ip6_tnl_xmit2 - encapsulate packet and send
  *   @skb: the outgoing socket buffer
- *   @dev: the outgoing tunnel device 
+ *   @dev: the outgoing tunnel device
+ *   @dsfield: dscp code for outer header
+ *   @fl: flow of tunneled packet
+ *   @encap_limit: encapsulation limit
+ *   @pmtu: Path MTU is stored if packet is too big
  *
  * Description:
  *   Build new header and do some sanity checks on the packet before sending
  *   it.
  *
- * Return: 
- *   0
+ * Return:
+ *   0 on success
+ *   -1 fail
+ *   %-EMSGSIZE message too big. return mtu in this case.
  **/
 
-static int 
-ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+static int ip6_tnl_xmit2(struct sk_buff *skb,
+			 struct net_device *dev,
+			 __u8 dsfield,
+			 struct flowi6 *fl6,
+			 int encap_limit,
+			 __u32 *pmtu)
 {
-	struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
-	struct net_device_stats *stats = &t->stat;
-	struct ipv6hdr *ipv6h = skb->nh.ipv6h;
-	struct ipv6_txoptions *opt = NULL;
-	int encap_limit = -1;
-	__u16 offset;
-	struct flowi fl;
-	struct dst_entry *dst;
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net *net = t->net;
+	struct net_device_stats *stats = &t->dev->stats;
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	struct ipv6_tel_txoption opt;
+	struct dst_entry *dst = NULL, *ndst = NULL;
 	struct net_device *tdev;
 	int mtu;
-	int max_headroom = sizeof(struct ipv6hdr);
+	unsigned int max_headroom = sizeof(struct ipv6hdr);
 	u8 proto;
-	int err;
-	int pkt_len;
-	int dsfield;
+	int err = -1;
 
-	if (t->recursion++) {
-		stats->collisions++;
-		goto tx_err;
-	}
-	if (skb->protocol != htons(ETH_P_IPV6) ||
-	    !(t->parms.flags & IP6_TNL_F_CAP_XMIT) ||
-	    ip6ip6_tnl_addr_conflict(t, ipv6h)) {
-		goto tx_err;
-	}
-	if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
-		struct ipv6_tlv_tnl_enc_lim *tel;
-		tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->nh.raw[offset];
-		if (tel->encap_limit == 0) {
-			icmpv6_send(skb, ICMPV6_PARAMPROB,
-				    ICMPV6_HDR_FIELD, offset + 2, skb->dev);
-			goto tx_err;
-		}
-		encap_limit = tel->encap_limit - 1;
-	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
-		encap_limit = t->parms.encap_limit;
-	}
-	memcpy(&fl, &t->fl, sizeof (fl));
-	proto = fl.proto;
+	if (!fl6->flowi6_mark)
+		dst = ip6_tnl_dst_check(t);
+	if (!dst) {
+		ndst = ip6_route_output(net, NULL, fl6);
 
-	dsfield = ipv6_get_dsfield(ipv6h);
-	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
-		fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_TCLASS_MASK);
-	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
-		fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_FLOWLABEL_MASK);
-
-	if (encap_limit >= 0 && (opt = create_tel(encap_limit)) == NULL)
-		goto tx_err;
-
-	if ((dst = ip6_tnl_dst_check(t)) != NULL)
-		dst_hold(dst);
-	else {
-		dst = ip6_route_output(NULL, &fl);
-
-		if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0)
+		if (ndst->error)
+			goto tx_err_link_failure;
+		ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
+		if (IS_ERR(ndst)) {
+			err = PTR_ERR(ndst);
+			ndst = NULL;
 			goto tx_err_link_failure;
+		}
+		dst = ndst;
 	}
 
 	tdev = dst->dev;
 
 	if (tdev == dev) {
 		stats->collisions++;
-		if (net_ratelimit())
-			printk(KERN_WARNING 
-			       "%s: Local routing loop detected!\n",
-			       t->parms.name);
+		net_warn_ratelimited("%s: Local routing loop detected!\n",
+				     t->parms.name);
 		goto tx_err_dst_release;
 	}
 	mtu = dst_mtu(dst) - sizeof (*ipv6h);
-	if (opt) {
+	if (encap_limit >= 0) {
 		max_headroom += 8;
 		mtu -= 8;
 	}
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
-	if (skb->dst && mtu < dst_mtu(skb->dst)) {
-		struct rt6_info *rt = (struct rt6_info *) skb->dst;
-		rt->rt6i_flags |= RTF_MODIFIED;
-		rt->u.dst.metrics[RTAX_MTU-1] = mtu;
-	}
+	if (skb_dst(skb))
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 	if (skb->len > mtu) {
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+		*pmtu = mtu;
+		err = -EMSGSIZE;
 		goto tx_err_dst_release;
 	}
 
+	skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
+
 	/*
 	 * Okay, now see if we can stuff it in the buffer as-is.
 	 */
 	max_headroom += LL_RESERVED_SPACE(tdev);
-	
-	if (skb_headroom(skb) < max_headroom || 
-	    skb_cloned(skb) || skb_shared(skb)) {
+
+	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
+	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 		struct sk_buff *new_skb;
-		
+
 		if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
 			goto tx_err_dst_release;
 
 		if (skb->sk)
 			skb_set_owner_w(new_skb, skb->sk);
-		kfree_skb(skb);
+		consume_skb(skb);
 		skb = new_skb;
 	}
-	dst_release(skb->dst);
-	skb->dst = dst_clone(dst);
+	if (fl6->flowi6_mark) {
+		skb_dst_set(skb, dst);
+		ndst = NULL;
+	} else {
+		skb_dst_set_noref(skb, dst);
+	}
+	skb->transport_header = skb->network_header;
 
-	skb->h.raw = skb->nh.raw;
+	proto = fl6->flowi6_proto;
+	if (encap_limit >= 0) {
+		init_tel_txopt(&opt, encap_limit);
+		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
+	}
 
-	if (opt)
-		ipv6_push_nfrag_opts(skb, opt, &proto, NULL);
+	if (likely(!skb->encapsulation)) {
+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
+	}
 
-	skb->nh.raw = skb_push(skb, sizeof(struct ipv6hdr));
-	ipv6h = skb->nh.ipv6h;
-	*(u32*)ipv6h = fl.fl6_flowlabel | htonl(0x60000000);
-	dsfield = INET_ECN_encapsulate(0, dsfield);
-	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
-	ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+	skb_push(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	ipv6h = ipv6_hdr(skb);
+	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
 	ipv6h->hop_limit = t->parms.hop_limit;
 	ipv6h->nexthdr = proto;
-	ipv6_addr_copy(&ipv6h->saddr, &fl.fl6_src);
-	ipv6_addr_copy(&ipv6h->daddr, &fl.fl6_dst);
-	nf_reset(skb);
-	pkt_len = skb->len;
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, 
-		      skb->dst->dev, dst_output);
-
-	if (err == NET_XMIT_SUCCESS || err == NET_XMIT_CN) {
-		stats->tx_bytes += pkt_len;
-		stats->tx_packets++;
-	} else {
-		stats->tx_errors++;
-		stats->tx_aborted_errors++;
-	}
-	ip6_tnl_dst_store(t, dst);
-
-	if (opt)
-		kfree(opt);
-
-	t->recursion--;
+	ipv6h->saddr = fl6->saddr;
+	ipv6h->daddr = fl6->daddr;
+	ip6tunnel_xmit(skb, dev);
+	if (ndst)
+		ip6_tnl_dst_store(t, ndst);
 	return 0;
 tx_err_link_failure:
 	stats->tx_carrier_errors++;
 	dst_link_failure(skb);
 tx_err_dst_release:
-	dst_release(dst);
-	if (opt)
-		kfree(opt);
-tx_err:
-	stats->tx_errors++;
-	stats->tx_dropped++;
-	kfree_skb(skb);
-	t->recursion--;
-	return 0;
+	dst_release(ndst);
+	return err;
 }
 
-static void ip6_tnl_set_cap(struct ip6_tnl *t)
+static inline int
+ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct ip6_tnl_parm *p = &t->parms;
-	struct in6_addr *laddr = &p->laddr;
-	struct in6_addr *raddr = &p->raddr;
-	int ltype = ipv6_addr_type(laddr);
-	int rtype = ipv6_addr_type(raddr);
+	struct ip6_tnl *t = netdev_priv(dev);
+	const struct iphdr  *iph = ip_hdr(skb);
+	int encap_limit = -1;
+	struct flowi6 fl6;
+	__u8 dsfield;
+	__u32 mtu;
+	int err;
 
-	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV);
+	if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) ||
+	    !ip6_tnl_xmit_ctl(t))
+		return -1;
 
-	if (ltype != IPV6_ADDR_ANY && rtype != IPV6_ADDR_ANY &&
-	    ((ltype|rtype) &
-	     (IPV6_ADDR_UNICAST|
-	      IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL|
-	      IPV6_ADDR_MAPPED|IPV6_ADDR_RESERVED)) == IPV6_ADDR_UNICAST) {
-		struct net_device *ldev = NULL;
-		int l_ok = 1;
-		int r_ok = 1;
+	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		encap_limit = t->parms.encap_limit;
 
-		if (p->link)
-			ldev = dev_get_by_index(p->link);
-		
-		if (ltype&IPV6_ADDR_UNICAST && !ipv6_chk_addr(laddr, ldev, 0))
-			l_ok = 0;
-		
-		if (rtype&IPV6_ADDR_UNICAST && ipv6_chk_addr(raddr, NULL, 0))
-			r_ok = 0;
-		
-		if (l_ok && r_ok) {
-			if (ltype&IPV6_ADDR_UNICAST)
-				p->flags |= IP6_TNL_F_CAP_XMIT;
-			if (rtype&IPV6_ADDR_UNICAST)
-				p->flags |= IP6_TNL_F_CAP_RCV;
+	memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6));
+	fl6.flowi6_proto = IPPROTO_IPIP;
+
+	dsfield = ipv4_get_dsfield(iph);
+
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+		fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+					  & IPV6_TCLASS_MASK;
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+		fl6.flowi6_mark = skb->mark;
+
+	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+	if (err != 0) {
+		/* XXX: send ICMP error even if DF is not set. */
+		if (err == -EMSGSIZE)
+			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+				  htonl(mtu));
+		return -1;
+	}
+
+	return 0;
+}
+
+static inline int
+ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	int encap_limit = -1;
+	__u16 offset;
+	struct flowi6 fl6;
+	__u8 dsfield;
+	__u32 mtu;
+	int err;
+
+	if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
+	    !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
+		return -1;
+
+	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+	if (offset > 0) {
+		struct ipv6_tlv_tnl_enc_lim *tel;
+		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
+		if (tel->encap_limit == 0) {
+			icmpv6_send(skb, ICMPV6_PARAMPROB,
+				    ICMPV6_HDR_FIELD, offset + 2);
+			return -1;
 		}
-		if (ldev)
-			dev_put(ldev);
+		encap_limit = tel->encap_limit - 1;
+	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		encap_limit = t->parms.encap_limit;
+
+	memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6));
+	fl6.flowi6_proto = IPPROTO_IPV6;
+
+	dsfield = ipv6_get_dsfield(ipv6h);
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+		fl6.flowlabel |= ip6_flowlabel(ipv6h);
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+		fl6.flowi6_mark = skb->mark;
+
+	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+	if (err != 0) {
+		if (err == -EMSGSIZE)
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+		return -1;
+	}
+
+	return 0;
+}
+
+static netdev_tx_t
+ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net_device_stats *stats = &t->dev->stats;
+	int ret;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		ret = ip4ip6_tnl_xmit(skb, dev);
+		break;
+	case htons(ETH_P_IPV6):
+		ret = ip6ip6_tnl_xmit(skb, dev);
+		break;
+	default:
+		goto tx_err;
 	}
+
+	if (ret < 0)
+		goto tx_err;
+
+	return NETDEV_TX_OK;
+
+tx_err:
+	stats->tx_errors++;
+	stats->tx_dropped++;
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
 }
 
-static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
+static void ip6_tnl_link_config(struct ip6_tnl *t)
 {
 	struct net_device *dev = t->dev;
-	struct ip6_tnl_parm *p = &t->parms;
-	struct flowi *fl = &t->fl;
+	struct __ip6_tnl_parm *p = &t->parms;
+	struct flowi6 *fl6 = &t->fl.u.ip6;
 
-	memcpy(&dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
-	memcpy(&dev->broadcast, &p->raddr, sizeof(struct in6_addr));
+	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
 
 	/* Set up flowi template */
-	ipv6_addr_copy(&fl->fl6_src, &p->laddr);
-	ipv6_addr_copy(&fl->fl6_dst, &p->raddr);
-	fl->oif = p->link;
-	fl->fl6_flowlabel = 0;
+	fl6->saddr = p->laddr;
+	fl6->daddr = p->raddr;
+	fl6->flowi6_oif = p->link;
+	fl6->flowlabel = 0;
 
 	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
-		fl->fl6_flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
+		fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
 	if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
-		fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
+		fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
 
-	ip6_tnl_set_cap(t);
+	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
+	p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
 
 	if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
 		dev->flags |= IFF_POINTOPOINT;
@@ -845,58 +1215,107 @@ static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
 	dev->iflink = p->link;
 
 	if (p->flags & IP6_TNL_F_CAP_XMIT) {
-		struct rt6_info *rt = rt6_lookup(&p->raddr, &p->laddr,
-						 p->link, 0);
+		int strict = (ipv6_addr_type(&p->raddr) &
+			      (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
+
+		struct rt6_info *rt = rt6_lookup(t->net,
+						 &p->raddr, &p->laddr,
+						 p->link, strict);
 
 		if (rt == NULL)
 			return;
 
-		if (rt->rt6i_dev) {
-			dev->hard_header_len = rt->rt6i_dev->hard_header_len +
+		if (rt->dst.dev) {
+			dev->hard_header_len = rt->dst.dev->hard_header_len +
 				sizeof (struct ipv6hdr);
 
-			dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr);
+			dev->mtu = rt->dst.dev->mtu - sizeof (struct ipv6hdr);
+			if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+				dev->mtu-=8;
 
 			if (dev->mtu < IPV6_MIN_MTU)
 				dev->mtu = IPV6_MIN_MTU;
 		}
-		dst_release(&rt->u.dst);
+		ip6_rt_put(rt);
 	}
 }
 
 /**
- * ip6ip6_tnl_change - update the tunnel parameters
+ * ip6_tnl_change - update the tunnel parameters
  *   @t: tunnel to be changed
  *   @p: tunnel configuration parameters
- *   @active: != 0 if tunnel is ready for use
  *
  * Description:
- *   ip6ip6_tnl_change() updates the tunnel parameters
+ *   ip6_tnl_change() updates the tunnel parameters
  **/
 
 static int
-ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
+ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
 {
-	ipv6_addr_copy(&t->parms.laddr, &p->laddr);
-	ipv6_addr_copy(&t->parms.raddr, &p->raddr);
+	t->parms.laddr = p->laddr;
+	t->parms.raddr = p->raddr;
 	t->parms.flags = p->flags;
 	t->parms.hop_limit = p->hop_limit;
 	t->parms.encap_limit = p->encap_limit;
 	t->parms.flowinfo = p->flowinfo;
 	t->parms.link = p->link;
-	ip6ip6_tnl_link_config(t);
+	t->parms.proto = p->proto;
+	ip6_tnl_dst_reset(t);
+	ip6_tnl_link_config(t);
 	return 0;
 }
 
+static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
+{
+	struct net *net = t->net;
+	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+	int err;
+
+	ip6_tnl_unlink(ip6n, t);
+	synchronize_net();
+	err = ip6_tnl_change(t, p);
+	ip6_tnl_link(ip6n, t);
+	netdev_state_change(t->dev);
+	return err;
+}
+
+static void
+ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
+{
+	p->laddr = u->laddr;
+	p->raddr = u->raddr;
+	p->flags = u->flags;
+	p->hop_limit = u->hop_limit;
+	p->encap_limit = u->encap_limit;
+	p->flowinfo = u->flowinfo;
+	p->link = u->link;
+	p->proto = u->proto;
+	memcpy(p->name, u->name, sizeof(u->name));
+}
+
+static void
+ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
+{
+	u->laddr = p->laddr;
+	u->raddr = p->raddr;
+	u->flags = p->flags;
+	u->hop_limit = p->hop_limit;
+	u->encap_limit = p->encap_limit;
+	u->flowinfo = p->flowinfo;
+	u->link = p->link;
+	u->proto = p->proto;
+	memcpy(u->name, p->name, sizeof(u->name));
+}
+
 /**
- * ip6ip6_tnl_ioctl - configure ipv6 tunnels from userspace 
+ * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
  *   @dev: virtual device associated with tunnel
  *   @ifr: parameters passed from userspace
  *   @cmd: command to be performed
  *
  * Description:
- *   ip6ip6_tnl_ioctl() is used for managing IPv6 tunnels 
- *   from userspace. 
+ *   ip6_tnl_ioctl() is used for managing IPv6 tunnels
+ *   from userspace.
  *
  *   The possible commands are the following:
  *     %SIOCGETTUNNEL: get tunnel parameters for device
@@ -904,7 +1323,7 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
  *     %SIOCCHGTUNNEL: change tunnel parameters to those given
  *     %SIOCDELTUNNEL: delete tunnel
  *
- *   The fallback device "ip6tnl0", created during module 
+ *   The fallback device "ip6tnl0", created during module
  *   initialization, can be used for creating other tunnel devices.
  *
  * Return:
@@ -917,30 +1336,30 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
  **/
 
 static int
-ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	int err = 0;
-	int create;
 	struct ip6_tnl_parm p;
-	struct ip6_tnl *t = NULL;
+	struct __ip6_tnl_parm p1;
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net *net = t->net;
+	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 
 	switch (cmd) {
 	case SIOCGETTUNNEL:
-		if (dev == ip6ip6_fb_tnl_dev) {
-			if (copy_from_user(&p,
-					   ifr->ifr_ifru.ifru_data,
-					   sizeof (p))) {
+		if (dev == ip6n->fb_tnl_dev) {
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
 				err = -EFAULT;
 				break;
 			}
-			if ((err = ip6ip6_tnl_locate(&p, &t, 0)) == -ENODEV)
-				t = (struct ip6_tnl *) dev->priv;
-			else if (err)
-				break;
-		} else
-			t = (struct ip6_tnl *) dev->priv;
-
-		memcpy(&p, &t->parms, sizeof (p));
+			ip6_tnl_parm_from_user(&p1, &p);
+			t = ip6_tnl_locate(net, &p1, 0);
+			if (t == NULL)
+				t = netdev_priv(dev);
+		} else {
+			memset(&p, 0, sizeof(p));
+		}
+		ip6_tnl_parm_to_user(&p, &t->parms);
 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
 			err = -EFAULT;
 		}
@@ -948,58 +1367,58 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	case SIOCADDTUNNEL:
 	case SIOCCHGTUNNEL:
 		err = -EPERM;
-		create = (cmd == SIOCADDTUNNEL);
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			break;
-		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
-			err = -EFAULT;
+		err = -EFAULT;
+		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
 			break;
-		}
-		if (!create && dev != ip6ip6_fb_tnl_dev) {
-			t = (struct ip6_tnl *) dev->priv;
-		}
-		if (!t && (err = ip6ip6_tnl_locate(&p, &t, create))) {
+		err = -EINVAL;
+		if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
+		    p.proto != 0)
 			break;
+		ip6_tnl_parm_from_user(&p1, &p);
+		t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
+		if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
+			if (t != NULL) {
+				if (t->dev != dev) {
+					err = -EEXIST;
+					break;
+				}
+			} else
+				t = netdev_priv(dev);
+
+			err = ip6_tnl_update(t, &p1);
 		}
-		if (cmd == SIOCCHGTUNNEL) {
-			if (t->dev != dev) {
-				err = -EEXIST;
-				break;
-			}
-			ip6ip6_tnl_unlink(t);
-			err = ip6ip6_tnl_change(t, &p);
-			ip6ip6_tnl_link(t);
-			netdev_state_change(dev);
-		}
-		if (copy_to_user(ifr->ifr_ifru.ifru_data,
-				 &t->parms, sizeof (p))) {
-			err = -EFAULT;
-		} else {
+		if (t) {
 			err = 0;
-		}
+			ip6_tnl_parm_to_user(&p, &t->parms);
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+				err = -EFAULT;
+
+		} else
+			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 		break;
 	case SIOCDELTUNNEL:
 		err = -EPERM;
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			break;
 
-		if (dev == ip6ip6_fb_tnl_dev) {
-			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
-					   sizeof (p))) {
-				err = -EFAULT;
+		if (dev == ip6n->fb_tnl_dev) {
+			err = -EFAULT;
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
 				break;
-			}
-			err = ip6ip6_tnl_locate(&p, &t, 0);
-			if (err)
+			err = -ENOENT;
+			ip6_tnl_parm_from_user(&p1, &p);
+			t = ip6_tnl_locate(net, &p1, 0);
+			if (t == NULL)
 				break;
-			if (t == ip6ip6_fb_tnl_dev->priv) {
-				err = -EPERM;
+			err = -EPERM;
+			if (t->dev == ip6n->fb_tnl_dev)
 				break;
-			}
-		} else {
-			t = (struct ip6_tnl *) dev->priv;
+			dev = t->dev;
 		}
-		err = unregister_netdevice(t->dev);
+		err = 0;
+		unregister_netdevice(dev);
 		break;
 	default:
 		err = -EINVAL;
@@ -1008,20 +1427,7 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 }
 
 /**
- * ip6ip6_tnl_get_stats - return the stats for tunnel device 
- *   @dev: virtual device associated with tunnel
- *
- * Return: stats for device
- **/
-
-static struct net_device_stats *
-ip6ip6_tnl_get_stats(struct net_device *dev)
-{
-	return &(((struct ip6_tnl *) dev->priv)->stat);
-}
-
-/**
- * ip6ip6_tnl_change_mtu - change mtu manually for tunnel device
+ * ip6_tnl_change_mtu - change mtu manually for tunnel device
  *   @dev: virtual device associated with tunnel
  *   @new_mtu: the new mtu
  *
@@ -1031,118 +1437,384 @@ ip6ip6_tnl_get_stats(struct net_device *dev)
  **/
 
 static int
-ip6ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
+ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
 {
-	if (new_mtu < IPV6_MIN_MTU) {
-		return -EINVAL;
+	struct ip6_tnl *tnl = netdev_priv(dev);
+
+	if (tnl->parms.proto == IPPROTO_IPIP) {
+		if (new_mtu < 68)
+			return -EINVAL;
+	} else {
+		if (new_mtu < IPV6_MIN_MTU)
+			return -EINVAL;
 	}
+	if (new_mtu > 0xFFF8 - dev->hard_header_len)
+		return -EINVAL;
 	dev->mtu = new_mtu;
 	return 0;
 }
 
+
+static const struct net_device_ops ip6_tnl_netdev_ops = {
+	.ndo_uninit	= ip6_tnl_dev_uninit,
+	.ndo_start_xmit = ip6_tnl_xmit,
+	.ndo_do_ioctl	= ip6_tnl_ioctl,
+	.ndo_change_mtu = ip6_tnl_change_mtu,
+	.ndo_get_stats	= ip6_get_stats,
+};
+
+
 /**
- * ip6ip6_tnl_dev_setup - setup virtual tunnel device
+ * ip6_tnl_dev_setup - setup virtual tunnel device
  *   @dev: virtual device associated with tunnel
  *
  * Description:
  *   Initialize function pointers and device parameters
  **/
 
-static void ip6ip6_tnl_dev_setup(struct net_device *dev)
+static void ip6_tnl_dev_setup(struct net_device *dev)
 {
-	SET_MODULE_OWNER(dev);
-	dev->uninit = ip6ip6_tnl_dev_uninit;
-	dev->destructor = free_netdev;
-	dev->hard_start_xmit = ip6ip6_tnl_xmit;
-	dev->get_stats = ip6ip6_tnl_get_stats;
-	dev->do_ioctl = ip6ip6_tnl_ioctl;
-	dev->change_mtu = ip6ip6_tnl_change_mtu;
+	struct ip6_tnl *t;
+
+	dev->netdev_ops = &ip6_tnl_netdev_ops;
+	dev->destructor = ip6_dev_free;
 
 	dev->type = ARPHRD_TUNNEL6;
 	dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
 	dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
+	t = netdev_priv(dev);
+	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		dev->mtu-=8;
 	dev->flags |= IFF_NOARP;
 	dev->addr_len = sizeof(struct in6_addr);
+	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	/* This perm addr will be used as interface identifier by IPv6 */
+	dev->addr_assign_type = NET_ADDR_RANDOM;
+	eth_random_addr(dev->perm_addr);
 }
 
 
 /**
- * ip6ip6_tnl_dev_init_gen - general initializer for all tunnel devices
+ * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
  *   @dev: virtual device associated with tunnel
  **/
 
-static inline void
-ip6ip6_tnl_dev_init_gen(struct net_device *dev)
+static inline int
+ip6_tnl_dev_init_gen(struct net_device *dev)
 {
-	struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
-	t->fl.proto = IPPROTO_IPV6;
+	struct ip6_tnl *t = netdev_priv(dev);
+
 	t->dev = dev;
-	strcpy(t->parms.name, dev->name);
+	t->net = dev_net(dev);
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	if (!dev->tstats)
+		return -ENOMEM;
+	return 0;
 }
 
 /**
- * ip6ip6_tnl_dev_init - initializer for all non fallback tunnel devices
+ * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
  *   @dev: virtual device associated with tunnel
  **/
 
-static int
-ip6ip6_tnl_dev_init(struct net_device *dev)
+static int ip6_tnl_dev_init(struct net_device *dev)
 {
-	struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
-	ip6ip6_tnl_dev_init_gen(dev);
-	ip6ip6_tnl_link_config(t);
+	struct ip6_tnl *t = netdev_priv(dev);
+	int err = ip6_tnl_dev_init_gen(dev);
+
+	if (err)
+		return err;
+	ip6_tnl_link_config(t);
 	return 0;
 }
 
 /**
- * ip6ip6_fb_tnl_dev_init - initializer for fallback tunnel device
+ * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
  *   @dev: fallback device
  *
  * Return: 0
  **/
 
-static int 
-ip6ip6_fb_tnl_dev_init(struct net_device *dev)
+static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
 {
-	struct ip6_tnl *t = dev->priv;
-	ip6ip6_tnl_dev_init_gen(dev);
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net *net = dev_net(dev);
+	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+	int err = ip6_tnl_dev_init_gen(dev);
+
+	if (err)
+		return err;
+
+	t->parms.proto = IPPROTO_IPV6;
 	dev_hold(dev);
-	tnls_wc[0] = t;
+
+	ip6_tnl_link_config(t);
+
+	rcu_assign_pointer(ip6n->tnls_wc[0], t);
 	return 0;
 }
 
-#ifdef CONFIG_INET6_TUNNEL
-static struct xfrm6_tunnel ip6ip6_handler = {
-	.handler	= ip6ip6_rcv,
-	.err_handler	= ip6ip6_err,
-};
+static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	u8 proto;
+
+	if (!data || !data[IFLA_IPTUN_PROTO])
+		return 0;
+
+	proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+	if (proto != IPPROTO_IPV6 &&
+	    proto != IPPROTO_IPIP &&
+	    proto != 0)
+		return -EINVAL;
 
-static inline int ip6ip6_register(void)
+	return 0;
+}
+
+static void ip6_tnl_netlink_parms(struct nlattr *data[],
+				  struct __ip6_tnl_parm *parms)
 {
-	return xfrm6_tunnel_register(&ip6ip6_handler);
+	memset(parms, 0, sizeof(*parms));
+
+	if (!data)
+		return;
+
+	if (data[IFLA_IPTUN_LINK])
+		parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
+
+	if (data[IFLA_IPTUN_LOCAL])
+		nla_memcpy(&parms->laddr, data[IFLA_IPTUN_LOCAL],
+			   sizeof(struct in6_addr));
+
+	if (data[IFLA_IPTUN_REMOTE])
+		nla_memcpy(&parms->raddr, data[IFLA_IPTUN_REMOTE],
+			   sizeof(struct in6_addr));
+
+	if (data[IFLA_IPTUN_TTL])
+		parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);
+
+	if (data[IFLA_IPTUN_ENCAP_LIMIT])
+		parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);
+
+	if (data[IFLA_IPTUN_FLOWINFO])
+		parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);
+
+	if (data[IFLA_IPTUN_FLAGS])
+		parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);
+
+	if (data[IFLA_IPTUN_PROTO])
+		parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
 }
 
-static inline int ip6ip6_unregister(void)
+static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
+			   struct nlattr *tb[], struct nlattr *data[])
 {
-	return xfrm6_tunnel_deregister(&ip6ip6_handler);
+	struct net *net = dev_net(dev);
+	struct ip6_tnl *nt;
+
+	nt = netdev_priv(dev);
+	ip6_tnl_netlink_parms(data, &nt->parms);
+
+	if (ip6_tnl_locate(net, &nt->parms, 0))
+		return -EEXIST;
+
+	return ip6_tnl_create2(dev);
 }
-#else
-static struct inet6_protocol xfrm6_tunnel_protocol = {
+
+static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
+			      struct nlattr *data[])
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct __ip6_tnl_parm p;
+	struct net *net = t->net;
+	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+
+	if (dev == ip6n->fb_tnl_dev)
+		return -EINVAL;
+
+	ip6_tnl_netlink_parms(data, &p);
+
+	t = ip6_tnl_locate(net, &p, 0);
+
+	if (t) {
+		if (t->dev != dev)
+			return -EEXIST;
+	} else
+		t = netdev_priv(dev);
+
+	return ip6_tnl_update(t, &p);
+}
+
+static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head)
+{
+	struct net *net = dev_net(dev);
+	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+
+	if (dev != ip6n->fb_tnl_dev)
+		unregister_netdevice_queue(dev, head);
+}
+
+static size_t ip6_tnl_get_size(const struct net_device *dev)
+{
+	return
+		/* IFLA_IPTUN_LINK */
+		nla_total_size(4) +
+		/* IFLA_IPTUN_LOCAL */
+		nla_total_size(sizeof(struct in6_addr)) +
+		/* IFLA_IPTUN_REMOTE */
+		nla_total_size(sizeof(struct in6_addr)) +
+		/* IFLA_IPTUN_TTL */
+		nla_total_size(1) +
+		/* IFLA_IPTUN_ENCAP_LIMIT */
+		nla_total_size(1) +
+		/* IFLA_IPTUN_FLOWINFO */
+		nla_total_size(4) +
+		/* IFLA_IPTUN_FLAGS */
+		nla_total_size(4) +
+		/* IFLA_IPTUN_PROTO */
+		nla_total_size(1) +
+		0;
+}
+
+static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct ip6_tnl *tunnel = netdev_priv(dev);
+	struct __ip6_tnl_parm *parm = &tunnel->parms;
+
+	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
+	    nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr),
+		    &parm->laddr) ||
+	    nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
+		    &parm->raddr) ||
+	    nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
+	    nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
+	    nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
+	    nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
+	    nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
+	[IFLA_IPTUN_LINK]		= { .type = NLA_U32 },
+	[IFLA_IPTUN_LOCAL]		= { .len = sizeof(struct in6_addr) },
+	[IFLA_IPTUN_REMOTE]		= { .len = sizeof(struct in6_addr) },
+	[IFLA_IPTUN_TTL]		= { .type = NLA_U8 },
+	[IFLA_IPTUN_ENCAP_LIMIT]	= { .type = NLA_U8 },
+	[IFLA_IPTUN_FLOWINFO]		= { .type = NLA_U32 },
+	[IFLA_IPTUN_FLAGS]		= { .type = NLA_U32 },
+	[IFLA_IPTUN_PROTO]		= { .type = NLA_U8 },
+};
+
+static struct rtnl_link_ops ip6_link_ops __read_mostly = {
+	.kind		= "ip6tnl",
+	.maxtype	= IFLA_IPTUN_MAX,
+	.policy		= ip6_tnl_policy,
+	.priv_size	= sizeof(struct ip6_tnl),
+	.setup		= ip6_tnl_dev_setup,
+	.validate	= ip6_tnl_validate,
+	.newlink	= ip6_tnl_newlink,
+	.changelink	= ip6_tnl_changelink,
+	.dellink	= ip6_tnl_dellink,
+	.get_size	= ip6_tnl_get_size,
+	.fill_info	= ip6_tnl_fill_info,
+};
+
+static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
+	.handler	= ip4ip6_rcv,
+	.err_handler	= ip4ip6_err,
+	.priority	=	1,
+};
+
+static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
 	.handler	= ip6ip6_rcv,
 	.err_handler	= ip6ip6_err,
-	.flags		= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+	.priority	=	1,
 };
 
-static inline int ip6ip6_register(void)
+static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
 {
-	return inet6_add_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6);
+	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+	struct net_device *dev, *aux;
+	int h;
+	struct ip6_tnl *t;
+	LIST_HEAD(list);
+
+	for_each_netdev_safe(net, dev, aux)
+		if (dev->rtnl_link_ops == &ip6_link_ops)
+			unregister_netdevice_queue(dev, &list);
+
+	for (h = 0; h < HASH_SIZE; h++) {
+		t = rtnl_dereference(ip6n->tnls_r_l[h]);
+		while (t != NULL) {
+			/* If dev is in the same netns, it has already
+			 * been added to the list by the previous loop.
+			 */
+			if (!net_eq(dev_net(t->dev), net))
+				unregister_netdevice_queue(t->dev, &list);
+			t = rtnl_dereference(t->next);
+		}
+	}
+
+	unregister_netdevice_many(&list);
 }
 
-static inline int ip6ip6_unregister(void)
+static int __net_init ip6_tnl_init_net(struct net *net)
 {
-	return inet6_del_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6);
+	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+	struct ip6_tnl *t = NULL;
+	int err;
+
+	ip6n->tnls[0] = ip6n->tnls_wc;
+	ip6n->tnls[1] = ip6n->tnls_r_l;
+
+	err = -ENOMEM;
+	ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
+				      ip6_tnl_dev_setup);
+
+	if (!ip6n->fb_tnl_dev)
+		goto err_alloc_dev;
+	dev_net_set(ip6n->fb_tnl_dev, net);
+	ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
+	/* FB netdevice is special: we have one, and only one per netns.
+	 * Allowing to move it to another netns is clearly unsafe.
+	 */
+	ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;
+
+	err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
+	if (err < 0)
+		goto err_register;
+
+	err = register_netdev(ip6n->fb_tnl_dev);
+	if (err < 0)
+		goto err_register;
+
+	t = netdev_priv(ip6n->fb_tnl_dev);
+
+	strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
+	return 0;
+
+err_register:
+	ip6_dev_free(ip6n->fb_tnl_dev);
+err_alloc_dev:
+	return err;
+}
+
+static void __net_exit ip6_tnl_exit_net(struct net *net)
+{
+	rtnl_lock();
+	ip6_tnl_destroy_tunnels(net);
+	rtnl_unlock();
 }
-#endif
+
+static struct pernet_operations ip6_tnl_net_ops = {
+	.init = ip6_tnl_init_net,
+	.exit = ip6_tnl_exit_net,
+	.id   = &ip6_tnl_net_id,
+	.size = sizeof(struct ip6_tnl_net),
+};
 
 /**
  * ip6_tunnel_init - register protocol and reserve needed resources
@@ -1154,26 +1826,34 @@ static int __init ip6_tunnel_init(void)
 {
 	int  err;
 
-	if (ip6ip6_register() < 0) {
-		printk(KERN_ERR "ip6ip6 init: can't register tunnel\n");
-		return -EAGAIN;
-	}
-	ip6ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
-					 ip6ip6_tnl_dev_setup);
+	err = register_pernet_device(&ip6_tnl_net_ops);
+	if (err < 0)
+		goto out_pernet;
 
-	if (!ip6ip6_fb_tnl_dev) {
-		err = -ENOMEM;
-		goto fail;
+	err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
+	if (err < 0) {
+		pr_err("%s: can't register ip4ip6\n", __func__);
+		goto out_ip4ip6;
 	}
-	ip6ip6_fb_tnl_dev->init = ip6ip6_fb_tnl_dev_init;
 
-	if ((err = register_netdev(ip6ip6_fb_tnl_dev))) {
-		free_netdev(ip6ip6_fb_tnl_dev);
-		goto fail;
+	err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6);
+	if (err < 0) {
+		pr_err("%s: can't register ip6ip6\n", __func__);
+		goto out_ip6ip6;
 	}
+	err = rtnl_link_register(&ip6_link_ops);
+	if (err < 0)
+		goto rtnl_link_failed;
+
 	return 0;
-fail:
-	ip6ip6_unregister();
+
+rtnl_link_failed:
+	xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
+out_ip6ip6:
+	xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
+out_ip4ip6:
+	unregister_pernet_device(&ip6_tnl_net_ops);
+out_pernet:
 	return err;
 }
 
@@ -1183,10 +1863,14 @@ fail:
 
 static void __exit ip6_tunnel_cleanup(void)
 {
-	if (ip6ip6_unregister() < 0)
-		printk(KERN_INFO "ip6ip6 close: can't deregister tunnel\n");
+	rtnl_link_unregister(&ip6_link_ops);
+	if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
+		pr_info("%s: can't deregister ip4ip6\n", __func__);
+
+	if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
+		pr_info("%s: can't deregister ip6ip6\n", __func__);
 
-	unregister_netdev(ip6ip6_fb_tnl_dev);
+	unregister_pernet_device(&ip6_tnl_net_ops);
 }
 
 module_init(ip6_tunnel_init);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
new file mode 100644
index 00000000000..9aaa6bb229e
--- /dev/null
+++ b/net/ipv6/ip6_vti.c
@@ -0,0 +1,1160 @@
+/*
+ *	IPv6 virtual tunneling interface
+ *
+ *	Copyright (C) 2013 secunet Security Networks AG
+ *
+ *	Author:
+ *	Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ *	Based on:
+ *	net/ipv6/ip6_tunnel.c
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/sockios.h>
+#include <linux/icmp.h>
+#include <linux/if.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmpv6.h>
+#include <linux/init.h>
+#include <linux/route.h>
+#include <linux/rtnetlink.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/slab.h>
+#include <linux/hash.h>
+
+#include <linux/uaccess.h>
+#include <linux/atomic.h>
+
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/ip_tunnels.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/ip6_tunnel.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#define HASH_SIZE_SHIFT  5
+#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+
+static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
+{
+	u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
+
+	return hash_32(hash, HASH_SIZE_SHIFT);
+}
+
+static int vti6_dev_init(struct net_device *dev);
+static void vti6_dev_setup(struct net_device *dev);
+static struct rtnl_link_ops vti6_link_ops __read_mostly;
+
+static int vti6_net_id __read_mostly;
+struct vti6_net {
+	/* the vti6 tunnel fallback device */
+	struct net_device *fb_tnl_dev;
+	/* lists for storing tunnels in use */
+	struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+	struct ip6_tnl __rcu *tnls_wc[1];
+	struct ip6_tnl __rcu **tnls[2];
+};
+
+#define for_each_vti6_tunnel_rcu(start) \
+	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+
+/**
+ * vti6_tnl_lookup - fetch tunnel matching the end-point addresses
+ *   @net: network namespace
+ *   @remote: the address of the tunnel exit-point
+ *   @local: the address of the tunnel entry-point
+ *
+ * Return:
+ *   tunnel matching given end-points if found,
+ *   else fallback tunnel if its device is up,
+ *   else %NULL
+ **/
+static struct ip6_tnl *
+vti6_tnl_lookup(struct net *net, const struct in6_addr *remote,
+		const struct in6_addr *local)
+{
+	unsigned int hash = HASH(remote, local);
+	struct ip6_tnl *t;
+	struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+	for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
+		if (ipv6_addr_equal(local, &t->parms.laddr) &&
+		    ipv6_addr_equal(remote, &t->parms.raddr) &&
+		    (t->dev->flags & IFF_UP))
+			return t;
+	}
+	t = rcu_dereference(ip6n->tnls_wc[0]);
+	if (t && (t->dev->flags & IFF_UP))
+		return t;
+
+	return NULL;
+}
+
+/**
+ * vti6_tnl_bucket - get head of list matching given tunnel parameters
+ *   @p: parameters containing tunnel end-points
+ *
+ * Description:
+ *   vti6_tnl_bucket() returns the head of the list matching the
+ *   &struct in6_addr entries laddr and raddr in @p.
+ *
+ * Return: head of IPv6 tunnel list
+ **/
+static struct ip6_tnl __rcu **
+vti6_tnl_bucket(struct vti6_net *ip6n, const struct __ip6_tnl_parm *p)
+{
+	const struct in6_addr *remote = &p->raddr;
+	const struct in6_addr *local = &p->laddr;
+	unsigned int h = 0;
+	int prio = 0;
+
+	if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
+		prio = 1;
+		h = HASH(remote, local);
+	}
+	return &ip6n->tnls[prio][h];
+}
+
+static void
+vti6_tnl_link(struct vti6_net *ip6n, struct ip6_tnl *t)
+{
+	struct ip6_tnl __rcu **tp = vti6_tnl_bucket(ip6n, &t->parms);
+
+	rcu_assign_pointer(t->next , rtnl_dereference(*tp));
+	rcu_assign_pointer(*tp, t);
+}
+
+static void
+vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t)
+{
+	struct ip6_tnl __rcu **tp;
+	struct ip6_tnl *iter;
+
+	for (tp = vti6_tnl_bucket(ip6n, &t->parms);
+	     (iter = rtnl_dereference(*tp)) != NULL;
+	     tp = &iter->next) {
+		if (t == iter) {
+			rcu_assign_pointer(*tp, t->next);
+			break;
+		}
+	}
+}
+
+static void vti6_dev_free(struct net_device *dev)
+{
+	free_percpu(dev->tstats);
+	free_netdev(dev);
+}
+
+static int vti6_tnl_create2(struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net *net = dev_net(dev);
+	struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+	int err;
+
+	err = vti6_dev_init(dev);
+	if (err < 0)
+		goto out;
+
+	err = register_netdevice(dev);
+	if (err < 0)
+		goto out;
+
+	strcpy(t->parms.name, dev->name);
+	dev->rtnl_link_ops = &vti6_link_ops;
+
+	dev_hold(dev);
+	vti6_tnl_link(ip6n, t);
+
+	return 0;
+
+out:
+	return err;
+}
+
+static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
+{
+	struct net_device *dev;
+	struct ip6_tnl *t;
+	char name[IFNAMSIZ];
+	int err;
+
+	if (p->name[0])
+		strlcpy(name, p->name, IFNAMSIZ);
+	else
+		sprintf(name, "ip6_vti%%d");
+
+	dev = alloc_netdev(sizeof(*t), name, vti6_dev_setup);
+	if (dev == NULL)
+		goto failed;
+
+	dev_net_set(dev, net);
+
+	t = netdev_priv(dev);
+	t->parms = *p;
+	t->net = dev_net(dev);
+
+	err = vti6_tnl_create2(dev);
+	if (err < 0)
+		goto failed_free;
+
+	return t;
+
+failed_free:
+	vti6_dev_free(dev);
+failed:
+	return NULL;
+}
+
+/**
+ * vti6_locate - find or create tunnel matching given parameters
+ *   @net: network namespace
+ *   @p: tunnel parameters
+ *   @create: != 0 if allowed to create new tunnel if no match found
+ *
+ * Description:
+ *   vti6_locate() first tries to locate an existing tunnel
+ *   based on @parms. If this is unsuccessful, but @create is set a new
+ *   tunnel device is created and registered for use.
+ *
+ * Return:
+ *   matching tunnel or NULL
+ **/
+static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p,
+				   int create)
+{
+	const struct in6_addr *remote = &p->raddr;
+	const struct in6_addr *local = &p->laddr;
+	struct ip6_tnl __rcu **tp;
+	struct ip6_tnl *t;
+	struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+	for (tp = vti6_tnl_bucket(ip6n, p);
+	     (t = rtnl_dereference(*tp)) != NULL;
+	     tp = &t->next) {
+		if (ipv6_addr_equal(local, &t->parms.laddr) &&
+		    ipv6_addr_equal(remote, &t->parms.raddr))
+			return t;
+	}
+	if (!create)
+		return NULL;
+	return vti6_tnl_create(net, p);
+}
+
+/**
+ * vti6_dev_uninit - tunnel device uninitializer
+ *   @dev: the device to be destroyed
+ *
+ * Description:
+ *   vti6_dev_uninit() removes tunnel from its list
+ **/
+static void vti6_dev_uninit(struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net *net = dev_net(dev);
+	struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+	if (dev == ip6n->fb_tnl_dev)
+		RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
+	else
+		vti6_tnl_unlink(ip6n, t);
+	dev_put(dev);
+}
+
+static int vti6_rcv(struct sk_buff *skb)
+{
+	struct ip6_tnl *t;
+	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
+	rcu_read_lock();
+	if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
+				 &ipv6h->daddr)) != NULL) {
+		if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) {
+			rcu_read_unlock();
+			goto discard;
+		}
+
+		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+			rcu_read_unlock();
+			return 0;
+		}
+
+		if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
+			t->dev->stats.rx_dropped++;
+			rcu_read_unlock();
+			goto discard;
+		}
+
+		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
+		skb->mark = be32_to_cpu(t->parms.i_key);
+
+		rcu_read_unlock();
+
+		return xfrm6_rcv(skb);
+	}
+	rcu_read_unlock();
+	return -EINVAL;
+discard:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int vti6_rcv_cb(struct sk_buff *skb, int err)
+{
+	unsigned short family;
+	struct net_device *dev;
+	struct pcpu_sw_netstats *tstats;
+	struct xfrm_state *x;
+	struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
+
+	if (!t)
+		return 1;
+
+	dev = t->dev;
+
+	if (err) {
+		dev->stats.rx_errors++;
+		dev->stats.rx_dropped++;
+
+		return 0;
+	}
+
+	x = xfrm_input_state(skb);
+	family = x->inner_mode->afinfo->family;
+
+	if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+		return -EPERM;
+
+	skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev)));
+	skb->dev = dev;
+
+	tstats = this_cpu_ptr(dev->tstats);
+	u64_stats_update_begin(&tstats->syncp);
+	tstats->rx_packets++;
+	tstats->rx_bytes += skb->len;
+	u64_stats_update_end(&tstats->syncp);
+
+	return 0;
+}
+
+/**
+ * vti6_addr_conflict - compare packet addresses to tunnel's own
+ *   @t: the outgoing tunnel device
+ *   @hdr: IPv6 header from the incoming packet
+ *
+ * Description:
+ *   Avoid trivial tunneling loop by checking that tunnel exit-point
+ *   doesn't match source of incoming packet.
+ *
+ * Return:
+ *   1 if conflict,
+ *   0 else
+ **/
+static inline bool
+vti6_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
+{
+	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
+}
+
+static bool vti6_state_check(const struct xfrm_state *x,
+			     const struct in6_addr *dst,
+			     const struct in6_addr *src)
+{
+	xfrm_address_t *daddr = (xfrm_address_t *)dst;
+	xfrm_address_t *saddr = (xfrm_address_t *)src;
+
+	/* if there is no transform then this tunnel is not functional.
+	 * Or if the xfrm is not mode tunnel.
+	 */
+	if (!x || x->props.mode != XFRM_MODE_TUNNEL ||
+	    x->props.family != AF_INET6)
+		return false;
+
+	if (ipv6_addr_any(dst))
+		return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET6);
+
+	if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET6))
+		return false;
+
+	return true;
+}
+
+/**
+ * vti6_xmit - send a packet
+ *   @skb: the outgoing socket buffer
+ *   @dev: the outgoing tunnel device
+ *   @fl: the flow informations for the xfrm_lookup
+ **/
+static int
+vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net_device_stats *stats = &t->dev->stats;
+	struct dst_entry *dst = skb_dst(skb);
+	struct net_device *tdev;
+	int err = -1;
+
+	if (!dst)
+		goto tx_err_link_failure;
+
+	dst_hold(dst);
+	dst = xfrm_lookup(t->net, dst, fl, NULL, 0);
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
+		dst = NULL;
+		goto tx_err_link_failure;
+	}
+
+	if (!vti6_state_check(dst->xfrm, &t->parms.raddr, &t->parms.laddr))
+		goto tx_err_link_failure;
+
+	tdev = dst->dev;
+
+	if (tdev == dev) {
+		stats->collisions++;
+		net_warn_ratelimited("%s: Local routing loop detected!\n",
+				     t->parms.name);
+		goto tx_err_dst_release;
+	}
+
+	skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
+	skb_dst_set(skb, dst);
+	skb->dev = skb_dst(skb)->dev;
+
+	err = dst_output(skb);
+	if (net_xmit_eval(err) == 0) {
+		struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+
+		u64_stats_update_begin(&tstats->syncp);
+		tstats->tx_bytes += skb->len;
+		tstats->tx_packets++;
+		u64_stats_update_end(&tstats->syncp);
+	} else {
+		stats->tx_errors++;
+		stats->tx_aborted_errors++;
+	}
+
+	return 0;
+tx_err_link_failure:
+	stats->tx_carrier_errors++;
+	dst_link_failure(skb);
+tx_err_dst_release:
+	dst_release(dst);
+	return err;
+}
+
+static netdev_tx_t
+vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net_device_stats *stats = &t->dev->stats;
+	struct ipv6hdr *ipv6h;
+	struct flowi fl;
+	int ret;
+
+	memset(&fl, 0, sizeof(fl));
+	skb->mark = be32_to_cpu(t->parms.o_key);
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IPV6):
+		ipv6h = ipv6_hdr(skb);
+
+		if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
+		    !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h))
+			goto tx_err;
+
+		xfrm_decode_session(skb, &fl, AF_INET6);
+		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+		break;
+	case htons(ETH_P_IP):
+		xfrm_decode_session(skb, &fl, AF_INET);
+		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+		break;
+	default:
+		goto tx_err;
+	}
+
+	ret = vti6_xmit(skb, dev, &fl);
+	if (ret < 0)
+		goto tx_err;
+
+	return NETDEV_TX_OK;
+
+tx_err:
+	stats->tx_errors++;
+	stats->tx_dropped++;
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		    u8 type, u8 code, int offset, __be32 info)
+{
+	__be32 spi;
+	__u32 mark;
+	struct xfrm_state *x;
+	struct ip6_tnl *t;
+	struct ip_esp_hdr *esph;
+	struct ip_auth_hdr *ah;
+	struct ip_comp_hdr *ipch;
+	struct net *net = dev_net(skb->dev);
+	const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+	int protocol = iph->nexthdr;
+
+	t = vti6_tnl_lookup(dev_net(skb->dev), &iph->daddr, &iph->saddr);
+	if (!t)
+		return -1;
+
+	mark = be32_to_cpu(t->parms.o_key);
+
+	switch (protocol) {
+	case IPPROTO_ESP:
+		esph = (struct ip_esp_hdr *)(skb->data + offset);
+		spi = esph->spi;
+		break;
+	case IPPROTO_AH:
+		ah = (struct ip_auth_hdr *)(skb->data + offset);
+		spi = ah->spi;
+		break;
+	case IPPROTO_COMP:
+		ipch = (struct ip_comp_hdr *)(skb->data + offset);
+		spi = htonl(ntohs(ipch->cpi));
+		break;
+	default:
+		return 0;
+	}
+
+	if (type != ICMPV6_PKT_TOOBIG &&
+	    type != NDISC_REDIRECT)
+		return 0;
+
+	x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
+			      spi, protocol, AF_INET6);
+	if (!x)
+		return 0;
+
+	if (type == NDISC_REDIRECT)
+		ip6_redirect(skb, net, skb->dev->ifindex, 0);
+	else
+		ip6_update_pmtu(skb, net, info, 0, 0);
+	xfrm_state_put(x);
+
+	return 0;
+}
+
+static void vti6_link_config(struct ip6_tnl *t)
+{
+	struct net_device *dev = t->dev;
+	struct __ip6_tnl_parm *p = &t->parms;
+
+	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
+
+	p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV |
+		      IP6_TNL_F_CAP_PER_PACKET);
+	p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
+
+	if (p->flags & IP6_TNL_F_CAP_XMIT && p->flags & IP6_TNL_F_CAP_RCV)
+		dev->flags |= IFF_POINTOPOINT;
+	else
+		dev->flags &= ~IFF_POINTOPOINT;
+
+	dev->iflink = p->link;
+}
+
+/**
+ * vti6_tnl_change - update the tunnel parameters
+ *   @t: tunnel to be changed
+ *   @p: tunnel configuration parameters
+ *
+ * Description:
+ *   vti6_tnl_change() updates the tunnel parameters
+ **/
+static int
+vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
+{
+	t->parms.laddr = p->laddr;
+	t->parms.raddr = p->raddr;
+	t->parms.link = p->link;
+	t->parms.i_key = p->i_key;
+	t->parms.o_key = p->o_key;
+	t->parms.proto = p->proto;
+	ip6_tnl_dst_reset(t);
+	vti6_link_config(t);
+	return 0;
+}
+
+static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
+{
+	struct net *net = dev_net(t->dev);
+	struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+	int err;
+
+	vti6_tnl_unlink(ip6n, t);
+	synchronize_net();
+	err = vti6_tnl_change(t, p);
+	vti6_tnl_link(ip6n, t);
+	netdev_state_change(t->dev);
+	return err;
+}
+
+static void
+vti6_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm2 *u)
+{
+	p->laddr = u->laddr;
+	p->raddr = u->raddr;
+	p->link = u->link;
+	p->i_key = u->i_key;
+	p->o_key = u->o_key;
+	p->proto = u->proto;
+
+	memcpy(p->name, u->name, sizeof(u->name));
+}
+
+static void
+vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
+{
+	u->laddr = p->laddr;
+	u->raddr = p->raddr;
+	u->link = p->link;
+	u->i_key = p->i_key;
+	u->o_key = p->o_key;
+	u->proto = p->proto;
+
+	memcpy(u->name, p->name, sizeof(u->name));
+}
+
+/**
+ * vti6_tnl_ioctl - configure vti6 tunnels from userspace
+ *   @dev: virtual device associated with tunnel
+ *   @ifr: parameters passed from userspace
+ *   @cmd: command to be performed
+ *
+ * Description:
+ *   vti6_ioctl() is used for managing vti6 tunnels
+ *   from userspace.
+ *
+ *   The possible commands are the following:
+ *     %SIOCGETTUNNEL: get tunnel parameters for device
+ *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
+ *     %SIOCCHGTUNNEL: change tunnel parameters to those given
+ *     %SIOCDELTUNNEL: delete tunnel
+ *
+ *   The fallback device "ip6_vti0", created during module
+ *   initialization, can be used for creating other tunnel devices.
+ *
+ * Return:
+ *   0 on success,
+ *   %-EFAULT if unable to copy data to or from userspace,
+ *   %-EPERM if current process hasn't %CAP_NET_ADMIN set
+ *   %-EINVAL if passed tunnel parameters are invalid,
+ *   %-EEXIST if changing a tunnel's parameters would cause a conflict
+ *   %-ENODEV if attempting to change or delete a nonexisting device
+ **/
+static int
+vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	int err = 0;
+	struct ip6_tnl_parm2 p;
+	struct __ip6_tnl_parm p1;
+	struct ip6_tnl *t = NULL;
+	struct net *net = dev_net(dev);
+	struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+	switch (cmd) {
+	case SIOCGETTUNNEL:
+		if (dev == ip6n->fb_tnl_dev) {
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+				err = -EFAULT;
+				break;
+			}
+			vti6_parm_from_user(&p1, &p);
+			t = vti6_locate(net, &p1, 0);
+		} else {
+			memset(&p, 0, sizeof(p));
+		}
+		if (t == NULL)
+			t = netdev_priv(dev);
+		vti6_parm_to_user(&p, &t->parms);
+		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+			err = -EFAULT;
+		break;
+	case SIOCADDTUNNEL:
+	case SIOCCHGTUNNEL:
+		err = -EPERM;
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+			break;
+		err = -EFAULT;
+		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+			break;
+		err = -EINVAL;
+		if (p.proto != IPPROTO_IPV6  && p.proto != 0)
+			break;
+		vti6_parm_from_user(&p1, &p);
+		t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL);
+		if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
+			if (t != NULL) {
+				if (t->dev != dev) {
+					err = -EEXIST;
+					break;
+				}
+			} else
+				t = netdev_priv(dev);
+
+			err = vti6_update(t, &p1);
+		}
+		if (t) {
+			err = 0;
+			vti6_parm_to_user(&p, &t->parms);
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+				err = -EFAULT;
+
+		} else
+			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+		break;
+	case SIOCDELTUNNEL:
+		err = -EPERM;
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+			break;
+
+		if (dev == ip6n->fb_tnl_dev) {
+			err = -EFAULT;
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+				break;
+			err = -ENOENT;
+			vti6_parm_from_user(&p1, &p);
+			t = vti6_locate(net, &p1, 0);
+			if (t == NULL)
+				break;
+			err = -EPERM;
+			if (t->dev == ip6n->fb_tnl_dev)
+				break;
+			dev = t->dev;
+		}
+		err = 0;
+		unregister_netdevice(dev);
+		break;
+	default:
+		err = -EINVAL;
+	}
+	return err;
+}
+
+/**
+ * vti6_tnl_change_mtu - change mtu manually for tunnel device
+ *   @dev: virtual device associated with tunnel
+ *   @new_mtu: the new mtu
+ *
+ * Return:
+ *   0 on success,
+ *   %-EINVAL if mtu too small
+ **/
+static int vti6_change_mtu(struct net_device *dev, int new_mtu)
+{
+	if (new_mtu < IPV6_MIN_MTU)
+		return -EINVAL;
+
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+static const struct net_device_ops vti6_netdev_ops = {
+	.ndo_uninit	= vti6_dev_uninit,
+	.ndo_start_xmit = vti6_tnl_xmit,
+	.ndo_do_ioctl	= vti6_ioctl,
+	.ndo_change_mtu = vti6_change_mtu,
+	.ndo_get_stats64 = ip_tunnel_get_stats64,
+};
+
+/**
+ * vti6_dev_setup - setup virtual tunnel device
+ *   @dev: virtual device associated with tunnel
+ *
+ * Description:
+ *   Initialize function pointers and device parameters
+ **/
+static void vti6_dev_setup(struct net_device *dev)
+{
+	dev->netdev_ops = &vti6_netdev_ops;
+	dev->destructor = vti6_dev_free;
+
+	dev->type = ARPHRD_TUNNEL6;
+	dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
+	dev->mtu = ETH_DATA_LEN;
+	dev->flags |= IFF_NOARP;
+	dev->addr_len = sizeof(struct in6_addr);
+	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+}
+
+/**
+ * vti6_dev_init_gen - general initializer for all tunnel devices
+ *   @dev: virtual device associated with tunnel
+ **/
+static inline int vti6_dev_init_gen(struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+
+	t->dev = dev;
+	t->net = dev_net(dev);
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	if (!dev->tstats)
+		return -ENOMEM;
+	return 0;
+}
+
+/**
+ * vti6_dev_init - initializer for all non fallback tunnel devices
+ *   @dev: virtual device associated with tunnel
+ **/
+static int vti6_dev_init(struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	int err = vti6_dev_init_gen(dev);
+
+	if (err)
+		return err;
+	vti6_link_config(t);
+	return 0;
+}
+
+/**
+ * vti6_fb_tnl_dev_init - initializer for fallback tunnel device
+ *   @dev: fallback device
+ *
+ * Return: 0
+ **/
+static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net *net = dev_net(dev);
+	struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+	int err = vti6_dev_init_gen(dev);
+
+	if (err)
+		return err;
+
+	t->parms.proto = IPPROTO_IPV6;
+	dev_hold(dev);
+
+	vti6_link_config(t);
+
+	rcu_assign_pointer(ip6n->tnls_wc[0], t);
+	return 0;
+}
+
+static int vti6_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	return 0;
+}
+
+static void vti6_netlink_parms(struct nlattr *data[],
+			       struct __ip6_tnl_parm *parms)
+{
+	memset(parms, 0, sizeof(*parms));
+
+	if (!data)
+		return;
+
+	if (data[IFLA_VTI_LINK])
+		parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
+
+	if (data[IFLA_VTI_LOCAL])
+		nla_memcpy(&parms->laddr, data[IFLA_VTI_LOCAL],
+			   sizeof(struct in6_addr));
+
+	if (data[IFLA_VTI_REMOTE])
+		nla_memcpy(&parms->raddr, data[IFLA_VTI_REMOTE],
+			   sizeof(struct in6_addr));
+
+	if (data[IFLA_VTI_IKEY])
+		parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);
+
+	if (data[IFLA_VTI_OKEY])
+		parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
+}
+
+static int vti6_newlink(struct net *src_net, struct net_device *dev,
+			struct nlattr *tb[], struct nlattr *data[])
+{
+	struct net *net = dev_net(dev);
+	struct ip6_tnl *nt;
+
+	nt = netdev_priv(dev);
+	vti6_netlink_parms(data, &nt->parms);
+
+	nt->parms.proto = IPPROTO_IPV6;
+
+	if (vti6_locate(net, &nt->parms, 0))
+		return -EEXIST;
+
+	return vti6_tnl_create2(dev);
+}
+
+static int vti6_changelink(struct net_device *dev, struct nlattr *tb[],
+			   struct nlattr *data[])
+{
+	struct ip6_tnl *t;
+	struct __ip6_tnl_parm p;
+	struct net *net = dev_net(dev);
+	struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+	if (dev == ip6n->fb_tnl_dev)
+		return -EINVAL;
+
+	vti6_netlink_parms(data, &p);
+
+	t = vti6_locate(net, &p, 0);
+
+	if (t) {
+		if (t->dev != dev)
+			return -EEXIST;
+	} else
+		t = netdev_priv(dev);
+
+	return vti6_update(t, &p);
+}
+
+static size_t vti6_get_size(const struct net_device *dev)
+{
+	return
+		/* IFLA_VTI_LINK */
+		nla_total_size(4) +
+		/* IFLA_VTI_LOCAL */
+		nla_total_size(sizeof(struct in6_addr)) +
+		/* IFLA_VTI_REMOTE */
+		nla_total_size(sizeof(struct in6_addr)) +
+		/* IFLA_VTI_IKEY */
+		nla_total_size(4) +
+		/* IFLA_VTI_OKEY */
+		nla_total_size(4) +
+		0;
+}
+
+static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct ip6_tnl *tunnel = netdev_priv(dev);
+	struct __ip6_tnl_parm *parm = &tunnel->parms;
+
+	if (nla_put_u32(skb, IFLA_VTI_LINK, parm->link) ||
+	    nla_put(skb, IFLA_VTI_LOCAL, sizeof(struct in6_addr),
+		    &parm->laddr) ||
+	    nla_put(skb, IFLA_VTI_REMOTE, sizeof(struct in6_addr),
+		    &parm->raddr) ||
+	    nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) ||
+	    nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static const struct nla_policy vti6_policy[IFLA_VTI_MAX + 1] = {
+	[IFLA_VTI_LINK]		= { .type = NLA_U32 },
+	[IFLA_VTI_LOCAL]	= { .len = sizeof(struct in6_addr) },
+	[IFLA_VTI_REMOTE]	= { .len = sizeof(struct in6_addr) },
+	[IFLA_VTI_IKEY]		= { .type = NLA_U32 },
+	[IFLA_VTI_OKEY]		= { .type = NLA_U32 },
+};
+
+static struct rtnl_link_ops vti6_link_ops __read_mostly = {
+	.kind		= "vti6",
+	.maxtype	= IFLA_VTI_MAX,
+	.policy		= vti6_policy,
+	.priv_size	= sizeof(struct ip6_tnl),
+	.setup		= vti6_dev_setup,
+	.validate	= vti6_validate,
+	.newlink	= vti6_newlink,
+	.changelink	= vti6_changelink,
+	.get_size	= vti6_get_size,
+	.fill_info	= vti6_fill_info,
+};
+
+static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
+{
+	int h;
+	struct ip6_tnl *t;
+	LIST_HEAD(list);
+
+	for (h = 0; h < HASH_SIZE; h++) {
+		t = rtnl_dereference(ip6n->tnls_r_l[h]);
+		while (t != NULL) {
+			unregister_netdevice_queue(t->dev, &list);
+			t = rtnl_dereference(t->next);
+		}
+	}
+
+	t = rtnl_dereference(ip6n->tnls_wc[0]);
+	unregister_netdevice_queue(t->dev, &list);
+	unregister_netdevice_many(&list);
+}
+
+static int __net_init vti6_init_net(struct net *net)
+{
+	struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+	struct ip6_tnl *t = NULL;
+	int err;
+
+	ip6n->tnls[0] = ip6n->tnls_wc;
+	ip6n->tnls[1] = ip6n->tnls_r_l;
+
+	err = -ENOMEM;
+	ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6_vti0",
+					vti6_dev_setup);
+
+	if (!ip6n->fb_tnl_dev)
+		goto err_alloc_dev;
+	dev_net_set(ip6n->fb_tnl_dev, net);
+
+	err = vti6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
+	if (err < 0)
+		goto err_register;
+
+	err = register_netdev(ip6n->fb_tnl_dev);
+	if (err < 0)
+		goto err_register;
+
+	t = netdev_priv(ip6n->fb_tnl_dev);
+
+	strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
+	return 0;
+
+err_register:
+	vti6_dev_free(ip6n->fb_tnl_dev);
+err_alloc_dev:
+	return err;
+}
+
+static void __net_exit vti6_exit_net(struct net *net)
+{
+	struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+	rtnl_lock();
+	vti6_destroy_tunnels(ip6n);
+	rtnl_unlock();
+}
+
+static struct pernet_operations vti6_net_ops = {
+	.init = vti6_init_net,
+	.exit = vti6_exit_net,
+	.id   = &vti6_net_id,
+	.size = sizeof(struct vti6_net),
+};
+
+static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
+	.handler	=	vti6_rcv,
+	.cb_handler	=	vti6_rcv_cb,
+	.err_handler	=	vti6_err,
+	.priority	=	100,
+};
+
+static struct xfrm6_protocol vti_ah6_protocol __read_mostly = {
+	.handler	=	vti6_rcv,
+	.cb_handler	=	vti6_rcv_cb,
+	.err_handler	=	vti6_err,
+	.priority	=	100,
+};
+
+static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
+	.handler	=	vti6_rcv,
+	.cb_handler	=	vti6_rcv_cb,
+	.err_handler	=	vti6_err,
+	.priority	=	100,
+};
+
+/**
+ * vti6_tunnel_init - register protocol and reserve needed resources
+ *
+ * Return: 0 on success
+ **/
+static int __init vti6_tunnel_init(void)
+{
+	int  err;
+
+	err = register_pernet_device(&vti6_net_ops);
+	if (err < 0)
+		goto out_pernet;
+
+	err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP);
+	if (err < 0) {
+		pr_err("%s: can't register vti6 protocol\n", __func__);
+
+		goto out;
+	}
+
+	err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH);
+	if (err < 0) {
+		xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+		pr_err("%s: can't register vti6 protocol\n", __func__);
+
+		goto out;
+	}
+
+	err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP);
+	if (err < 0) {
+		xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+		xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+		pr_err("%s: can't register vti6 protocol\n", __func__);
+
+		goto out;
+	}
+
+	err = rtnl_link_register(&vti6_link_ops);
+	if (err < 0)
+		goto rtnl_link_failed;
+
+	return 0;
+
+rtnl_link_failed:
+	xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
+	xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+	xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+out:
+	unregister_pernet_device(&vti6_net_ops);
+out_pernet:
+	return err;
+}
+
+/**
+ * vti6_tunnel_cleanup - free resources and unregister protocol
+ **/
+static void __exit vti6_tunnel_cleanup(void)
+{
+	rtnl_link_unregister(&vti6_link_ops);
+	if (xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP))
+		pr_info("%s: can't deregister protocol\n", __func__);
+	if (xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH))
+		pr_info("%s: can't deregister protocol\n", __func__);
+	if (xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP))
+		pr_info("%s: can't deregister protocol\n", __func__);
+
+	unregister_pernet_device(&vti6_net_ops);
+}
+
+module_init(vti6_tunnel_init);
+module_exit(vti6_tunnel_cleanup);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("vti6");
+MODULE_ALIAS_NETDEV("ip6_vti0");
+MODULE_AUTHOR("Steffen Klassert");
+MODULE_DESCRIPTION("IPv6 virtual tunnel interface");
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
new file mode 100644
index 00000000000..8250474ab7d
--- /dev/null
+++ b/net/ipv6/ip6mr.c
@@ -0,0 +1,2503 @@
+/*
+ *	Linux IPv6 multicast routing support for BSD pim6sd
+ *	Based on net/ipv4/ipmr.c.
+ *
+ *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
+ *		LSIIT Laboratory, Strasbourg, France
+ *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
+ *		6WIND, Paris, France
+ *	Copyright (C)2007,2008 USAGI/WIDE Project
+ *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/uaccess.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/socket.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/compat.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/raw.h>
+#include <linux/notifier.h>
+#include <linux/if_arp.h>
+#include <net/checksum.h>
+#include <net/netlink.h>
+#include <net/fib_rules.h>
+
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <linux/mroute6.h>
+#include <linux/pim.h>
+#include <net/addrconf.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/export.h>
+#include <net/ip6_checksum.h>
+#include <linux/netconf.h>
+
+struct mr6_table {
+	struct list_head	list;
+#ifdef CONFIG_NET_NS
+	struct net		*net;
+#endif
+	u32			id;
+	struct sock		*mroute6_sk;
+	struct timer_list	ipmr_expire_timer;
+	struct list_head	mfc6_unres_queue;
+	struct list_head	mfc6_cache_array[MFC6_LINES];
+	struct mif_device	vif6_table[MAXMIFS];
+	int			maxvif;
+	atomic_t		cache_resolve_queue_len;
+	bool			mroute_do_assert;
+	bool			mroute_do_pim;
+#ifdef CONFIG_IPV6_PIMSM_V2
+	int			mroute_reg_vif_num;
+#endif
+};
+
+struct ip6mr_rule {
+	struct fib_rule		common;
+};
+
+struct ip6mr_result {
+	struct mr6_table	*mrt;
+};
+
+/* Big lock, protecting vif table, mrt cache and mroute socket state.
+   Note that the changes are semaphored via rtnl_lock.
+ */
+
+static DEFINE_RWLOCK(mrt_lock);
+
+/*
+ *	Multicast router control variables
+ */
+
+#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
+
+/* Special spinlock for queue of unresolved entries */
+static DEFINE_SPINLOCK(mfc_unres_lock);
+
+/* We return to original Alan's scheme. Hash table of resolved
+   entries is changed only in process context and protected
+   with weak lock mrt_lock. Queue of unresolved entries is protected
+   with strong spinlock mfc_unres_lock.
+
+   In this case data path is free of exclusive locks at all.
+ */
+
+static struct kmem_cache *mrt_cachep __read_mostly;
+
+static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr6_table *mrt);
+
+static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+			   struct sk_buff *skb, struct mfc6_cache *cache);
+static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+			      mifi_t mifi, int assert);
+static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+			       struct mfc6_cache *c, struct rtmsg *rtm);
+static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+			      int cmd);
+static int ip6mr_rtm_dumproute(struct sk_buff *skb,
+			       struct netlink_callback *cb);
+static void mroute_clean_tables(struct mr6_table *mrt);
+static void ipmr_expire_process(unsigned long arg);
+
+#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
+#define ip6mr_for_each_table(mrt, net) \
+	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
+
+static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+{
+	struct mr6_table *mrt;
+
+	ip6mr_for_each_table(mrt, net) {
+		if (mrt->id == id)
+			return mrt;
+	}
+	return NULL;
+}
+
+static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
+			    struct mr6_table **mrt)
+{
+	int err;
+	struct ip6mr_result res;
+	struct fib_lookup_arg arg = {
+		.result = &res,
+		.flags = FIB_LOOKUP_NOREF,
+	};
+
+	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
+			       flowi6_to_flowi(flp6), 0, &arg);
+	if (err < 0)
+		return err;
+	*mrt = res.mrt;
+	return 0;
+}
+
+static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
+			     int flags, struct fib_lookup_arg *arg)
+{
+	struct ip6mr_result *res = arg->result;
+	struct mr6_table *mrt;
+
+	switch (rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
+	case FR_ACT_UNREACHABLE:
+		return -ENETUNREACH;
+	case FR_ACT_PROHIBIT:
+		return -EACCES;
+	case FR_ACT_BLACKHOLE:
+	default:
+		return -EINVAL;
+	}
+
+	mrt = ip6mr_get_table(rule->fr_net, rule->table);
+	if (mrt == NULL)
+		return -EAGAIN;
+	res->mrt = mrt;
+	return 0;
+}
+
+static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
+{
+	return 1;
+}
+
+static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
+	FRA_GENERIC_POLICY,
+};
+
+static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+				struct fib_rule_hdr *frh, struct nlattr **tb)
+{
+	return 0;
+}
+
+static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+			      struct nlattr **tb)
+{
+	return 1;
+}
+
+static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+			   struct fib_rule_hdr *frh)
+{
+	frh->dst_len = 0;
+	frh->src_len = 0;
+	frh->tos     = 0;
+	return 0;
+}
+
+static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
+	.family		= RTNL_FAMILY_IP6MR,
+	.rule_size	= sizeof(struct ip6mr_rule),
+	.addr_size	= sizeof(struct in6_addr),
+	.action		= ip6mr_rule_action,
+	.match		= ip6mr_rule_match,
+	.configure	= ip6mr_rule_configure,
+	.compare	= ip6mr_rule_compare,
+	.default_pref	= fib_default_rule_pref,
+	.fill		= ip6mr_rule_fill,
+	.nlgroup	= RTNLGRP_IPV6_RULE,
+	.policy		= ip6mr_rule_policy,
+	.owner		= THIS_MODULE,
+};
+
+static int __net_init ip6mr_rules_init(struct net *net)
+{
+	struct fib_rules_ops *ops;
+	struct mr6_table *mrt;
+	int err;
+
+	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+
+	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
+
+	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
+	if (mrt == NULL) {
+		err = -ENOMEM;
+		goto err1;
+	}
+
+	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
+	if (err < 0)
+		goto err2;
+
+	net->ipv6.mr6_rules_ops = ops;
+	return 0;
+
+err2:
+	kfree(mrt);
+err1:
+	fib_rules_unregister(ops);
+	return err;
+}
+
+static void __net_exit ip6mr_rules_exit(struct net *net)
+{
+	struct mr6_table *mrt, *next;
+
+	rtnl_lock();
+	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
+		list_del(&mrt->list);
+		ip6mr_free_table(mrt);
+	}
+	rtnl_unlock();
+	fib_rules_unregister(net->ipv6.mr6_rules_ops);
+}
+#else
+#define ip6mr_for_each_table(mrt, net) \
+	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
+
+static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+{
+	return net->ipv6.mrt6;
+}
+
+static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
+			    struct mr6_table **mrt)
+{
+	*mrt = net->ipv6.mrt6;
+	return 0;
+}
+
+static int __net_init ip6mr_rules_init(struct net *net)
+{
+	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
+	return net->ipv6.mrt6 ? 0 : -ENOMEM;
+}
+
+static void __net_exit ip6mr_rules_exit(struct net *net)
+{
+	rtnl_lock();
+	ip6mr_free_table(net->ipv6.mrt6);
+	net->ipv6.mrt6 = NULL;
+	rtnl_unlock();
+}
+#endif
+
+static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+{
+	struct mr6_table *mrt;
+	unsigned int i;
+
+	mrt = ip6mr_get_table(net, id);
+	if (mrt != NULL)
+		return mrt;
+
+	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+	if (mrt == NULL)
+		return NULL;
+	mrt->id = id;
+	write_pnet(&mrt->net, net);
+
+	/* Forwarding cache */
+	for (i = 0; i < MFC6_LINES; i++)
+		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
+
+	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+
+	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
+		    (unsigned long)mrt);
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+	mrt->mroute_reg_vif_num = -1;
+#endif
+#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
+	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
+#endif
+	return mrt;
+}
+
+static void ip6mr_free_table(struct mr6_table *mrt)
+{
+	del_timer(&mrt->ipmr_expire_timer);
+	mroute_clean_tables(mrt);
+	kfree(mrt);
+}
+
+#ifdef CONFIG_PROC_FS
+
+struct ipmr_mfc_iter {
+	struct seq_net_private p;
+	struct mr6_table *mrt;
+	struct list_head *cache;
+	int ct;
+};
+
+
+static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
+					   struct ipmr_mfc_iter *it, loff_t pos)
+{
+	struct mr6_table *mrt = it->mrt;
+	struct mfc6_cache *mfc;
+
+	read_lock(&mrt_lock);
+	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
+		it->cache = &mrt->mfc6_cache_array[it->ct];
+		list_for_each_entry(mfc, it->cache, list)
+			if (pos-- == 0)
+				return mfc;
+	}
+	read_unlock(&mrt_lock);
+
+	spin_lock_bh(&mfc_unres_lock);
+	it->cache = &mrt->mfc6_unres_queue;
+	list_for_each_entry(mfc, it->cache, list)
+		if (pos-- == 0)
+			return mfc;
+	spin_unlock_bh(&mfc_unres_lock);
+
+	it->cache = NULL;
+	return NULL;
+}
+
+/*
+ *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
+ */
+
+struct ipmr_vif_iter {
+	struct seq_net_private p;
+	struct mr6_table *mrt;
+	int ct;
+};
+
+static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
+					    struct ipmr_vif_iter *iter,
+					    loff_t pos)
+{
+	struct mr6_table *mrt = iter->mrt;
+
+	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
+		if (!MIF_EXISTS(mrt, iter->ct))
+			continue;
+		if (pos-- == 0)
+			return &mrt->vif6_table[iter->ct];
+	}
+	return NULL;
+}
+
+static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(mrt_lock)
+{
+	struct ipmr_vif_iter *iter = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return ERR_PTR(-ENOENT);
+
+	iter->mrt = mrt;
+
+	read_lock(&mrt_lock);
+	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
+		: SEQ_START_TOKEN;
+}
+
+static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ipmr_vif_iter *iter = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt = iter->mrt;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ip6mr_vif_seq_idx(net, iter, 0);
+
+	while (++iter->ct < mrt->maxvif) {
+		if (!MIF_EXISTS(mrt, iter->ct))
+			continue;
+		return &mrt->vif6_table[iter->ct];
+	}
+	return NULL;
+}
+
+static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
+	__releases(mrt_lock)
+{
+	read_unlock(&mrt_lock);
+}
+
+static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
+{
+	struct ipmr_vif_iter *iter = seq->private;
+	struct mr6_table *mrt = iter->mrt;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq,
+			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
+	} else {
+		const struct mif_device *vif = v;
+		const char *name = vif->dev ? vif->dev->name : "none";
+
+		seq_printf(seq,
+			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
+			   vif - mrt->vif6_table,
+			   name, vif->bytes_in, vif->pkt_in,
+			   vif->bytes_out, vif->pkt_out,
+			   vif->flags);
+	}
+	return 0;
+}
+
+static const struct seq_operations ip6mr_vif_seq_ops = {
+	.start = ip6mr_vif_seq_start,
+	.next  = ip6mr_vif_seq_next,
+	.stop  = ip6mr_vif_seq_stop,
+	.show  = ip6mr_vif_seq_show,
+};
+
+static int ip6mr_vif_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
+			    sizeof(struct ipmr_vif_iter));
+}
+
+static const struct file_operations ip6mr_vif_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ip6mr_vif_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_net,
+};
+
+static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct ipmr_mfc_iter *it = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return ERR_PTR(-ENOENT);
+
+	it->mrt = mrt;
+	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
+		: SEQ_START_TOKEN;
+}
+
+static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct mfc6_cache *mfc = v;
+	struct ipmr_mfc_iter *it = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt = it->mrt;
+
+	++*pos;
+
+	if (v == SEQ_START_TOKEN)
+		return ipmr_mfc_seq_idx(net, seq->private, 0);
+
+	if (mfc->list.next != it->cache)
+		return list_entry(mfc->list.next, struct mfc6_cache, list);
+
+	if (it->cache == &mrt->mfc6_unres_queue)
+		goto end_of_list;
+
+	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
+
+	while (++it->ct < MFC6_LINES) {
+		it->cache = &mrt->mfc6_cache_array[it->ct];
+		if (list_empty(it->cache))
+			continue;
+		return list_first_entry(it->cache, struct mfc6_cache, list);
+	}
+
+	/* exhausted cache_array, show unresolved */
+	read_unlock(&mrt_lock);
+	it->cache = &mrt->mfc6_unres_queue;
+	it->ct = 0;
+
+	spin_lock_bh(&mfc_unres_lock);
+	if (!list_empty(it->cache))
+		return list_first_entry(it->cache, struct mfc6_cache, list);
+
+ end_of_list:
+	spin_unlock_bh(&mfc_unres_lock);
+	it->cache = NULL;
+
+	return NULL;
+}
+
+static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+	struct ipmr_mfc_iter *it = seq->private;
+	struct mr6_table *mrt = it->mrt;
+
+	if (it->cache == &mrt->mfc6_unres_queue)
+		spin_unlock_bh(&mfc_unres_lock);
+	else if (it->cache == mrt->mfc6_cache_array)
+		read_unlock(&mrt_lock);
+}
+
+static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
+{
+	int n;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq,
+			 "Group                            "
+			 "Origin                           "
+			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
+	} else {
+		const struct mfc6_cache *mfc = v;
+		const struct ipmr_mfc_iter *it = seq->private;
+		struct mr6_table *mrt = it->mrt;
+
+		seq_printf(seq, "%pI6 %pI6 %-3hd",
+			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
+			   mfc->mf6c_parent);
+
+		if (it->cache != &mrt->mfc6_unres_queue) {
+			seq_printf(seq, " %8lu %8lu %8lu",
+				   mfc->mfc_un.res.pkt,
+				   mfc->mfc_un.res.bytes,
+				   mfc->mfc_un.res.wrong_if);
+			for (n = mfc->mfc_un.res.minvif;
+			     n < mfc->mfc_un.res.maxvif; n++) {
+				if (MIF_EXISTS(mrt, n) &&
+				    mfc->mfc_un.res.ttls[n] < 255)
+					seq_printf(seq,
+						   " %2d:%-3d",
+						   n, mfc->mfc_un.res.ttls[n]);
+			}
+		} else {
+			/* unresolved mfc_caches don't contain
+			 * pkt, bytes and wrong_if values
+			 */
+			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
+		}
+		seq_putc(seq, '\n');
+	}
+	return 0;
+}
+
+static const struct seq_operations ipmr_mfc_seq_ops = {
+	.start = ipmr_mfc_seq_start,
+	.next  = ipmr_mfc_seq_next,
+	.stop  = ipmr_mfc_seq_stop,
+	.show  = ipmr_mfc_seq_show,
+};
+
+static int ipmr_mfc_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
+			    sizeof(struct ipmr_mfc_iter));
+}
+
+static const struct file_operations ip6mr_mfc_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ipmr_mfc_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_net,
+};
+#endif
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+
+static int pim6_rcv(struct sk_buff *skb)
+{
+	struct pimreghdr *pim;
+	struct ipv6hdr   *encap;
+	struct net_device  *reg_dev = NULL;
+	struct net *net = dev_net(skb->dev);
+	struct mr6_table *mrt;
+	struct flowi6 fl6 = {
+		.flowi6_iif	= skb->dev->ifindex,
+		.flowi6_mark	= skb->mark,
+	};
+	int reg_vif_num;
+
+	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
+		goto drop;
+
+	pim = (struct pimreghdr *)skb_transport_header(skb);
+	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
+	    (pim->flags & PIM_NULL_REGISTER) ||
+	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
+			     sizeof(*pim), IPPROTO_PIM,
+			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
+	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
+		goto drop;
+
+	/* check if the inner packet is destined to mcast group */
+	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
+				   sizeof(*pim));
+
+	if (!ipv6_addr_is_multicast(&encap->daddr) ||
+	    encap->payload_len == 0 ||
+	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
+		goto drop;
+
+	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
+		goto drop;
+	reg_vif_num = mrt->mroute_reg_vif_num;
+
+	read_lock(&mrt_lock);
+	if (reg_vif_num >= 0)
+		reg_dev = mrt->vif6_table[reg_vif_num].dev;
+	if (reg_dev)
+		dev_hold(reg_dev);
+	read_unlock(&mrt_lock);
+
+	if (reg_dev == NULL)
+		goto drop;
+
+	skb->mac_header = skb->network_header;
+	skb_pull(skb, (u8 *)encap - skb->data);
+	skb_reset_network_header(skb);
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->ip_summed = CHECKSUM_NONE;
+
+	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
+
+	netif_rx(skb);
+
+	dev_put(reg_dev);
+	return 0;
+ drop:
+	kfree_skb(skb);
+	return 0;
+}
+
+static const struct inet6_protocol pim6_protocol = {
+	.handler	=	pim6_rcv,
+};
+
+/* Service routines creating virtual interfaces: PIMREG */
+
+static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
+				      struct net_device *dev)
+{
+	struct net *net = dev_net(dev);
+	struct mr6_table *mrt;
+	struct flowi6 fl6 = {
+		.flowi6_oif	= dev->ifindex,
+		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
+		.flowi6_mark	= skb->mark,
+	};
+	int err;
+
+	err = ip6mr_fib_lookup(net, &fl6, &mrt);
+	if (err < 0) {
+		kfree_skb(skb);
+		return err;
+	}
+
+	read_lock(&mrt_lock);
+	dev->stats.tx_bytes += skb->len;
+	dev->stats.tx_packets++;
+	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
+	read_unlock(&mrt_lock);
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops reg_vif_netdev_ops = {
+	.ndo_start_xmit	= reg_vif_xmit,
+};
+
+static void reg_vif_setup(struct net_device *dev)
+{
+	dev->type		= ARPHRD_PIMREG;
+	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
+	dev->flags		= IFF_NOARP;
+	dev->netdev_ops		= &reg_vif_netdev_ops;
+	dev->destructor		= free_netdev;
+	dev->features		|= NETIF_F_NETNS_LOCAL;
+}
+
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
+{
+	struct net_device *dev;
+	char name[IFNAMSIZ];
+
+	if (mrt->id == RT6_TABLE_DFLT)
+		sprintf(name, "pim6reg");
+	else
+		sprintf(name, "pim6reg%u", mrt->id);
+
+	dev = alloc_netdev(0, name, reg_vif_setup);
+	if (dev == NULL)
+		return NULL;
+
+	dev_net_set(dev, net);
+
+	if (register_netdevice(dev)) {
+		free_netdev(dev);
+		return NULL;
+	}
+	dev->iflink = 0;
+
+	if (dev_open(dev))
+		goto failure;
+
+	dev_hold(dev);
+	return dev;
+
+failure:
+	/* allow the register to be completed before unregistering. */
+	rtnl_unlock();
+	rtnl_lock();
+
+	unregister_netdevice(dev);
+	return NULL;
+}
+#endif
+
+/*
+ *	Delete a VIF entry
+ */
+
+static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
+{
+	struct mif_device *v;
+	struct net_device *dev;
+	struct inet6_dev *in6_dev;
+
+	if (vifi < 0 || vifi >= mrt->maxvif)
+		return -EADDRNOTAVAIL;
+
+	v = &mrt->vif6_table[vifi];
+
+	write_lock_bh(&mrt_lock);
+	dev = v->dev;
+	v->dev = NULL;
+
+	if (!dev) {
+		write_unlock_bh(&mrt_lock);
+		return -EADDRNOTAVAIL;
+	}
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+	if (vifi == mrt->mroute_reg_vif_num)
+		mrt->mroute_reg_vif_num = -1;
+#endif
+
+	if (vifi + 1 == mrt->maxvif) {
+		int tmp;
+		for (tmp = vifi - 1; tmp >= 0; tmp--) {
+			if (MIF_EXISTS(mrt, tmp))
+				break;
+		}
+		mrt->maxvif = tmp + 1;
+	}
+
+	write_unlock_bh(&mrt_lock);
+
+	dev_set_allmulti(dev, -1);
+
+	in6_dev = __in6_dev_get(dev);
+	if (in6_dev) {
+		in6_dev->cnf.mc_forwarding--;
+		inet6_netconf_notify_devconf(dev_net(dev),
+					     NETCONFA_MC_FORWARDING,
+					     dev->ifindex, &in6_dev->cnf);
+	}
+
+	if (v->flags & MIFF_REGISTER)
+		unregister_netdevice_queue(dev, head);
+
+	dev_put(dev);
+	return 0;
+}
+
+static inline void ip6mr_cache_free(struct mfc6_cache *c)
+{
+	kmem_cache_free(mrt_cachep, c);
+}
+
+/* Destroy an unresolved cache entry, killing queued skbs
+   and reporting error to netlink readers.
+ */
+
+static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
+{
+	struct net *net = read_pnet(&mrt->net);
+	struct sk_buff *skb;
+
+	atomic_dec(&mrt->cache_resolve_queue_len);
+
+	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+		if (ipv6_hdr(skb)->version == 0) {
+			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
+			nlh->nlmsg_type = NLMSG_ERROR;
+			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
+			skb_trim(skb, nlh->nlmsg_len);
+			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
+			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
+		} else
+			kfree_skb(skb);
+	}
+
+	ip6mr_cache_free(c);
+}
+
+
+/* Timer process for all the unresolved queue. */
+
+static void ipmr_do_expire_process(struct mr6_table *mrt)
+{
+	unsigned long now = jiffies;
+	unsigned long expires = 10 * HZ;
+	struct mfc6_cache *c, *next;
+
+	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+		if (time_after(c->mfc_un.unres.expires, now)) {
+			/* not yet... */
+			unsigned long interval = c->mfc_un.unres.expires - now;
+			if (interval < expires)
+				expires = interval;
+			continue;
+		}
+
+		list_del(&c->list);
+		mr6_netlink_event(mrt, c, RTM_DELROUTE);
+		ip6mr_destroy_unres(mrt, c);
+	}
+
+	if (!list_empty(&mrt->mfc6_unres_queue))
+		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
+}
+
+static void ipmr_expire_process(unsigned long arg)
+{
+	struct mr6_table *mrt = (struct mr6_table *)arg;
+
+	if (!spin_trylock(&mfc_unres_lock)) {
+		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
+		return;
+	}
+
+	if (!list_empty(&mrt->mfc6_unres_queue))
+		ipmr_do_expire_process(mrt);
+
+	spin_unlock(&mfc_unres_lock);
+}
+
+/* Fill oifs list. It is called under write locked mrt_lock. */
+
+static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
+				    unsigned char *ttls)
+{
+	int vifi;
+
+	cache->mfc_un.res.minvif = MAXMIFS;
+	cache->mfc_un.res.maxvif = 0;
+	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
+
+	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
+		if (MIF_EXISTS(mrt, vifi) &&
+		    ttls[vifi] && ttls[vifi] < 255) {
+			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
+			if (cache->mfc_un.res.minvif > vifi)
+				cache->mfc_un.res.minvif = vifi;
+			if (cache->mfc_un.res.maxvif <= vifi)
+				cache->mfc_un.res.maxvif = vifi + 1;
+		}
+	}
+}
+
+static int mif6_add(struct net *net, struct mr6_table *mrt,
+		    struct mif6ctl *vifc, int mrtsock)
+{
+	int vifi = vifc->mif6c_mifi;
+	struct mif_device *v = &mrt->vif6_table[vifi];
+	struct net_device *dev;
+	struct inet6_dev *in6_dev;
+	int err;
+
+	/* Is vif busy ? */
+	if (MIF_EXISTS(mrt, vifi))
+		return -EADDRINUSE;
+
+	switch (vifc->mif6c_flags) {
+#ifdef CONFIG_IPV6_PIMSM_V2
+	case MIFF_REGISTER:
+		/*
+		 * Special Purpose VIF in PIM
+		 * All the packets will be sent to the daemon
+		 */
+		if (mrt->mroute_reg_vif_num >= 0)
+			return -EADDRINUSE;
+		dev = ip6mr_reg_vif(net, mrt);
+		if (!dev)
+			return -ENOBUFS;
+		err = dev_set_allmulti(dev, 1);
+		if (err) {
+			unregister_netdevice(dev);
+			dev_put(dev);
+			return err;
+		}
+		break;
+#endif
+	case 0:
+		dev = dev_get_by_index(net, vifc->mif6c_pifi);
+		if (!dev)
+			return -EADDRNOTAVAIL;
+		err = dev_set_allmulti(dev, 1);
+		if (err) {
+			dev_put(dev);
+			return err;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	in6_dev = __in6_dev_get(dev);
+	if (in6_dev) {
+		in6_dev->cnf.mc_forwarding++;
+		inet6_netconf_notify_devconf(dev_net(dev),
+					     NETCONFA_MC_FORWARDING,
+					     dev->ifindex, &in6_dev->cnf);
+	}
+
+	/*
+	 *	Fill in the VIF structures
+	 */
+	v->rate_limit = vifc->vifc_rate_limit;
+	v->flags = vifc->mif6c_flags;
+	if (!mrtsock)
+		v->flags |= VIFF_STATIC;
+	v->threshold = vifc->vifc_threshold;
+	v->bytes_in = 0;
+	v->bytes_out = 0;
+	v->pkt_in = 0;
+	v->pkt_out = 0;
+	v->link = dev->ifindex;
+	if (v->flags & MIFF_REGISTER)
+		v->link = dev->iflink;
+
+	/* And finish update writing critical data */
+	write_lock_bh(&mrt_lock);
+	v->dev = dev;
+#ifdef CONFIG_IPV6_PIMSM_V2
+	if (v->flags & MIFF_REGISTER)
+		mrt->mroute_reg_vif_num = vifi;
+#endif
+	if (vifi + 1 > mrt->maxvif)
+		mrt->maxvif = vifi + 1;
+	write_unlock_bh(&mrt_lock);
+	return 0;
+}
+
+static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
+					   const struct in6_addr *origin,
+					   const struct in6_addr *mcastgrp)
+{
+	int line = MFC6_HASH(mcastgrp, origin);
+	struct mfc6_cache *c;
+
+	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
+		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
+		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
+			return c;
+	}
+	return NULL;
+}
+
+/* Look for a (*,*,oif) entry */
+static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
+						      mifi_t mifi)
+{
+	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
+	struct mfc6_cache *c;
+
+	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
+		if (ipv6_addr_any(&c->mf6c_origin) &&
+		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
+		    (c->mfc_un.res.ttls[mifi] < 255))
+			return c;
+
+	return NULL;
+}
+
+/* Look for a (*,G) entry */
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+					       struct in6_addr *mcastgrp,
+					       mifi_t mifi)
+{
+	int line = MFC6_HASH(mcastgrp, &in6addr_any);
+	struct mfc6_cache *c, *proxy;
+
+	if (ipv6_addr_any(mcastgrp))
+		goto skip;
+
+	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
+		if (ipv6_addr_any(&c->mf6c_origin) &&
+		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
+			if (c->mfc_un.res.ttls[mifi] < 255)
+				return c;
+
+			/* It's ok if the mifi is part of the static tree */
+			proxy = ip6mr_cache_find_any_parent(mrt,
+							    c->mf6c_parent);
+			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
+				return c;
+		}
+
+skip:
+	return ip6mr_cache_find_any_parent(mrt, mifi);
+}
+
+/*
+ *	Allocate a multicast cache entry
+ */
+static struct mfc6_cache *ip6mr_cache_alloc(void)
+{
+	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
+	if (c == NULL)
+		return NULL;
+	c->mfc_un.res.minvif = MAXMIFS;
+	return c;
+}
+
+static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
+{
+	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
+	if (c == NULL)
+		return NULL;
+	skb_queue_head_init(&c->mfc_un.unres.unresolved);
+	c->mfc_un.unres.expires = jiffies + 10 * HZ;
+	return c;
+}
+
+/*
+ *	A cache entry has gone into a resolved state from queued
+ */
+
+static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+				struct mfc6_cache *uc, struct mfc6_cache *c)
+{
+	struct sk_buff *skb;
+
+	/*
+	 *	Play the pending entries through our router
+	 */
+
+	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+		if (ipv6_hdr(skb)->version == 0) {
+			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
+
+			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
+			} else {
+				nlh->nlmsg_type = NLMSG_ERROR;
+				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
+				skb_trim(skb, nlh->nlmsg_len);
+				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
+			}
+			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
+		} else
+			ip6_mr_forward(net, mrt, skb, c);
+	}
+}
+
+/*
+ *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
+ *	expects the following bizarre scheme.
+ *
+ *	Called under mrt_lock.
+ */
+
+static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+			      mifi_t mifi, int assert)
+{
+	struct sk_buff *skb;
+	struct mrt6msg *msg;
+	int ret;
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+	if (assert == MRT6MSG_WHOLEPKT)
+		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
+						+sizeof(*msg));
+	else
+#endif
+		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
+
+	if (!skb)
+		return -ENOBUFS;
+
+	/* I suppose that internal messages
+	 * do not require checksums */
+
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+	if (assert == MRT6MSG_WHOLEPKT) {
+		/* Ugly, but we have no choice with this interface.
+		   Duplicate old header, fix length etc.
+		   And all this only to mangle msg->im6_msgtype and
+		   to set msg->im6_mbz to "mbz" :-)
+		 */
+		skb_push(skb, -skb_network_offset(pkt));
+
+		skb_push(skb, sizeof(*msg));
+		skb_reset_transport_header(skb);
+		msg = (struct mrt6msg *)skb_transport_header(skb);
+		msg->im6_mbz = 0;
+		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
+		msg->im6_mif = mrt->mroute_reg_vif_num;
+		msg->im6_pad = 0;
+		msg->im6_src = ipv6_hdr(pkt)->saddr;
+		msg->im6_dst = ipv6_hdr(pkt)->daddr;
+
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	} else
+#endif
+	{
+	/*
+	 *	Copy the IP header
+	 */
+
+	skb_put(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
+
+	/*
+	 *	Add our header
+	 */
+	skb_put(skb, sizeof(*msg));
+	skb_reset_transport_header(skb);
+	msg = (struct mrt6msg *)skb_transport_header(skb);
+
+	msg->im6_mbz = 0;
+	msg->im6_msgtype = assert;
+	msg->im6_mif = mifi;
+	msg->im6_pad = 0;
+	msg->im6_src = ipv6_hdr(pkt)->saddr;
+	msg->im6_dst = ipv6_hdr(pkt)->daddr;
+
+	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+
+	if (mrt->mroute6_sk == NULL) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	/*
+	 *	Deliver to user space multicast routing algorithms
+	 */
+	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
+	if (ret < 0) {
+		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
+		kfree_skb(skb);
+	}
+
+	return ret;
+}
+
+/*
+ *	Queue a packet for resolution. It gets locked cache entry!
+ */
+
+static int
+ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
+{
+	bool found = false;
+	int err;
+	struct mfc6_cache *c;
+
+	spin_lock_bh(&mfc_unres_lock);
+	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
+		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
+		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		/*
+		 *	Create a new entry if allowable
+		 */
+
+		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
+		    (c = ip6mr_cache_alloc_unres()) == NULL) {
+			spin_unlock_bh(&mfc_unres_lock);
+
+			kfree_skb(skb);
+			return -ENOBUFS;
+		}
+
+		/*
+		 *	Fill in the new cache entry
+		 */
+		c->mf6c_parent = -1;
+		c->mf6c_origin = ipv6_hdr(skb)->saddr;
+		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
+
+		/*
+		 *	Reflect first query at pim6sd
+		 */
+		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
+		if (err < 0) {
+			/* If the report failed throw the cache entry
+			   out - Brad Parker
+			 */
+			spin_unlock_bh(&mfc_unres_lock);
+
+			ip6mr_cache_free(c);
+			kfree_skb(skb);
+			return err;
+		}
+
+		atomic_inc(&mrt->cache_resolve_queue_len);
+		list_add(&c->list, &mrt->mfc6_unres_queue);
+		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
+
+		ipmr_do_expire_process(mrt);
+	}
+
+	/*
+	 *	See if we can append the packet
+	 */
+	if (c->mfc_un.unres.unresolved.qlen > 3) {
+		kfree_skb(skb);
+		err = -ENOBUFS;
+	} else {
+		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+		err = 0;
+	}
+
+	spin_unlock_bh(&mfc_unres_lock);
+	return err;
+}
+
+/*
+ *	MFC6 cache manipulation by user space
+ */
+
+static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+			    int parent)
+{
+	int line;
+	struct mfc6_cache *c, *next;
+
+	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+
+	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
+		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
+		    ipv6_addr_equal(&c->mf6c_mcastgrp,
+				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
+		    (parent == -1 || parent == c->mf6c_parent)) {
+			write_lock_bh(&mrt_lock);
+			list_del(&c->list);
+			write_unlock_bh(&mrt_lock);
+
+			mr6_netlink_event(mrt, c, RTM_DELROUTE);
+			ip6mr_cache_free(c);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+static int ip6mr_device_event(struct notifier_block *this,
+			      unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net *net = dev_net(dev);
+	struct mr6_table *mrt;
+	struct mif_device *v;
+	int ct;
+	LIST_HEAD(list);
+
+	if (event != NETDEV_UNREGISTER)
+		return NOTIFY_DONE;
+
+	ip6mr_for_each_table(mrt, net) {
+		v = &mrt->vif6_table[0];
+		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
+			if (v->dev == dev)
+				mif6_delete(mrt, ct, &list);
+		}
+	}
+	unregister_netdevice_many(&list);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ip6_mr_notifier = {
+	.notifier_call = ip6mr_device_event
+};
+
+/*
+ *	Setup for IP multicast routing
+ */
+
+static int __net_init ip6mr_net_init(struct net *net)
+{
+	int err;
+
+	err = ip6mr_rules_init(net);
+	if (err < 0)
+		goto fail;
+
+#ifdef CONFIG_PROC_FS
+	err = -ENOMEM;
+	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
+		goto proc_vif_fail;
+	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
+		goto proc_cache_fail;
+#endif
+
+	return 0;
+
+#ifdef CONFIG_PROC_FS
+proc_cache_fail:
+	remove_proc_entry("ip6_mr_vif", net->proc_net);
+proc_vif_fail:
+	ip6mr_rules_exit(net);
+#endif
+fail:
+	return err;
+}
+
+static void __net_exit ip6mr_net_exit(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("ip6_mr_cache", net->proc_net);
+	remove_proc_entry("ip6_mr_vif", net->proc_net);
+#endif
+	ip6mr_rules_exit(net);
+}
+
+static struct pernet_operations ip6mr_net_ops = {
+	.init = ip6mr_net_init,
+	.exit = ip6mr_net_exit,
+};
+
+int __init ip6_mr_init(void)
+{
+	int err;
+
+	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
+				       sizeof(struct mfc6_cache),
+				       0, SLAB_HWCACHE_ALIGN,
+				       NULL);
+	if (!mrt_cachep)
+		return -ENOMEM;
+
+	err = register_pernet_subsys(&ip6mr_net_ops);
+	if (err)
+		goto reg_pernet_fail;
+
+	err = register_netdevice_notifier(&ip6_mr_notifier);
+	if (err)
+		goto reg_notif_fail;
+#ifdef CONFIG_IPV6_PIMSM_V2
+	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
+		pr_err("%s: can't add PIM protocol\n", __func__);
+		err = -EAGAIN;
+		goto add_proto_fail;
+	}
+#endif
+	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
+		      ip6mr_rtm_dumproute, NULL);
+	return 0;
+#ifdef CONFIG_IPV6_PIMSM_V2
+add_proto_fail:
+	unregister_netdevice_notifier(&ip6_mr_notifier);
+#endif
+reg_notif_fail:
+	unregister_pernet_subsys(&ip6mr_net_ops);
+reg_pernet_fail:
+	kmem_cache_destroy(mrt_cachep);
+	return err;
+}
+
+void ip6_mr_cleanup(void)
+{
+	unregister_netdevice_notifier(&ip6_mr_notifier);
+	unregister_pernet_subsys(&ip6mr_net_ops);
+	kmem_cache_destroy(mrt_cachep);
+}
+
+static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+			 struct mf6cctl *mfc, int mrtsock, int parent)
+{
+	bool found = false;
+	int line;
+	struct mfc6_cache *uc, *c;
+	unsigned char ttls[MAXMIFS];
+	int i;
+
+	if (mfc->mf6cc_parent >= MAXMIFS)
+		return -ENFILE;
+
+	memset(ttls, 255, MAXMIFS);
+	for (i = 0; i < MAXMIFS; i++) {
+		if (IF_ISSET(i, &mfc->mf6cc_ifset))
+			ttls[i] = 1;
+
+	}
+
+	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+
+	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
+		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
+		    ipv6_addr_equal(&c->mf6c_mcastgrp,
+				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
+		    (parent == -1 || parent == mfc->mf6cc_parent)) {
+			found = true;
+			break;
+		}
+	}
+
+	if (found) {
+		write_lock_bh(&mrt_lock);
+		c->mf6c_parent = mfc->mf6cc_parent;
+		ip6mr_update_thresholds(mrt, c, ttls);
+		if (!mrtsock)
+			c->mfc_flags |= MFC_STATIC;
+		write_unlock_bh(&mrt_lock);
+		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
+		return 0;
+	}
+
+	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
+	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
+		return -EINVAL;
+
+	c = ip6mr_cache_alloc();
+	if (c == NULL)
+		return -ENOMEM;
+
+	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
+	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
+	c->mf6c_parent = mfc->mf6cc_parent;
+	ip6mr_update_thresholds(mrt, c, ttls);
+	if (!mrtsock)
+		c->mfc_flags |= MFC_STATIC;
+
+	write_lock_bh(&mrt_lock);
+	list_add(&c->list, &mrt->mfc6_cache_array[line]);
+	write_unlock_bh(&mrt_lock);
+
+	/*
+	 *	Check to see if we resolved a queued list. If so we
+	 *	need to send on the frames and tidy up.
+	 */
+	found = false;
+	spin_lock_bh(&mfc_unres_lock);
+	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
+		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
+		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
+			list_del(&uc->list);
+			atomic_dec(&mrt->cache_resolve_queue_len);
+			found = true;
+			break;
+		}
+	}
+	if (list_empty(&mrt->mfc6_unres_queue))
+		del_timer(&mrt->ipmr_expire_timer);
+	spin_unlock_bh(&mfc_unres_lock);
+
+	if (found) {
+		ip6mr_cache_resolve(net, mrt, uc, c);
+		ip6mr_cache_free(uc);
+	}
+	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
+	return 0;
+}
+
+/*
+ *	Close the multicast socket, and clear the vif tables etc
+ */
+
+static void mroute_clean_tables(struct mr6_table *mrt)
+{
+	int i;
+	LIST_HEAD(list);
+	struct mfc6_cache *c, *next;
+
+	/*
+	 *	Shut down all active vif entries
+	 */
+	for (i = 0; i < mrt->maxvif; i++) {
+		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
+			mif6_delete(mrt, i, &list);
+	}
+	unregister_netdevice_many(&list);
+
+	/*
+	 *	Wipe the cache
+	 */
+	for (i = 0; i < MFC6_LINES; i++) {
+		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
+			if (c->mfc_flags & MFC_STATIC)
+				continue;
+			write_lock_bh(&mrt_lock);
+			list_del(&c->list);
+			write_unlock_bh(&mrt_lock);
+
+			mr6_netlink_event(mrt, c, RTM_DELROUTE);
+			ip6mr_cache_free(c);
+		}
+	}
+
+	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
+		spin_lock_bh(&mfc_unres_lock);
+		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+			list_del(&c->list);
+			mr6_netlink_event(mrt, c, RTM_DELROUTE);
+			ip6mr_destroy_unres(mrt, c);
+		}
+		spin_unlock_bh(&mfc_unres_lock);
+	}
+}
+
+static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
+{
+	int err = 0;
+	struct net *net = sock_net(sk);
+
+	rtnl_lock();
+	write_lock_bh(&mrt_lock);
+	if (likely(mrt->mroute6_sk == NULL)) {
+		mrt->mroute6_sk = sk;
+		net->ipv6.devconf_all->mc_forwarding++;
+		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+					     NETCONFA_IFINDEX_ALL,
+					     net->ipv6.devconf_all);
+	}
+	else
+		err = -EADDRINUSE;
+	write_unlock_bh(&mrt_lock);
+
+	rtnl_unlock();
+
+	return err;
+}
+
+int ip6mr_sk_done(struct sock *sk)
+{
+	int err = -EACCES;
+	struct net *net = sock_net(sk);
+	struct mr6_table *mrt;
+
+	rtnl_lock();
+	ip6mr_for_each_table(mrt, net) {
+		if (sk == mrt->mroute6_sk) {
+			write_lock_bh(&mrt_lock);
+			mrt->mroute6_sk = NULL;
+			net->ipv6.devconf_all->mc_forwarding--;
+			inet6_netconf_notify_devconf(net,
+						     NETCONFA_MC_FORWARDING,
+						     NETCONFA_IFINDEX_ALL,
+						     net->ipv6.devconf_all);
+			write_unlock_bh(&mrt_lock);
+
+			mroute_clean_tables(mrt);
+			err = 0;
+			break;
+		}
+	}
+	rtnl_unlock();
+
+	return err;
+}
+
+struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+{
+	struct mr6_table *mrt;
+	struct flowi6 fl6 = {
+		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
+		.flowi6_oif	= skb->dev->ifindex,
+		.flowi6_mark	= skb->mark,
+	};
+
+	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
+		return NULL;
+
+	return mrt->mroute6_sk;
+}
+
+/*
+ *	Socket options and virtual interface manipulation. The whole
+ *	virtual interface system is a complete heap, but unfortunately
+ *	that's how BSD mrouted happens to think. Maybe one day with a proper
+ *	MOSPF/PIM router set up we can clean this up.
+ */
+
+int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
+{
+	int ret, parent = 0;
+	struct mif6ctl vif;
+	struct mf6cctl mfc;
+	mifi_t mifi;
+	struct net *net = sock_net(sk);
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return -ENOENT;
+
+	if (optname != MRT6_INIT) {
+		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+			return -EACCES;
+	}
+
+	switch (optname) {
+	case MRT6_INIT:
+		if (sk->sk_type != SOCK_RAW ||
+		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+			return -EOPNOTSUPP;
+		if (optlen < sizeof(int))
+			return -EINVAL;
+
+		return ip6mr_sk_init(mrt, sk);
+
+	case MRT6_DONE:
+		return ip6mr_sk_done(sk);
+
+	case MRT6_ADD_MIF:
+		if (optlen < sizeof(vif))
+			return -EINVAL;
+		if (copy_from_user(&vif, optval, sizeof(vif)))
+			return -EFAULT;
+		if (vif.mif6c_mifi >= MAXMIFS)
+			return -ENFILE;
+		rtnl_lock();
+		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
+		rtnl_unlock();
+		return ret;
+
+	case MRT6_DEL_MIF:
+		if (optlen < sizeof(mifi_t))
+			return -EINVAL;
+		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
+			return -EFAULT;
+		rtnl_lock();
+		ret = mif6_delete(mrt, mifi, NULL);
+		rtnl_unlock();
+		return ret;
+
+	/*
+	 *	Manipulate the forwarding caches. These live
+	 *	in a sort of kernel/user symbiosis.
+	 */
+	case MRT6_ADD_MFC:
+	case MRT6_DEL_MFC:
+		parent = -1;
+	case MRT6_ADD_MFC_PROXY:
+	case MRT6_DEL_MFC_PROXY:
+		if (optlen < sizeof(mfc))
+			return -EINVAL;
+		if (copy_from_user(&mfc, optval, sizeof(mfc)))
+			return -EFAULT;
+		if (parent == 0)
+			parent = mfc.mf6cc_parent;
+		rtnl_lock();
+		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
+			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
+		else
+			ret = ip6mr_mfc_add(net, mrt, &mfc,
+					    sk == mrt->mroute6_sk, parent);
+		rtnl_unlock();
+		return ret;
+
+	/*
+	 *	Control PIM assert (to activate pim will activate assert)
+	 */
+	case MRT6_ASSERT:
+	{
+		int v;
+
+		if (optlen != sizeof(v))
+			return -EINVAL;
+		if (get_user(v, (int __user *)optval))
+			return -EFAULT;
+		mrt->mroute_do_assert = v;
+		return 0;
+	}
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+	case MRT6_PIM:
+	{
+		int v;
+
+		if (optlen != sizeof(v))
+			return -EINVAL;
+		if (get_user(v, (int __user *)optval))
+			return -EFAULT;
+		v = !!v;
+		rtnl_lock();
+		ret = 0;
+		if (v != mrt->mroute_do_pim) {
+			mrt->mroute_do_pim = v;
+			mrt->mroute_do_assert = v;
+		}
+		rtnl_unlock();
+		return ret;
+	}
+
+#endif
+#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
+	case MRT6_TABLE:
+	{
+		u32 v;
+
+		if (optlen != sizeof(u32))
+			return -EINVAL;
+		if (get_user(v, (u32 __user *)optval))
+			return -EFAULT;
+		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
+		if (v != RT_TABLE_DEFAULT && v >= 100000000)
+			return -EINVAL;
+		if (sk == mrt->mroute6_sk)
+			return -EBUSY;
+
+		rtnl_lock();
+		ret = 0;
+		if (!ip6mr_new_table(net, v))
+			ret = -ENOMEM;
+		raw6_sk(sk)->ip6mr_table = v;
+		rtnl_unlock();
+		return ret;
+	}
+#endif
+	/*
+	 *	Spurious command, or MRT6_VERSION which you cannot
+	 *	set.
+	 */
+	default:
+		return -ENOPROTOOPT;
+	}
+}
+
+/*
+ *	Getsock opt support for the multicast routing system.
+ */
+
+int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
+			  int __user *optlen)
+{
+	int olr;
+	int val;
+	struct net *net = sock_net(sk);
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return -ENOENT;
+
+	switch (optname) {
+	case MRT6_VERSION:
+		val = 0x0305;
+		break;
+#ifdef CONFIG_IPV6_PIMSM_V2
+	case MRT6_PIM:
+		val = mrt->mroute_do_pim;
+		break;
+#endif
+	case MRT6_ASSERT:
+		val = mrt->mroute_do_assert;
+		break;
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	if (get_user(olr, optlen))
+		return -EFAULT;
+
+	olr = min_t(int, olr, sizeof(int));
+	if (olr < 0)
+		return -EINVAL;
+
+	if (put_user(olr, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, olr))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ *	The IP multicast ioctl support routines.
+ */
+
+int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
+{
+	struct sioc_sg_req6 sr;
+	struct sioc_mif_req6 vr;
+	struct mif_device *vif;
+	struct mfc6_cache *c;
+	struct net *net = sock_net(sk);
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return -ENOENT;
+
+	switch (cmd) {
+	case SIOCGETMIFCNT_IN6:
+		if (copy_from_user(&vr, arg, sizeof(vr)))
+			return -EFAULT;
+		if (vr.mifi >= mrt->maxvif)
+			return -EINVAL;
+		read_lock(&mrt_lock);
+		vif = &mrt->vif6_table[vr.mifi];
+		if (MIF_EXISTS(mrt, vr.mifi)) {
+			vr.icount = vif->pkt_in;
+			vr.ocount = vif->pkt_out;
+			vr.ibytes = vif->bytes_in;
+			vr.obytes = vif->bytes_out;
+			read_unlock(&mrt_lock);
+
+			if (copy_to_user(arg, &vr, sizeof(vr)))
+				return -EFAULT;
+			return 0;
+		}
+		read_unlock(&mrt_lock);
+		return -EADDRNOTAVAIL;
+	case SIOCGETSGCNT_IN6:
+		if (copy_from_user(&sr, arg, sizeof(sr)))
+			return -EFAULT;
+
+		read_lock(&mrt_lock);
+		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
+		if (c) {
+			sr.pktcnt = c->mfc_un.res.pkt;
+			sr.bytecnt = c->mfc_un.res.bytes;
+			sr.wrong_if = c->mfc_un.res.wrong_if;
+			read_unlock(&mrt_lock);
+
+			if (copy_to_user(arg, &sr, sizeof(sr)))
+				return -EFAULT;
+			return 0;
+		}
+		read_unlock(&mrt_lock);
+		return -EADDRNOTAVAIL;
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+
+#ifdef CONFIG_COMPAT
+struct compat_sioc_sg_req6 {
+	struct sockaddr_in6 src;
+	struct sockaddr_in6 grp;
+	compat_ulong_t pktcnt;
+	compat_ulong_t bytecnt;
+	compat_ulong_t wrong_if;
+};
+
+struct compat_sioc_mif_req6 {
+	mifi_t	mifi;
+	compat_ulong_t icount;
+	compat_ulong_t ocount;
+	compat_ulong_t ibytes;
+	compat_ulong_t obytes;
+};
+
+int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
+{
+	struct compat_sioc_sg_req6 sr;
+	struct compat_sioc_mif_req6 vr;
+	struct mif_device *vif;
+	struct mfc6_cache *c;
+	struct net *net = sock_net(sk);
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return -ENOENT;
+
+	switch (cmd) {
+	case SIOCGETMIFCNT_IN6:
+		if (copy_from_user(&vr, arg, sizeof(vr)))
+			return -EFAULT;
+		if (vr.mifi >= mrt->maxvif)
+			return -EINVAL;
+		read_lock(&mrt_lock);
+		vif = &mrt->vif6_table[vr.mifi];
+		if (MIF_EXISTS(mrt, vr.mifi)) {
+			vr.icount = vif->pkt_in;
+			vr.ocount = vif->pkt_out;
+			vr.ibytes = vif->bytes_in;
+			vr.obytes = vif->bytes_out;
+			read_unlock(&mrt_lock);
+
+			if (copy_to_user(arg, &vr, sizeof(vr)))
+				return -EFAULT;
+			return 0;
+		}
+		read_unlock(&mrt_lock);
+		return -EADDRNOTAVAIL;
+	case SIOCGETSGCNT_IN6:
+		if (copy_from_user(&sr, arg, sizeof(sr)))
+			return -EFAULT;
+
+		read_lock(&mrt_lock);
+		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
+		if (c) {
+			sr.pktcnt = c->mfc_un.res.pkt;
+			sr.bytecnt = c->mfc_un.res.bytes;
+			sr.wrong_if = c->mfc_un.res.wrong_if;
+			read_unlock(&mrt_lock);
+
+			if (copy_to_user(arg, &sr, sizeof(sr)))
+				return -EFAULT;
+			return 0;
+		}
+		read_unlock(&mrt_lock);
+		return -EADDRNOTAVAIL;
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+#endif
+
+static inline int ip6mr_forward2_finish(struct sk_buff *skb)
+{
+	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
+			 IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
+			 IPSTATS_MIB_OUTOCTETS, skb->len);
+	return dst_output(skb);
+}
+
+/*
+ *	Processing handlers for ip6mr_forward
+ */
+
+static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
+{
+	struct ipv6hdr *ipv6h;
+	struct mif_device *vif = &mrt->vif6_table[vifi];
+	struct net_device *dev;
+	struct dst_entry *dst;
+	struct flowi6 fl6;
+
+	if (vif->dev == NULL)
+		goto out_free;
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+	if (vif->flags & MIFF_REGISTER) {
+		vif->pkt_out++;
+		vif->bytes_out += skb->len;
+		vif->dev->stats.tx_bytes += skb->len;
+		vif->dev->stats.tx_packets++;
+		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
+		goto out_free;
+	}
+#endif
+
+	ipv6h = ipv6_hdr(skb);
+
+	fl6 = (struct flowi6) {
+		.flowi6_oif = vif->link,
+		.daddr = ipv6h->daddr,
+	};
+
+	dst = ip6_route_output(net, NULL, &fl6);
+	if (dst->error) {
+		dst_release(dst);
+		goto out_free;
+	}
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+
+	/*
+	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
+	 * not only before forwarding, but after forwarding on all output
+	 * interfaces. It is clear, if mrouter runs a multicasting
+	 * program, it should receive packets not depending to what interface
+	 * program is joined.
+	 * If we will not make it, the program will have to join on all
+	 * interfaces. On the other hand, multihoming host (or router, but
+	 * not mrouter) cannot join to more than one interface - it will
+	 * result in receiving multiple packets.
+	 */
+	dev = vif->dev;
+	skb->dev = dev;
+	vif->pkt_out++;
+	vif->bytes_out += skb->len;
+
+	/* We are about to write */
+	/* XXX: extension headers? */
+	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
+		goto out_free;
+
+	ipv6h = ipv6_hdr(skb);
+	ipv6h->hop_limit--;
+
+	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
+
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
+		       ip6mr_forward2_finish);
+
+out_free:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
+{
+	int ct;
+
+	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
+		if (mrt->vif6_table[ct].dev == dev)
+			break;
+	}
+	return ct;
+}
+
+static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+			   struct sk_buff *skb, struct mfc6_cache *cache)
+{
+	int psend = -1;
+	int vif, ct;
+	int true_vifi = ip6mr_find_vif(mrt, skb->dev);
+
+	vif = cache->mf6c_parent;
+	cache->mfc_un.res.pkt++;
+	cache->mfc_un.res.bytes += skb->len;
+
+	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+		struct mfc6_cache *cache_proxy;
+
+		/* For an (*,G) entry, we only check that the incomming
+		 * interface is part of the static tree.
+		 */
+		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+		if (cache_proxy &&
+		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+			goto forward;
+	}
+
+	/*
+	 * Wrong interface: drop packet and (maybe) send PIM assert.
+	 */
+	if (mrt->vif6_table[vif].dev != skb->dev) {
+		cache->mfc_un.res.wrong_if++;
+
+		if (true_vifi >= 0 && mrt->mroute_do_assert &&
+		    /* pimsm uses asserts, when switching from RPT to SPT,
+		       so that we cannot check that packet arrived on an oif.
+		       It is bad, but otherwise we would need to move pretty
+		       large chunk of pimd to kernel. Ough... --ANK
+		     */
+		    (mrt->mroute_do_pim ||
+		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
+		    time_after(jiffies,
+			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
+			cache->mfc_un.res.last_assert = jiffies;
+			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
+		}
+		goto dont_forward;
+	}
+
+forward:
+	mrt->vif6_table[vif].pkt_in++;
+	mrt->vif6_table[vif].bytes_in += skb->len;
+
+	/*
+	 *	Forward the frame
+	 */
+	if (ipv6_addr_any(&cache->mf6c_origin) &&
+	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+		if (true_vifi >= 0 &&
+		    true_vifi != cache->mf6c_parent &&
+		    ipv6_hdr(skb)->hop_limit >
+				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+			/* It's an (*,*) entry and the packet is not coming from
+			 * the upstream: forward the packet to the upstream
+			 * only.
+			 */
+			psend = cache->mf6c_parent;
+			goto last_forward;
+		}
+		goto dont_forward;
+	}
+	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
+		/* For (*,G) entry, don't forward to the incoming interface */
+		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
+		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+			if (psend != -1) {
+				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+				if (skb2)
+					ip6mr_forward2(net, mrt, skb2, cache, psend);
+			}
+			psend = ct;
+		}
+	}
+last_forward:
+	if (psend != -1) {
+		ip6mr_forward2(net, mrt, skb, cache, psend);
+		return;
+	}
+
+dont_forward:
+	kfree_skb(skb);
+}
+
+
+/*
+ *	Multicast packets for forwarding arrive here
+ */
+
+int ip6_mr_input(struct sk_buff *skb)
+{
+	struct mfc6_cache *cache;
+	struct net *net = dev_net(skb->dev);
+	struct mr6_table *mrt;
+	struct flowi6 fl6 = {
+		.flowi6_iif	= skb->dev->ifindex,
+		.flowi6_mark	= skb->mark,
+	};
+	int err;
+
+	err = ip6mr_fib_lookup(net, &fl6, &mrt);
+	if (err < 0) {
+		kfree_skb(skb);
+		return err;
+	}
+
+	read_lock(&mrt_lock);
+	cache = ip6mr_cache_find(mrt,
+				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
+	if (cache == NULL) {
+		int vif = ip6mr_find_vif(mrt, skb->dev);
+
+		if (vif >= 0)
+			cache = ip6mr_cache_find_any(mrt,
+						     &ipv6_hdr(skb)->daddr,
+						     vif);
+	}
+
+	/*
+	 *	No usable cache entry
+	 */
+	if (cache == NULL) {
+		int vif;
+
+		vif = ip6mr_find_vif(mrt, skb->dev);
+		if (vif >= 0) {
+			int err = ip6mr_cache_unresolved(mrt, vif, skb);
+			read_unlock(&mrt_lock);
+
+			return err;
+		}
+		read_unlock(&mrt_lock);
+		kfree_skb(skb);
+		return -ENODEV;
+	}
+
+	ip6_mr_forward(net, mrt, skb, cache);
+
+	read_unlock(&mrt_lock);
+
+	return 0;
+}
+
+
+static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+			       struct mfc6_cache *c, struct rtmsg *rtm)
+{
+	int ct;
+	struct rtnexthop *nhp;
+	struct nlattr *mp_attr;
+	struct rta_mfc_stats mfcs;
+
+	/* If cache is unresolved, don't try to parse IIF and OIF */
+	if (c->mf6c_parent >= MAXMIFS)
+		return -ENOENT;
+
+	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
+	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
+		return -EMSGSIZE;
+	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
+	if (mp_attr == NULL)
+		return -EMSGSIZE;
+
+	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
+			if (nhp == NULL) {
+				nla_nest_cancel(skb, mp_attr);
+				return -EMSGSIZE;
+			}
+
+			nhp->rtnh_flags = 0;
+			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
+			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
+			nhp->rtnh_len = sizeof(*nhp);
+		}
+	}
+
+	nla_nest_end(skb, mp_attr);
+
+	mfcs.mfcs_packets = c->mfc_un.res.pkt;
+	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
+	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+	if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
+		return -EMSGSIZE;
+
+	rtm->rtm_type = RTN_MULTICAST;
+	return 1;
+}
+
+int ip6mr_get_route(struct net *net,
+		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
+{
+	int err;
+	struct mr6_table *mrt;
+	struct mfc6_cache *cache;
+	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+
+	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return -ENOENT;
+
+	read_lock(&mrt_lock);
+	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
+	if (!cache && skb->dev) {
+		int vif = ip6mr_find_vif(mrt, skb->dev);
+
+		if (vif >= 0)
+			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
+						     vif);
+	}
+
+	if (!cache) {
+		struct sk_buff *skb2;
+		struct ipv6hdr *iph;
+		struct net_device *dev;
+		int vif;
+
+		if (nowait) {
+			read_unlock(&mrt_lock);
+			return -EAGAIN;
+		}
+
+		dev = skb->dev;
+		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
+			read_unlock(&mrt_lock);
+			return -ENODEV;
+		}
+
+		/* really correct? */
+		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
+		if (!skb2) {
+			read_unlock(&mrt_lock);
+			return -ENOMEM;
+		}
+
+		skb_reset_transport_header(skb2);
+
+		skb_put(skb2, sizeof(struct ipv6hdr));
+		skb_reset_network_header(skb2);
+
+		iph = ipv6_hdr(skb2);
+		iph->version = 0;
+		iph->priority = 0;
+		iph->flow_lbl[0] = 0;
+		iph->flow_lbl[1] = 0;
+		iph->flow_lbl[2] = 0;
+		iph->payload_len = 0;
+		iph->nexthdr = IPPROTO_NONE;
+		iph->hop_limit = 0;
+		iph->saddr = rt->rt6i_src.addr;
+		iph->daddr = rt->rt6i_dst.addr;
+
+		err = ip6mr_cache_unresolved(mrt, vif, skb2);
+		read_unlock(&mrt_lock);
+
+		return err;
+	}
+
+	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
+		cache->mfc_flags |= MFC_NOTIFY;
+
+	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
+	read_unlock(&mrt_lock);
+	return err;
+}
+
+static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
+			     int flags)
+{
+	struct nlmsghdr *nlh;
+	struct rtmsg *rtm;
+	int err;
+
+	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	rtm = nlmsg_data(nlh);
+	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
+	rtm->rtm_dst_len  = 128;
+	rtm->rtm_src_len  = 128;
+	rtm->rtm_tos      = 0;
+	rtm->rtm_table    = mrt->id;
+	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
+		goto nla_put_failure;
+	rtm->rtm_type = RTN_MULTICAST;
+	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
+	if (c->mfc_flags & MFC_STATIC)
+		rtm->rtm_protocol = RTPROT_STATIC;
+	else
+		rtm->rtm_protocol = RTPROT_MROUTED;
+	rtm->rtm_flags    = 0;
+
+	if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
+	    nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
+		goto nla_put_failure;
+	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
+	/* do not break the dump if cache is unresolved */
+	if (err < 0 && err != -ENOENT)
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int mr6_msgsize(bool unresolved, int maxvif)
+{
+	size_t len =
+		NLMSG_ALIGN(sizeof(struct rtmsg))
+		+ nla_total_size(4)	/* RTA_TABLE */
+		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
+		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
+		;
+
+	if (!unresolved)
+		len = len
+		      + nla_total_size(4)	/* RTA_IIF */
+		      + nla_total_size(0)	/* RTA_MULTIPATH */
+		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
+						/* RTA_MFC_STATS */
+		      + nla_total_size(sizeof(struct rta_mfc_stats))
+		;
+
+	return len;
+}
+
+static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+			      int cmd)
+{
+	struct net *net = read_pnet(&mrt->net);
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
+			GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
+	if (err < 0)
+		goto errout;
+
+	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
+	return;
+
+errout:
+	kfree_skb(skb);
+	if (err < 0)
+		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
+}
+
+static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct mr6_table *mrt;
+	struct mfc6_cache *mfc;
+	unsigned int t = 0, s_t;
+	unsigned int h = 0, s_h;
+	unsigned int e = 0, s_e;
+
+	s_t = cb->args[0];
+	s_h = cb->args[1];
+	s_e = cb->args[2];
+
+	read_lock(&mrt_lock);
+	ip6mr_for_each_table(mrt, net) {
+		if (t < s_t)
+			goto next_table;
+		if (t > s_t)
+			s_h = 0;
+		for (h = s_h; h < MFC6_LINES; h++) {
+			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
+				if (e < s_e)
+					goto next_entry;
+				if (ip6mr_fill_mroute(mrt, skb,
+						      NETLINK_CB(cb->skb).portid,
+						      cb->nlh->nlmsg_seq,
+						      mfc, RTM_NEWROUTE,
+						      NLM_F_MULTI) < 0)
+					goto done;
+next_entry:
+				e++;
+			}
+			e = s_e = 0;
+		}
+		spin_lock_bh(&mfc_unres_lock);
+		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
+			if (e < s_e)
+				goto next_entry2;
+			if (ip6mr_fill_mroute(mrt, skb,
+					      NETLINK_CB(cb->skb).portid,
+					      cb->nlh->nlmsg_seq,
+					      mfc, RTM_NEWROUTE,
+					      NLM_F_MULTI) < 0) {
+				spin_unlock_bh(&mfc_unres_lock);
+				goto done;
+			}
+next_entry2:
+			e++;
+		}
+		spin_unlock_bh(&mfc_unres_lock);
+		e = s_e = 0;
+		s_h = 0;
+next_table:
+		t++;
+	}
+done:
+	read_unlock(&mrt_lock);
+
+	cb->args[2] = e;
+	cb->args[1] = h;
+	cb->args[0] = t;
+
+	return skb->len;
+}
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 85bfbc69b2c..d1c793cffcb 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -9,35 +9,35 @@
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
- * 
+ *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
-/* 
+/*
  * [Memo]
  *
  * Outbound:
- *  The compression of IP datagram MUST be done before AH/ESP processing, 
- *  fragmentation, and the addition of Hop-by-Hop/Routing header. 
+ *  The compression of IP datagram MUST be done before AH/ESP processing,
+ *  fragmentation, and the addition of Hop-by-Hop/Routing header.
  *
  * Inbound:
- *  The decompression of IP datagram MUST be done after the reassembly, 
+ *  The decompression of IP datagram MUST be done after the reassembly,
  *  AH/ESP processing.
  */
-#include <linux/config.h>
+
+#define pr_fmt(fmt) "IPv6: " fmt
+
 #include <linux/module.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 #include <net/ipcomp.h>
-#include <asm/scatterlist.h>
-#include <asm/semaphore.h>
 #include <linux/crypto.h>
+#include <linux/err.h>
 #include <linux/pfkeyv2.h>
 #include <linux/random.h>
 #include <linux/percpu.h>
@@ -45,194 +45,63 @@
 #include <linux/list.h>
 #include <linux/vmalloc.h>
 #include <linux/rtnetlink.h>
+#include <net/ip6_route.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
+#include <net/protocol.h>
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
+#include <linux/mutex.h>
 
-struct ipcomp6_tfms {
-	struct list_head list;
-	struct crypto_tfm **tfms;
-	int users;
-};
-
-static DECLARE_MUTEX(ipcomp6_resource_sem);
-static void **ipcomp6_scratches;
-static int ipcomp6_scratch_users;
-static LIST_HEAD(ipcomp6_tfms_list);
-
-static int ipcomp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
-{
-	int err = 0;
-	u8 nexthdr = 0;
-	int hdr_len = skb->h.raw - skb->nh.raw;
-	unsigned char *tmp_hdr = NULL;
-	struct ipv6hdr *iph;
-	int plen, dlen;
-	struct ipcomp_data *ipcd = x->data;
-	u8 *start, *scratch;
-	struct crypto_tfm *tfm;
-	int cpu;
-
-	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
-		skb_linearize(skb, GFP_ATOMIC) != 0) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	skb->ip_summed = CHECKSUM_NONE;
-
-	/* Remove ipcomp header and decompress original payload */
-	iph = skb->nh.ipv6h;
-	tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
-	if (!tmp_hdr)
-		goto out;
-	memcpy(tmp_hdr, iph, hdr_len);
-	nexthdr = *(u8 *)skb->data;
-	skb_pull(skb, sizeof(struct ipv6_comp_hdr)); 
-	skb->nh.raw += sizeof(struct ipv6_comp_hdr);
-	memcpy(skb->nh.raw, tmp_hdr, hdr_len);
-	iph = skb->nh.ipv6h;
-	iph->payload_len = htons(ntohs(iph->payload_len) - sizeof(struct ipv6_comp_hdr));
-	skb->h.raw = skb->data;
-
-	/* decompression */
-	plen = skb->len;
-	dlen = IPCOMP_SCRATCH_SIZE;
-	start = skb->data;
-
-	cpu = get_cpu();
-	scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
-	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-
-	err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
-	if (err) {
-		err = -EINVAL;
-		goto out_put_cpu;
-	}
-
-	if (dlen < (plen + sizeof(struct ipv6_comp_hdr))) {
-		err = -EINVAL;
-		goto out_put_cpu;
-	}
-
-	err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
-	if (err) {
-		goto out_put_cpu;
-	}
-
-	skb_put(skb, dlen - plen);
-	memcpy(skb->data, scratch, dlen);
-
-	iph = skb->nh.ipv6h;
-	iph->payload_len = htons(skb->len);
-	
-out_put_cpu:
-	put_cpu();
-out:
-	if (tmp_hdr)
-		kfree(tmp_hdr);
-	if (err)
-		goto error_out;
-	return nexthdr;
-error_out:
-	return err;
-}
-
-static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
+static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+				u8 type, u8 code, int offset, __be32 info)
 {
-	int err;
-	struct ipv6hdr *top_iph;
-	int hdr_len;
-	struct ipv6_comp_hdr *ipch;
-	struct ipcomp_data *ipcd = x->data;
-	int plen, dlen;
-	u8 *start, *scratch;
-	struct crypto_tfm *tfm;
-	int cpu;
-
-	hdr_len = skb->h.raw - skb->data;
-
-	/* check whether datagram len is larger than threshold */
-	if ((skb->len - hdr_len) < ipcd->threshold) {
-		goto out_ok;
-	}
-
-	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
-		skb_linearize(skb, GFP_ATOMIC) != 0) {
-		goto out_ok;
-	}
-
-	/* compression */
-	plen = skb->len - hdr_len;
-	dlen = IPCOMP_SCRATCH_SIZE;
-	start = skb->h.raw;
-
-	cpu = get_cpu();
-	scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
-	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-
-	err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
-	if (err || (dlen + sizeof(struct ipv6_comp_hdr)) >= plen) {
-		put_cpu();
-		goto out_ok;
-	}
-	memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
-	put_cpu();
-	pskb_trim(skb, hdr_len + dlen + sizeof(struct ip_comp_hdr));
-
-	/* insert ipcomp header and replace datagram */
-	top_iph = (struct ipv6hdr *)skb->data;
-
-	top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
-
-	ipch = (struct ipv6_comp_hdr *)start;
-	ipch->nexthdr = *skb->nh.raw;
-	ipch->flags = 0;
-	ipch->cpi = htons((u16 )ntohl(x->id.spi));
-	*skb->nh.raw = IPPROTO_COMP;
-
-out_ok:
-	return 0;
-}
-
-static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-		                int type, int code, int offset, __u32 info)
-{
-	u32 spi;
-	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
-	struct ipv6_comp_hdr *ipcomph = (struct ipv6_comp_hdr*)(skb->data+offset);
+	struct net *net = dev_net(skb->dev);
+	__be32 spi;
+	const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+	struct ip_comp_hdr *ipcomph =
+		(struct ip_comp_hdr *)(skb->data + offset);
 	struct xfrm_state *x;
 
-	if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG)
-		return;
+	if (type != ICMPV6_PKT_TOOBIG &&
+	    type != NDISC_REDIRECT)
+		return 0;
 
-	spi = ntohl(ntohs(ipcomph->cpi));
-	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6);
+	spi = htonl(ntohs(ipcomph->cpi));
+	x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+			      spi, IPPROTO_COMP, AF_INET6);
 	if (!x)
-		return;
+		return 0;
 
-	printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/"
-			"%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
-			spi, NIP6(iph->daddr));
+	if (type == NDISC_REDIRECT)
+		ip6_redirect(skb, net, skb->dev->ifindex, 0);
+	else
+		ip6_update_pmtu(skb, net, info, 0, 0);
 	xfrm_state_put(x);
+
+	return 0;
 }
 
 static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
 {
+	struct net *net = xs_net(x);
 	struct xfrm_state *t = NULL;
 
-	t = xfrm_state_alloc();
+	t = xfrm_state_alloc(net);
 	if (!t)
 		goto out;
 
 	t->id.proto = IPPROTO_IPV6;
-	t->id.spi = xfrm6_tunnel_alloc_spi((xfrm_address_t *)&x->props.saddr);
+	t->id.spi = xfrm6_tunnel_alloc_spi(net, (xfrm_address_t *)&x->props.saddr);
+	if (!t->id.spi)
+		goto error;
+
 	memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
 	memcpy(&t->sel, &x->sel, sizeof(t->sel));
 	t->props.family = AF_INET6;
-	t->props.mode = 1;
+	t->props.mode = x->props.mode;
 	memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
+	memcpy(&t->mark, &x->mark, sizeof(t->mark));
 
 	if (xfrm_init_state(t))
 		goto error;
@@ -243,19 +112,23 @@ out:
 	return t;
 
 error:
+	t->km.state = XFRM_STATE_DEAD;
 	xfrm_state_put(t);
+	t = NULL;
 	goto out;
 }
 
 static int ipcomp6_tunnel_attach(struct xfrm_state *x)
 {
+	struct net *net = xs_net(x);
 	int err = 0;
 	struct xfrm_state *t = NULL;
-	u32 spi;
+	__be32 spi;
+	u32 mark = x->mark.m & x->mark.v;
 
-	spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr);
+	spi = xfrm6_tunnel_spi_lookup(net, (xfrm_address_t *)&x->props.saddr);
 	if (spi)
-		t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr,
+		t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr,
 					      spi, IPPROTO_IPV6, AF_INET6);
 	if (!t) {
 		t = ipcomp6_tunnel_create(x);
@@ -273,228 +146,69 @@ out:
 	return err;
 }
 
-static void ipcomp6_free_scratches(void)
-{
-	int i;
-	void **scratches;
-
-	if (--ipcomp6_scratch_users)
-		return;
-
-	scratches = ipcomp6_scratches;
-	if (!scratches)
-		return;
-
-	for_each_cpu(i) {
-		void *scratch = *per_cpu_ptr(scratches, i);
-		if (scratch)
-			vfree(scratch);
-	}
-
-	free_percpu(scratches);
-}
-
-static void **ipcomp6_alloc_scratches(void)
-{
-	int i;
-	void **scratches;
-
-	if (ipcomp6_scratch_users++)
-		return ipcomp6_scratches;
-
-	scratches = alloc_percpu(void *);
-	if (!scratches)
-		return NULL;
-
-	ipcomp6_scratches = scratches;
-
-	for_each_cpu(i) {
-		void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE);
-		if (!scratch)
-			return NULL;
-		*per_cpu_ptr(scratches, i) = scratch;
-	}
-
-	return scratches;
-}
-
-static void ipcomp6_free_tfms(struct crypto_tfm **tfms)
-{
-	struct ipcomp6_tfms *pos;
-	int cpu;
-
-	list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
-		if (pos->tfms == tfms)
-			break;
-	}
-
-	BUG_TRAP(pos);
-
-	if (--pos->users)
-		return;
-
-	list_del(&pos->list);
-	kfree(pos);
-
-	if (!tfms)
-		return;
-
-	for_each_cpu(cpu) {
-		struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
-		crypto_free_tfm(tfm);
-	}
-	free_percpu(tfms);
-}
-
-static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name)
-{
-	struct ipcomp6_tfms *pos;
-	struct crypto_tfm **tfms;
-	int cpu;
-
-	/* This can be any valid CPU ID so we don't need locking. */
-	cpu = raw_smp_processor_id();
-
-	list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
-		struct crypto_tfm *tfm;
-
-		tfms = pos->tfms;
-		tfm = *per_cpu_ptr(tfms, cpu);
-
-		if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) {
-			pos->users++;
-			return tfms;
-		}
-	}
-
-	pos = kmalloc(sizeof(*pos), GFP_KERNEL);
-	if (!pos)
-		return NULL;
-
-	pos->users = 1;
-	INIT_LIST_HEAD(&pos->list);
-	list_add(&pos->list, &ipcomp6_tfms_list);
-
-	pos->tfms = tfms = alloc_percpu(struct crypto_tfm *);
-	if (!tfms)
-		goto error;
-
-	for_each_cpu(cpu) {
-		struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0);
-		if (!tfm)
-			goto error;
-		*per_cpu_ptr(tfms, cpu) = tfm;
-	}
-
-	return tfms;
-
-error:
-	ipcomp6_free_tfms(tfms);
-	return NULL;
-}
-
-static void ipcomp6_free_data(struct ipcomp_data *ipcd)
-{
-	if (ipcd->tfms)
-		ipcomp6_free_tfms(ipcd->tfms);
-	ipcomp6_free_scratches();
-}
-
-static void ipcomp6_destroy(struct xfrm_state *x)
-{
-	struct ipcomp_data *ipcd = x->data;
-	if (!ipcd)
-		return;
-	xfrm_state_delete_tunnel(x);
-	down(&ipcomp6_resource_sem);
-	ipcomp6_free_data(ipcd);
-	up(&ipcomp6_resource_sem);
-	kfree(ipcd);
-
-	xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr);
-}
-
 static int ipcomp6_init_state(struct xfrm_state *x)
 {
-	int err;
-	struct ipcomp_data *ipcd;
-	struct xfrm_algo_desc *calg_desc;
-
-	err = -EINVAL;
-	if (!x->calg)
-		goto out;
-
-	if (x->encap)
-		goto out;
+	int err = -EINVAL;
 
-	err = -ENOMEM;
-	ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
-	if (!ipcd)
-		goto out;
-
-	memset(ipcd, 0, sizeof(*ipcd));
 	x->props.header_len = 0;
-	if (x->props.mode)
+	switch (x->props.mode) {
+	case XFRM_MODE_TRANSPORT:
+		break;
+	case XFRM_MODE_TUNNEL:
 		x->props.header_len += sizeof(struct ipv6hdr);
-	
-	down(&ipcomp6_resource_sem);
-	if (!ipcomp6_alloc_scratches())
-		goto error;
+		break;
+	default:
+		goto out;
+	}
 
-	ipcd->tfms = ipcomp6_alloc_tfms(x->calg->alg_name);
-	if (!ipcd->tfms)
-		goto error;
-	up(&ipcomp6_resource_sem);
+	err = ipcomp_init_state(x);
+	if (err)
+		goto out;
 
-	if (x->props.mode) {
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = ipcomp6_tunnel_attach(x);
 		if (err)
-			goto error_tunnel;
+			goto out;
 	}
 
-	calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0);
-	BUG_ON(!calg_desc);
-	ipcd->threshold = calg_desc->uinfo.comp.threshold;
-	x->data = ipcd;
 	err = 0;
 out:
 	return err;
-error_tunnel:
-	down(&ipcomp6_resource_sem);
-error:
-	ipcomp6_free_data(ipcd);
-	up(&ipcomp6_resource_sem);
-	kfree(ipcd);
+}
 
-	goto out;
+static int ipcomp6_rcv_cb(struct sk_buff *skb, int err)
+{
+	return 0;
 }
 
-static struct xfrm_type ipcomp6_type = 
+static const struct xfrm_type ipcomp6_type =
 {
 	.description	= "IPCOMP6",
 	.owner		= THIS_MODULE,
 	.proto		= IPPROTO_COMP,
 	.init_state	= ipcomp6_init_state,
-	.destructor	= ipcomp6_destroy,
-	.input		= ipcomp6_input,
-	.output		= ipcomp6_output,
+	.destructor	= ipcomp_destroy,
+	.input		= ipcomp_input,
+	.output		= ipcomp_output,
+	.hdr_offset	= xfrm6_find_1stfragopt,
 };
 
-static struct inet6_protocol ipcomp6_protocol = 
+static struct xfrm6_protocol ipcomp6_protocol =
 {
 	.handler	= xfrm6_rcv,
+	.cb_handler	= ipcomp6_rcv_cb,
 	.err_handler	= ipcomp6_err,
-	.flags		= INET6_PROTO_NOPOLICY,
+	.priority	= 0,
 };
 
 static int __init ipcomp6_init(void)
 {
 	if (xfrm_register_type(&ipcomp6_type, AF_INET6) < 0) {
-		printk(KERN_INFO "ipcomp6 init: can't add xfrm type\n");
+		pr_info("%s: can't add xfrm type\n", __func__);
 		return -EAGAIN;
 	}
-	if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
-		printk(KERN_INFO "ipcomp6 init: can't add protocol\n");
+	if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
+		pr_info("%s: can't add protocol\n", __func__);
 		xfrm_unregister_type(&ipcomp6_type, AF_INET6);
 		return -EAGAIN;
 	}
@@ -503,10 +217,10 @@ static int __init ipcomp6_init(void)
 
 static void __exit ipcomp6_fini(void)
 {
-	if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) 
-		printk(KERN_INFO "ipv6 ipcomp close: can't remove protocol\n");
+	if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0)
+		pr_info("%s: can't remove protocol\n", __func__);
 	if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)
-		printk(KERN_INFO "ipv6 ipcomp close: can't remove xfrm type\n");
+		pr_info("%s: can't remove xfrm type\n", __func__);
 }
 
 module_init(ipcomp6_init);
@@ -515,4 +229,4 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173");
 MODULE_AUTHOR("Mitsuru KANDA <mk@linux-ipv6.org>");
 
-
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_COMP);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 8567873d0dd..edb58aff4ae 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1,14 +1,12 @@
 /*
  *	IPv6 BSD socket options interface
- *	Linux INET6 implementation 
+ *	Linux INET6 implementation
  *
  *	Authors:
- *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *	Pedro Roque		<roque@di.fc.ul.pt>
  *
  *	Based on linux/net/ipv4/ip_sockglue.c
  *
- *	$Id: ipv6_sockglue.c,v 1.41 2002/02/01 22:01:04 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
  *      as published by the Free Software Foundation; either version
@@ -16,7 +14,6 @@
  *
  *	FIXME: Make the setsockopt code POSIX compliant: That is
  *
- *	o	Return -EINVAL for setsockopt of short lengths
  *	o	Truncate getsockopt returns
  *	o	Return an optlen of the truncated length if need be
  *
@@ -26,19 +23,20 @@
  */
 
 #include <linux/module.h>
-#include <linux/config.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
-#include <linux/sched.h>
 #include <linux/net.h>
 #include <linux/in6.h>
+#include <linux/mroute6.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/init.h>
 #include <linux/sysctl.h>
 #include <linux/netfilter.h>
+#include <linux/slab.h>
 
 #include <net/sock.h>
 #include <net/snmp.h>
@@ -51,27 +49,22 @@
 #include <net/inet_common.h>
 #include <net/tcp.h>
 #include <net/udp.h>
+#include <net/udplite.h>
 #include <net/xfrm.h>
+#include <net/compat.h>
 
 #include <asm/uaccess.h>
 
-DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly;
-
-static struct packet_type ipv6_packet_type = {
-	.type = __constant_htons(ETH_P_IPV6), 
-	.func = ipv6_rcv,
-};
-
 struct ip6_ra_chain *ip6_ra_chain;
 DEFINE_RWLOCK(ip6_ra_lock);
 
-int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
+int ip6_ra_control(struct sock *sk, int sel)
 {
 	struct ip6_ra_chain *ra, *new_ra, **rap;
 
 	/* RA packet may be delivered ONLY to IPPROTO_RAW socket */
-	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW)
-		return -EINVAL;
+	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_RAW)
+		return -ENOPROTOOPT;
 
 	new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
 
@@ -80,16 +73,13 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
 		if (ra->sk == sk) {
 			if (sel>=0) {
 				write_unlock_bh(&ip6_ra_lock);
-				if (new_ra)
-					kfree(new_ra);
+				kfree(new_ra);
 				return -EADDRINUSE;
 			}
 
 			*rap = ra->next;
 			write_unlock_bh(&ip6_ra_lock);
 
-			if (ra->destructor)
-				ra->destructor(sk);
 			sock_put(sk);
 			kfree(ra);
 			return 0;
@@ -101,7 +91,6 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
 	}
 	new_ra->sk = sk;
 	new_ra->sel = sel;
-	new_ra->destructor = destructor;
 	new_ra->next = ra;
 	*rap = new_ra;
 	sock_hold(sk);
@@ -109,37 +98,74 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
 	return 0;
 }
 
-int ipv6_setsockopt(struct sock *sk, int level, int optname,
-		    char __user *optval, int optlen)
+static
+struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
+					   struct ipv6_txoptions *opt)
+{
+	if (inet_sk(sk)->is_icsk) {
+		if (opt &&
+		    !((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
+		    inet_sk(sk)->inet_daddr != LOOPBACK4_IPV6) {
+			struct inet_connection_sock *icsk = inet_csk(sk);
+			icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
+			icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
+		}
+		opt = xchg(&inet6_sk(sk)->opt, opt);
+	} else {
+		spin_lock(&sk->sk_dst_lock);
+		opt = xchg(&inet6_sk(sk)->opt, opt);
+		spin_unlock(&sk->sk_dst_lock);
+	}
+	sk_dst_reset(sk);
+
+	return opt;
+}
+
+static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
+		    char __user *optval, unsigned int optlen)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct net *net = sock_net(sk);
 	int val, valbool;
 	int retv = -ENOPROTOOPT;
 
-	if (level == SOL_IP && sk->sk_type != SOCK_RAW)
-		return udp_prot.setsockopt(sk, level, optname, optval, optlen);
-
-	if(level!=SOL_IPV6)
-		goto out;
-
 	if (optval == NULL)
 		val=0;
-	else if (get_user(val, (int __user *) optval))
-		return -EFAULT;
+	else {
+		if (optlen >= sizeof(int)) {
+			if (get_user(val, (int __user *) optval))
+				return -EFAULT;
+		} else
+			val = 0;
+	}
 
 	valbool = (val!=0);
 
+	if (ip6_mroute_opt(optname))
+		return ip6_mroute_setsockopt(sk, optname, optval, optlen);
+
 	lock_sock(sk);
 
 	switch (optname) {
 
 	case IPV6_ADDRFORM:
+		if (optlen < sizeof(int))
+			goto e_inval;
 		if (val == PF_INET) {
 			struct ipv6_txoptions *opt;
 			struct sk_buff *pktopt;
 
-			if (sk->sk_protocol != IPPROTO_UDP &&
-			    sk->sk_protocol != IPPROTO_TCP)
+			if (sk->sk_type == SOCK_RAW)
+				break;
+
+			if (sk->sk_protocol == IPPROTO_UDP ||
+			    sk->sk_protocol == IPPROTO_UDPLITE) {
+				struct udp_sock *up = udp_sk(sk);
+				if (up->pending == AF_INET6) {
+					retv = -EBUSY;
+					break;
+				}
+			} else if (sk->sk_protocol != IPPROTO_TCP)
 				break;
 
 			if (sk->sk_state != TCP_ESTABLISHED) {
@@ -148,7 +174,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
 			}
 
 			if (ipv6_only_sock(sk) ||
-			    !(ipv6_addr_type(&np->daddr) & IPV6_ADDR_MAPPED)) {
+			    !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
 				retv = -EADDRNOTAVAIL;
 				break;
 			}
@@ -164,23 +190,26 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
 			sk_refcnt_debug_dec(sk);
 
 			if (sk->sk_protocol == IPPROTO_TCP) {
-				struct tcp_sock *tp = tcp_sk(sk);
-
+				struct inet_connection_sock *icsk = inet_csk(sk);
 				local_bh_disable();
-				sock_prot_dec_use(sk->sk_prot);
-				sock_prot_inc_use(&tcp_prot);
+				sock_prot_inuse_add(net, sk->sk_prot, -1);
+				sock_prot_inuse_add(net, &tcp_prot, 1);
 				local_bh_enable();
 				sk->sk_prot = &tcp_prot;
-				tp->af_specific = &ipv4_specific;
+				icsk->icsk_af_ops = &ipv4_specific;
 				sk->sk_socket->ops = &inet_stream_ops;
 				sk->sk_family = PF_INET;
-				tcp_sync_mss(sk, tp->pmtu_cookie);
+				tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 			} else {
+				struct proto *prot = &udp_prot;
+
+				if (sk->sk_protocol == IPPROTO_UDPLITE)
+					prot = &udplite_prot;
 				local_bh_disable();
-				sock_prot_dec_use(sk->sk_prot);
-				sock_prot_inc_use(&udp_prot);
+				sock_prot_inuse_add(net, sk->sk_prot, -1);
+				sock_prot_inuse_add(net, prot, 1);
 				local_bh_enable();
-				sk->sk_prot = &udp_prot;
+				sk->sk_prot = prot;
 				sk->sk_socket->ops = &inet_dgram_ops;
 				sk->sk_family = PF_INET;
 			}
@@ -188,8 +217,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
 			if (opt)
 				sock_kfree_s(sk, opt, opt->tot_len);
 			pktopt = xchg(&np->pktoptions, NULL);
-			if (pktopt)
-				kfree_skb(pktopt);
+			kfree_skb(pktopt);
 
 			sk->sk_destruct = inet_sock_destruct;
 			/*
@@ -204,99 +232,157 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
 		goto e_inval;
 
 	case IPV6_V6ONLY:
-		if (inet_sk(sk)->num)
+		if (optlen < sizeof(int) ||
+		    inet_sk(sk)->inet_num)
 			goto e_inval;
 		np->ipv6only = valbool;
 		retv = 0;
 		break;
 
 	case IPV6_RECVPKTINFO:
+		if (optlen < sizeof(int))
+			goto e_inval;
 		np->rxopt.bits.rxinfo = valbool;
 		retv = 0;
 		break;
-		
+
 	case IPV6_2292PKTINFO:
+		if (optlen < sizeof(int))
+			goto e_inval;
 		np->rxopt.bits.rxoinfo = valbool;
 		retv = 0;
 		break;
 
 	case IPV6_RECVHOPLIMIT:
+		if (optlen < sizeof(int))
+			goto e_inval;
 		np->rxopt.bits.rxhlim = valbool;
 		retv = 0;
 		break;
 
 	case IPV6_2292HOPLIMIT:
+		if (optlen < sizeof(int))
+			goto e_inval;
 		np->rxopt.bits.rxohlim = valbool;
 		retv = 0;
 		break;
 
 	case IPV6_RECVRTHDR:
-		if (val < 0 || val > 2)
+		if (optlen < sizeof(int))
 			goto e_inval;
-		np->rxopt.bits.srcrt = val;
+		np->rxopt.bits.srcrt = valbool;
 		retv = 0;
 		break;
 
 	case IPV6_2292RTHDR:
-		if (val < 0 || val > 2)
+		if (optlen < sizeof(int))
 			goto e_inval;
-		np->rxopt.bits.osrcrt = val;
+		np->rxopt.bits.osrcrt = valbool;
 		retv = 0;
 		break;
 
 	case IPV6_RECVHOPOPTS:
+		if (optlen < sizeof(int))
+			goto e_inval;
 		np->rxopt.bits.hopopts = valbool;
 		retv = 0;
 		break;
 
 	case IPV6_2292HOPOPTS:
+		if (optlen < sizeof(int))
+			goto e_inval;
 		np->rxopt.bits.ohopopts = valbool;
 		retv = 0;
 		break;
 
 	case IPV6_RECVDSTOPTS:
+		if (optlen < sizeof(int))
+			goto e_inval;
 		np->rxopt.bits.dstopts = valbool;
 		retv = 0;
 		break;
 
 	case IPV6_2292DSTOPTS:
+		if (optlen < sizeof(int))
+			goto e_inval;
 		np->rxopt.bits.odstopts = valbool;
 		retv = 0;
 		break;
 
 	case IPV6_TCLASS:
-		if (val < 0 || val > 0xff)
+		if (optlen < sizeof(int))
+			goto e_inval;
+		if (val < -1 || val > 0xff)
 			goto e_inval;
+		/* RFC 3542, 6.5: default traffic class of 0x0 */
+		if (val == -1)
+			val = 0;
 		np->tclass = val;
 		retv = 0;
 		break;
-		
+
 	case IPV6_RECVTCLASS:
+		if (optlen < sizeof(int))
+			goto e_inval;
 		np->rxopt.bits.rxtclass = valbool;
 		retv = 0;
 		break;
 
 	case IPV6_FLOWINFO:
+		if (optlen < sizeof(int))
+			goto e_inval;
 		np->rxopt.bits.rxflow = valbool;
 		retv = 0;
 		break;
 
+	case IPV6_RECVPATHMTU:
+		if (optlen < sizeof(int))
+			goto e_inval;
+		np->rxopt.bits.rxpmtu = valbool;
+		retv = 0;
+		break;
+
+	case IPV6_TRANSPARENT:
+		if (valbool && !ns_capable(net->user_ns, CAP_NET_ADMIN) &&
+		    !ns_capable(net->user_ns, CAP_NET_RAW)) {
+			retv = -EPERM;
+			break;
+		}
+		if (optlen < sizeof(int))
+			goto e_inval;
+		/* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */
+		inet_sk(sk)->transparent = valbool;
+		retv = 0;
+		break;
+
+	case IPV6_RECVORIGDSTADDR:
+		if (optlen < sizeof(int))
+			goto e_inval;
+		np->rxopt.bits.rxorigdstaddr = valbool;
+		retv = 0;
+		break;
+
 	case IPV6_HOPOPTS:
 	case IPV6_RTHDRDSTOPTS:
 	case IPV6_RTHDR:
 	case IPV6_DSTOPTS:
 	{
 		struct ipv6_txoptions *opt;
+
+		/* remove any sticky options header with a zero option
+		 * length, per RFC3542.
+		 */
 		if (optlen == 0)
-			optval = 0;
+			optval = NULL;
+		else if (optval == NULL)
+			goto e_inval;
+		else if (optlen < sizeof(struct ipv6_opt_hdr) ||
+			 optlen & 0x7 || optlen > 8 * 255)
+			goto e_inval;
 
 		/* hop-by-hop / destination options are privileged option */
 		retv = -EPERM;
-		if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW))
-			break;
-
-		retv = -EINVAL;
-		if (optlen & 0x7 || optlen > 8 * 255)
+		if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
 			break;
 
 		opt = ipv6_renew_options(sk, np->opt, optname,
@@ -308,49 +394,63 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
 		}
 
 		/* routing header option needs extra check */
-		if (optname == IPV6_RTHDR && opt->srcrt) {
+		retv = -EINVAL;
+		if (optname == IPV6_RTHDR && opt && opt->srcrt) {
 			struct ipv6_rt_hdr *rthdr = opt->srcrt;
-			if (rthdr->type)
-				goto sticky_done;
-			if ((rthdr->hdrlen & 1) ||
-			    (rthdr->hdrlen >> 1) != rthdr->segments_left)
+			switch (rthdr->type) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+			case IPV6_SRCRT_TYPE_2:
+				if (rthdr->hdrlen != 2 ||
+				    rthdr->segments_left != 1)
+					goto sticky_done;
+
+				break;
+#endif
+			default:
 				goto sticky_done;
+			}
 		}
 
 		retv = 0;
-		if (sk->sk_type == SOCK_STREAM) {
-			if (opt) {
-				struct tcp_sock *tp = tcp_sk(sk);
-				if (!((1 << sk->sk_state) &
-				      (TCPF_LISTEN | TCPF_CLOSE))
-				    && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
-					tp->ext_header_len = opt->opt_flen + opt->opt_nflen;
-					tcp_sync_mss(sk, tp->pmtu_cookie);
-				}
-			}
-			opt = xchg(&np->opt, opt);
-			sk_dst_reset(sk);
-		} else {
-			write_lock(&sk->sk_dst_lock);
-			opt = xchg(&np->opt, opt);
-			write_unlock(&sk->sk_dst_lock);
-			sk_dst_reset(sk);
-		}
+		opt = ipv6_update_options(sk, opt);
 sticky_done:
 		if (opt)
 			sock_kfree_s(sk, opt, opt->tot_len);
 		break;
 	}
 
+	case IPV6_PKTINFO:
+	{
+		struct in6_pktinfo pkt;
+
+		if (optlen == 0)
+			goto e_inval;
+		else if (optlen < sizeof(struct in6_pktinfo) || optval == NULL)
+			goto e_inval;
+
+		if (copy_from_user(&pkt, optval, sizeof(struct in6_pktinfo))) {
+				retv = -EFAULT;
+				break;
+		}
+		if (sk->sk_bound_dev_if && pkt.ipi6_ifindex != sk->sk_bound_dev_if)
+			goto e_inval;
+
+		np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex;
+		np->sticky_pktinfo.ipi6_addr = pkt.ipi6_addr;
+		retv = 0;
+		break;
+	}
+
 	case IPV6_2292PKTOPTIONS:
 	{
 		struct ipv6_txoptions *opt = NULL;
 		struct msghdr msg;
-		struct flowi fl;
+		struct flowi6 fl6;
 		int junk;
 
-		fl.fl6_flowlabel = 0;
-		fl.oif = sk->sk_bound_dev_if;
+		memset(&fl6, 0, sizeof(fl6));
+		fl6.flowi6_oif = sk->sk_bound_dev_if;
+		fl6.flowi6_mark = sk->sk_mark;
 
 		if (optlen == 0)
 			goto update;
@@ -376,36 +476,21 @@ sticky_done:
 		msg.msg_controllen = optlen;
 		msg.msg_control = (void*)(opt+1);
 
-		retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk);
+		retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk,
+					     &junk, &junk);
 		if (retv)
 			goto done;
 update:
 		retv = 0;
-		if (sk->sk_type == SOCK_STREAM) {
-			if (opt) {
-				struct tcp_sock *tp = tcp_sk(sk);
-				if (!((1 << sk->sk_state) &
-				      (TCPF_LISTEN | TCPF_CLOSE))
-				    && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
-					tp->ext_header_len = opt->opt_flen + opt->opt_nflen;
-					tcp_sync_mss(sk, tp->pmtu_cookie);
-				}
-			}
-			opt = xchg(&np->opt, opt);
-			sk_dst_reset(sk);
-		} else {
-			write_lock(&sk->sk_dst_lock);
-			opt = xchg(&np->opt, opt);
-			write_unlock(&sk->sk_dst_lock);
-			sk_dst_reset(sk);
-		}
-
+		opt = ipv6_update_options(sk, opt);
 done:
 		if (opt)
 			sock_kfree_s(sk, opt, opt->tot_len);
 		break;
 	}
 	case IPV6_UNICAST_HOPS:
+		if (optlen < sizeof(int))
+			goto e_inval;
 		if (val > 255 || val < -1)
 			goto e_inval;
 		np->hop_limit = val;
@@ -414,27 +499,72 @@ done:
 
 	case IPV6_MULTICAST_HOPS:
 		if (sk->sk_type == SOCK_STREAM)
+			break;
+		if (optlen < sizeof(int))
 			goto e_inval;
 		if (val > 255 || val < -1)
 			goto e_inval;
-		np->mcast_hops = val;
+		np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val);
 		retv = 0;
 		break;
 
 	case IPV6_MULTICAST_LOOP:
+		if (optlen < sizeof(int))
+			goto e_inval;
+		if (val != valbool)
+			goto e_inval;
 		np->mc_loop = valbool;
 		retv = 0;
 		break;
 
+	case IPV6_UNICAST_IF:
+	{
+		struct net_device *dev = NULL;
+		int ifindex;
+
+		if (optlen != sizeof(int))
+			goto e_inval;
+
+		ifindex = (__force int)ntohl((__force __be32)val);
+		if (ifindex == 0) {
+			np->ucast_oif = 0;
+			retv = 0;
+			break;
+		}
+
+		dev = dev_get_by_index(net, ifindex);
+		retv = -EADDRNOTAVAIL;
+		if (!dev)
+			break;
+		dev_put(dev);
+
+		retv = -EINVAL;
+		if (sk->sk_bound_dev_if)
+			break;
+
+		np->ucast_oif = ifindex;
+		retv = 0;
+		break;
+	}
+
 	case IPV6_MULTICAST_IF:
 		if (sk->sk_type == SOCK_STREAM)
-			goto e_inval;
-		if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val)
+			break;
+		if (optlen < sizeof(int))
 			goto e_inval;
 
-		if (__dev_get_by_index(val) == NULL) {
-			retv = -ENODEV;
-			break;
+		if (val) {
+			struct net_device *dev;
+
+			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val)
+				goto e_inval;
+
+			dev = dev_get_by_index(net, val);
+			if (!dev) {
+				retv = -ENODEV;
+				break;
+			}
+			dev_put(dev);
 		}
 		np->mcast_oif = val;
 		retv = 0;
@@ -444,6 +574,13 @@ done:
 	{
 		struct ipv6_mreq mreq;
 
+		if (optlen < sizeof(struct ipv6_mreq))
+			goto e_inval;
+
+		retv = -EPROTO;
+		if (inet_sk(sk)->is_icsk)
+			break;
+
 		retv = -EFAULT;
 		if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)))
 			break;
@@ -459,7 +596,7 @@ done:
 	{
 		struct ipv6_mreq mreq;
 
-		if (optlen != sizeof(struct ipv6_mreq))
+		if (optlen < sizeof(struct ipv6_mreq))
 			goto e_inval;
 
 		retv = -EFAULT;
@@ -478,6 +615,9 @@ done:
 		struct group_req greq;
 		struct sockaddr_in6 *psin6;
 
+		if (optlen < sizeof(struct group_req))
+			goto e_inval;
+
 		retv = -EFAULT;
 		if (copy_from_user(&greq, optval, sizeof(struct group_req)))
 			break;
@@ -502,7 +642,7 @@ done:
 		struct group_source_req greqs;
 		int omode, add;
 
-		if (optlen != sizeof(struct group_source_req))
+		if (optlen < sizeof(struct group_source_req))
 			goto e_inval;
 		if (copy_from_user(&greqs, optval, sizeof(greqs))) {
 			retv = -EFAULT;
@@ -539,7 +679,6 @@ done:
 	}
 	case MCAST_MSFILTER:
 	{
-		extern int sysctl_mld_max_msf;
 		struct group_filter *gsf;
 
 		if (optlen < GROUP_FILTER_SIZE(0))
@@ -548,8 +687,8 @@ done:
 			retv = -ENOBUFS;
 			break;
 		}
-		gsf = (struct group_filter *)kmalloc(optlen,GFP_KERNEL);
-		if (gsf == 0) {
+		gsf = kmalloc(optlen,GFP_KERNEL);
+		if (!gsf) {
 			retv = -ENOBUFS;
 			break;
 		}
@@ -576,27 +715,37 @@ done:
 		break;
 	}
 	case IPV6_ROUTER_ALERT:
-		retv = ip6_ra_control(sk, val, NULL);
+		if (optlen < sizeof(int))
+			goto e_inval;
+		retv = ip6_ra_control(sk, val);
 		break;
 	case IPV6_MTU_DISCOVER:
-		if (val<0 || val>2)
+		if (optlen < sizeof(int))
+			goto e_inval;
+		if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
 			goto e_inval;
 		np->pmtudisc = val;
 		retv = 0;
 		break;
 	case IPV6_MTU:
+		if (optlen < sizeof(int))
+			goto e_inval;
 		if (val && val < IPV6_MIN_MTU)
 			goto e_inval;
 		np->frag_size = val;
 		retv = 0;
 		break;
 	case IPV6_RECVERR:
+		if (optlen < sizeof(int))
+			goto e_inval;
 		np->recverr = valbool;
 		if (!val)
 			skb_queue_purge(&sk->sk_error_queue);
 		retv = 0;
 		break;
 	case IPV6_FLOWINFO_SEND:
+		if (optlen < sizeof(int))
+			goto e_inval;
 		np->sndflow = valbool;
 		retv = 0;
 		break;
@@ -606,22 +755,89 @@ done:
 	case IPV6_IPSEC_POLICY:
 	case IPV6_XFRM_POLICY:
 		retv = -EPERM;
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			break;
 		retv = xfrm_user_policy(sk, optname, optval, optlen);
 		break;
 
-#ifdef CONFIG_NETFILTER
-	default:
-		retv = nf_setsockopt(sk, PF_INET6, optname, optval, 
-					    optlen);
-		break;
-#endif
+	case IPV6_ADDR_PREFERENCES:
+	    {
+		unsigned int pref = 0;
+		unsigned int prefmask = ~0;
+
+		if (optlen < sizeof(int))
+			goto e_inval;
+
+		retv = -EINVAL;
+
+		/* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
+		switch (val & (IPV6_PREFER_SRC_PUBLIC|
+			       IPV6_PREFER_SRC_TMP|
+			       IPV6_PREFER_SRC_PUBTMP_DEFAULT)) {
+		case IPV6_PREFER_SRC_PUBLIC:
+			pref |= IPV6_PREFER_SRC_PUBLIC;
+			break;
+		case IPV6_PREFER_SRC_TMP:
+			pref |= IPV6_PREFER_SRC_TMP;
+			break;
+		case IPV6_PREFER_SRC_PUBTMP_DEFAULT:
+			break;
+		case 0:
+			goto pref_skip_pubtmp;
+		default:
+			goto e_inval;
+		}
+
+		prefmask &= ~(IPV6_PREFER_SRC_PUBLIC|
+			      IPV6_PREFER_SRC_TMP);
+pref_skip_pubtmp:
 
+		/* check HOME/COA conflicts */
+		switch (val & (IPV6_PREFER_SRC_HOME|IPV6_PREFER_SRC_COA)) {
+		case IPV6_PREFER_SRC_HOME:
+			break;
+		case IPV6_PREFER_SRC_COA:
+			pref |= IPV6_PREFER_SRC_COA;
+		case 0:
+			goto pref_skip_coa;
+		default:
+			goto e_inval;
+		}
+
+		prefmask &= ~IPV6_PREFER_SRC_COA;
+pref_skip_coa:
+
+		/* check CGA/NONCGA conflicts */
+		switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) {
+		case IPV6_PREFER_SRC_CGA:
+		case IPV6_PREFER_SRC_NONCGA:
+		case 0:
+			break;
+		default:
+			goto e_inval;
+		}
+
+		np->srcprefs = (np->srcprefs & prefmask) | pref;
+		retv = 0;
+
+		break;
+	    }
+	case IPV6_MINHOPCOUNT:
+		if (optlen < sizeof(int))
+			goto e_inval;
+		if (val < 0 || val > 255)
+			goto e_inval;
+		np->min_hopcount = val;
+		retv = 0;
+		break;
+	case IPV6_DONTFRAG:
+		np->dontfrag = valbool;
+		retv = 0;
+		break;
 	}
+
 	release_sock(sk);
 
-out:
 	return retv;
 
 e_inval:
@@ -629,35 +845,122 @@ e_inval:
 	return -EINVAL;
 }
 
-int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_opt_hdr *hdr,
-			   char __user *optval, int len)
+int ipv6_setsockopt(struct sock *sk, int level, int optname,
+		    char __user *optval, unsigned int optlen)
+{
+	int err;
+
+	if (level == SOL_IP && sk->sk_type != SOCK_RAW)
+		return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+
+	if (level != SOL_IPV6)
+		return -ENOPROTOOPT;
+
+	err = do_ipv6_setsockopt(sk, level, optname, optval, optlen);
+#ifdef CONFIG_NETFILTER
+	/* we need to exclude all possible ENOPROTOOPTs except default case */
+	if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
+			optname != IPV6_XFRM_POLICY) {
+		lock_sock(sk);
+		err = nf_setsockopt(sk, PF_INET6, optname, optval,
+				optlen);
+		release_sock(sk);
+	}
+#endif
+	return err;
+}
+
+EXPORT_SYMBOL(ipv6_setsockopt);
+
+#ifdef CONFIG_COMPAT
+int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
+			   char __user *optval, unsigned int optlen)
 {
+	int err;
+
+	if (level == SOL_IP && sk->sk_type != SOCK_RAW) {
+		if (udp_prot.compat_setsockopt != NULL)
+			return udp_prot.compat_setsockopt(sk, level, optname,
+							  optval, optlen);
+		return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+	}
+
+	if (level != SOL_IPV6)
+		return -ENOPROTOOPT;
+
+	if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER)
+		return compat_mc_setsockopt(sk, level, optname, optval, optlen,
+			ipv6_setsockopt);
+
+	err = do_ipv6_setsockopt(sk, level, optname, optval, optlen);
+#ifdef CONFIG_NETFILTER
+	/* we need to exclude all possible ENOPROTOOPTs except default case */
+	if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
+	    optname != IPV6_XFRM_POLICY) {
+		lock_sock(sk);
+		err = compat_nf_setsockopt(sk, PF_INET6, optname,
+					   optval, optlen);
+		release_sock(sk);
+	}
+#endif
+	return err;
+}
+
+EXPORT_SYMBOL(compat_ipv6_setsockopt);
+#endif
+
+static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
+				  int optname, char __user *optval, int len)
+{
+	struct ipv6_opt_hdr *hdr;
+
+	if (!opt)
+		return 0;
+
+	switch(optname) {
+	case IPV6_HOPOPTS:
+		hdr = opt->hopopt;
+		break;
+	case IPV6_RTHDRDSTOPTS:
+		hdr = opt->dst0opt;
+		break;
+	case IPV6_RTHDR:
+		hdr = (struct ipv6_opt_hdr *)opt->srcrt;
+		break;
+	case IPV6_DSTOPTS:
+		hdr = opt->dst1opt;
+		break;
+	default:
+		return -EINVAL;	/* should not happen */
+	}
+
 	if (!hdr)
 		return 0;
-	len = min_t(int, len, ipv6_optlen(hdr));
-	if (copy_to_user(optval, hdr, ipv6_optlen(hdr)))
+
+	len = min_t(unsigned int, len, ipv6_optlen(hdr));
+	if (copy_to_user(optval, hdr, len))
 		return -EFAULT;
 	return len;
 }
 
-int ipv6_getsockopt(struct sock *sk, int level, int optname,
-		    char __user *optval, int __user *optlen)
+static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
+		    char __user *optval, int __user *optlen, unsigned int flags)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	int len;
 	int val;
 
-	if (level == SOL_IP && sk->sk_type != SOCK_RAW)
-		return udp_prot.getsockopt(sk, level, optname, optval, optlen);
-	if(level!=SOL_IPV6)
-		return -ENOPROTOOPT;
+	if (ip6_mroute_opt(optname))
+		return ip6_mroute_getsockopt(sk, optname, optval, optlen);
+
 	if (get_user(len, optlen))
 		return -EFAULT;
 	switch (optname) {
 	case IPV6_ADDRFORM:
 		if (sk->sk_protocol != IPPROTO_UDP &&
+		    sk->sk_protocol != IPPROTO_UDPLITE &&
 		    sk->sk_protocol != IPPROTO_TCP)
-			return -EINVAL;
+			return -ENOPROTOOPT;
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -ENOTCONN;
 		val = sk->sk_family;
@@ -671,6 +974,8 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
 			return -EINVAL;
 		if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0)))
 			return -EFAULT;
+		if (gsf.gf_group.ss_family != AF_INET6)
+			return -EADDRNOTAVAIL;
 		lock_sock(sk);
 		err = ip6_mc_msfget(sk, &gsf,
 			(struct group_filter __user *)optval, optlen);
@@ -688,7 +993,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
 
 		msg.msg_control = optval;
 		msg.msg_controllen = len;
-		msg.msg_flags = 0;
+		msg.msg_flags = flags;
 
 		lock_sock(sk);
 		skb = np->pktoptions;
@@ -697,31 +1002,42 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
 		release_sock(sk);
 
 		if (skb) {
-			int err = datagram_recv_ctl(sk, &msg, skb);
+			ip6_datagram_recv_ctl(sk, &msg, skb);
 			kfree_skb(skb);
-			if (err)
-				return err;
 		} else {
 			if (np->rxopt.bits.rxinfo) {
 				struct in6_pktinfo src_info;
-				src_info.ipi6_ifindex = np->mcast_oif;
-				ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr);
+				src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
+					np->sticky_pktinfo.ipi6_ifindex;
+				src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr;
 				put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
 			}
 			if (np->rxopt.bits.rxhlim) {
 				int hlim = np->mcast_hops;
 				put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
 			}
+			if (np->rxopt.bits.rxtclass) {
+				int tclass = (int)ip6_tclass(np->rcv_flowinfo);
+
+				put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
+			}
 			if (np->rxopt.bits.rxoinfo) {
 				struct in6_pktinfo src_info;
-				src_info.ipi6_ifindex = np->mcast_oif;
-				ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr);
+				src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
+					np->sticky_pktinfo.ipi6_ifindex;
+				src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr :
+								     np->sticky_pktinfo.ipi6_addr;
 				put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
 			}
 			if (np->rxopt.bits.rxohlim) {
 				int hlim = np->mcast_hops;
 				put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
 			}
+			if (np->rxopt.bits.rxflow) {
+				__be32 flowinfo = np->rcv_flowinfo;
+
+				put_cmsg(&msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
+			}
 		}
 		len -= msg.msg_controllen;
 		return put_user(len, optlen);
@@ -729,14 +1045,13 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
 	case IPV6_MTU:
 	{
 		struct dst_entry *dst;
-		val = 0;	
-		lock_sock(sk);
-		dst = sk_dst_get(sk);
-		if (dst) {
+
+		val = 0;
+		rcu_read_lock();
+		dst = __sk_dst_get(sk);
+		if (dst)
 			val = dst_mtu(dst);
-			dst_release(dst);
-		}
-		release_sock(sk);
+		rcu_read_unlock();
 		if (!val)
 			return -ENOTCONN;
 		break;
@@ -777,9 +1092,12 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
 	{
 
 		lock_sock(sk);
-		len = ipv6_getsockopt_sticky(sk, np->opt->hopopt,
-					     optval, len);
+		len = ipv6_getsockopt_sticky(sk, np->opt,
+					     optname, optval, len);
 		release_sock(sk);
+		/* check if ipv6_getsockopt_sticky() returns err code */
+		if (len < 0)
+			return len;
 		return put_user(len, optlen);
 	}
 
@@ -811,13 +1129,68 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
 		val = np->rxopt.bits.rxflow;
 		break;
 
-	case IPV6_UNICAST_HOPS:
-		val = np->hop_limit;
+	case IPV6_RECVPATHMTU:
+		val = np->rxopt.bits.rxpmtu;
+		break;
+
+	case IPV6_PATHMTU:
+	{
+		struct dst_entry *dst;
+		struct ip6_mtuinfo mtuinfo;
+
+		if (len < sizeof(mtuinfo))
+			return -EINVAL;
+
+		len = sizeof(mtuinfo);
+		memset(&mtuinfo, 0, sizeof(mtuinfo));
+
+		rcu_read_lock();
+		dst = __sk_dst_get(sk);
+		if (dst)
+			mtuinfo.ip6m_mtu = dst_mtu(dst);
+		rcu_read_unlock();
+		if (!mtuinfo.ip6m_mtu)
+			return -ENOTCONN;
+
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, &mtuinfo, len))
+			return -EFAULT;
+
+		return 0;
+		break;
+	}
+
+	case IPV6_TRANSPARENT:
+		val = inet_sk(sk)->transparent;
 		break;
 
+	case IPV6_RECVORIGDSTADDR:
+		val = np->rxopt.bits.rxorigdstaddr;
+		break;
+
+	case IPV6_UNICAST_HOPS:
 	case IPV6_MULTICAST_HOPS:
-		val = np->mcast_hops;
+	{
+		struct dst_entry *dst;
+
+		if (optname == IPV6_UNICAST_HOPS)
+			val = np->hop_limit;
+		else
+			val = np->mcast_hops;
+
+		if (val < 0) {
+			rcu_read_lock();
+			dst = __sk_dst_get(sk);
+			if (dst)
+				val = ip6_dst_hoplimit(dst);
+			rcu_read_unlock();
+		}
+
+		if (val < 0)
+			val = sock_net(sk)->ipv6.devconf_all->hop_limit;
 		break;
+	}
 
 	case IPV6_MULTICAST_LOOP:
 		val = np->mc_loop;
@@ -827,6 +1200,10 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
 		val = np->mcast_oif;
 		break;
 
+	case IPV6_UNICAST_IF:
+		val = (__force int)htonl((__u32) np->ucast_oif);
+		break;
+
 	case IPV6_MTU_DISCOVER:
 		val = np->pmtudisc;
 		break;
@@ -839,18 +1216,65 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
 		val = np->sndflow;
 		break;
 
+	case IPV6_FLOWLABEL_MGR:
+	{
+		struct in6_flowlabel_req freq;
+		int flags;
+
+		if (len < sizeof(freq))
+			return -EINVAL;
+
+		if (copy_from_user(&freq, optval, sizeof(freq)))
+			return -EFAULT;
+
+		if (freq.flr_action != IPV6_FL_A_GET)
+			return -EINVAL;
+
+		len = sizeof(freq);
+		flags = freq.flr_flags;
+
+		memset(&freq, 0, sizeof(freq));
+
+		val = ipv6_flowlabel_opt_get(sk, &freq, flags);
+		if (val < 0)
+			return val;
+
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, &freq, len))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	case IPV6_ADDR_PREFERENCES:
+		val = 0;
+
+		if (np->srcprefs & IPV6_PREFER_SRC_TMP)
+			val |= IPV6_PREFER_SRC_TMP;
+		else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC)
+			val |= IPV6_PREFER_SRC_PUBLIC;
+		else {
+			/* XXX: should we return system default? */
+			val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT;
+		}
+
+		if (np->srcprefs & IPV6_PREFER_SRC_COA)
+			val |= IPV6_PREFER_SRC_COA;
+		else
+			val |= IPV6_PREFER_SRC_HOME;
+		break;
+
+	case IPV6_MINHOPCOUNT:
+		val = np->min_hopcount;
+		break;
+
+	case IPV6_DONTFRAG:
+		val = np->dontfrag;
+		break;
+
 	default:
-#ifdef CONFIG_NETFILTER
-		lock_sock(sk);
-		val = nf_getsockopt(sk, PF_INET6, optname, optval, 
-				    &len);
-		release_sock(sk);
-		if (val >= 0)
-			val = put_user(len, optlen);
-		return val;
-#else
-		return -EINVAL;
-#endif
+		return -ENOPROTOOPT;
 	}
 	len = min_t(unsigned int, sizeof(int), len);
 	if(put_user(len, optlen))
@@ -860,12 +1284,80 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
 	return 0;
 }
 
-void __init ipv6_packet_init(void)
+int ipv6_getsockopt(struct sock *sk, int level, int optname,
+		    char __user *optval, int __user *optlen)
 {
-	dev_add_pack(&ipv6_packet_type);
+	int err;
+
+	if (level == SOL_IP && sk->sk_type != SOCK_RAW)
+		return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+
+	if(level != SOL_IPV6)
+		return -ENOPROTOOPT;
+
+	err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0);
+#ifdef CONFIG_NETFILTER
+	/* we need to exclude all possible ENOPROTOOPTs except default case */
+	if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
+		int len;
+
+		if (get_user(len, optlen))
+			return -EFAULT;
+
+		lock_sock(sk);
+		err = nf_getsockopt(sk, PF_INET6, optname, optval,
+				&len);
+		release_sock(sk);
+		if (err >= 0)
+			err = put_user(len, optlen);
+	}
+#endif
+	return err;
 }
 
-void ipv6_packet_cleanup(void)
+EXPORT_SYMBOL(ipv6_getsockopt);
+
+#ifdef CONFIG_COMPAT
+int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
+			   char __user *optval, int __user *optlen)
 {
-	dev_remove_pack(&ipv6_packet_type);
+	int err;
+
+	if (level == SOL_IP && sk->sk_type != SOCK_RAW) {
+		if (udp_prot.compat_getsockopt != NULL)
+			return udp_prot.compat_getsockopt(sk, level, optname,
+							  optval, optlen);
+		return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+	}
+
+	if (level != SOL_IPV6)
+		return -ENOPROTOOPT;
+
+	if (optname == MCAST_MSFILTER)
+		return compat_mc_getsockopt(sk, level, optname, optval, optlen,
+			ipv6_getsockopt);
+
+	err = do_ipv6_getsockopt(sk, level, optname, optval, optlen,
+				 MSG_CMSG_COMPAT);
+#ifdef CONFIG_NETFILTER
+	/* we need to exclude all possible ENOPROTOOPTs except default case */
+	if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
+		int len;
+
+		if (get_user(len, optlen))
+			return -EFAULT;
+
+		lock_sock(sk);
+		err = compat_nf_getsockopt(sk, PF_INET6,
+					   optname, optval, &len);
+		release_sock(sk);
+		if (err >= 0)
+			err = put_user(len, optlen);
+	}
+#endif
+	return err;
 }
+
+EXPORT_SYMBOL(compat_ipv6_getsockopt);
+#endif
+
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
deleted file mode 100644
index 37a4a99c9fe..00000000000
--- a/net/ipv6/ipv6_syms.c
+++ /dev/null
@@ -1,37 +0,0 @@
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <net/protocol.h>
-#include <net/ipv6.h>
-#include <net/addrconf.h>
-#include <net/ip6_route.h>
-#include <net/xfrm.h>
-
-EXPORT_SYMBOL(ipv6_addr_type);
-EXPORT_SYMBOL(icmpv6_send);
-EXPORT_SYMBOL(icmpv6_statistics);
-EXPORT_SYMBOL(icmpv6_err_convert);
-EXPORT_SYMBOL(ndisc_mc_map);
-EXPORT_SYMBOL(register_inet6addr_notifier);
-EXPORT_SYMBOL(unregister_inet6addr_notifier);
-EXPORT_SYMBOL(ip6_route_output);
-EXPORT_SYMBOL(addrconf_lock);
-EXPORT_SYMBOL(ipv6_setsockopt);
-EXPORT_SYMBOL(ipv6_getsockopt);
-EXPORT_SYMBOL(inet6_register_protosw);
-EXPORT_SYMBOL(inet6_unregister_protosw);
-EXPORT_SYMBOL(inet6_add_protocol);
-EXPORT_SYMBOL(inet6_del_protocol);
-EXPORT_SYMBOL(ip6_xmit);
-EXPORT_SYMBOL(inet6_release);
-EXPORT_SYMBOL(inet6_bind);
-EXPORT_SYMBOL(inet6_getname);
-EXPORT_SYMBOL(inet6_ioctl);
-EXPORT_SYMBOL(ipv6_get_saddr);
-EXPORT_SYMBOL(ipv6_chk_addr);
-EXPORT_SYMBOL(in6_dev_finish_destroy);
-#ifdef CONFIG_XFRM
-EXPORT_SYMBOL(xfrm6_rcv);
-#endif
-EXPORT_SYMBOL(rt6_lookup);
-EXPORT_SYMBOL(ipv6_push_nfrag_opts);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index f15e04ad026..617f0958e16 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1,13 +1,11 @@
 /*
  *	Multicast support for IPv6
- *	Linux INET6 implementation 
+ *	Linux INET6 implementation
  *
  *	Authors:
- *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *	Pedro Roque		<roque@di.fc.ul.pt>
  *
- *	$Id: mcast.c,v 1.40 2002/02/08 03:57:19 davem Exp $
- *
- *	Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c 
+ *	Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -28,7 +26,6 @@
  *		- MLDv2 support
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/types.h>
@@ -46,10 +43,14 @@
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/pkt_sched.h>
+#include <net/mld.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
 
+#include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/snmp.h>
 
@@ -59,6 +60,7 @@
 #include <net/ndisc.h>
 #include <net/addrconf.h>
 #include <net/ip6_route.h>
+#include <net/inet_common.h>
 
 #include <net/ip6_checksum.h>
 
@@ -71,64 +73,17 @@
 #define MDBG(x)
 #endif
 
-/*
- *  These header formats should be in a separate include file, but icmpv6.h
- *  doesn't have in6_addr defined in all cases, there is no __u128, and no
- *  other files reference these.
- *
- *  			+-DLS 4/14/03
- */
-
-/* Multicast Listener Discovery version 2 headers */
-
-struct mld2_grec {
-	__u8		grec_type;
-	__u8		grec_auxwords;
-	__u16		grec_nsrcs;
-	struct in6_addr	grec_mca;
-	struct in6_addr	grec_src[0];
-};
-
-struct mld2_report {
-	__u8	type;
-	__u8	resv1;
-	__u16	csum;
-	__u16	resv2;
-	__u16	ngrec;
-	struct mld2_grec grec[0];
-};
-
-struct mld2_query {
-	__u8 type;
-	__u8 code;
-	__u16 csum;
-	__u16 mrc;
-	__u16 resv1;
-	struct in6_addr mca;
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u8 qrv:3,
-	     suppress:1,
-	     resv2:4;
-#elif defined(__BIG_ENDIAN_BITFIELD)
-	__u8 resv2:4,
-	     suppress:1,
-	     qrv:3;
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
-	__u8 qqic;
-	__u16 nsrcs;
-	struct in6_addr srcs[0];
+/* Ensure that we have struct in6_addr aligned on 32bit word. */
+static void *__mld2_query_bugs[] __attribute__((__unused__)) = {
+	BUILD_BUG_ON_NULL(offsetof(struct mld2_query, mld2q_srcs) % 4),
+	BUILD_BUG_ON_NULL(offsetof(struct mld2_report, mld2r_grec) % 4),
+	BUILD_BUG_ON_NULL(offsetof(struct mld2_grec, grec_mca) % 4)
 };
 
 static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
 
 /* Big mc list lock for all the sockets */
-static DEFINE_RWLOCK(ipv6_sk_mc_lock);
-
-static struct socket *igmp6_socket;
-
-int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr);
+static DEFINE_SPINLOCK(ipv6_sk_mc_lock);
 
 static void igmp6_join_group(struct ifmcaddr6 *ma);
 static void igmp6_leave_group(struct ifmcaddr6 *ma);
@@ -138,65 +93,76 @@ static void mld_gq_timer_expire(unsigned long data);
 static void mld_ifc_timer_expire(unsigned long data);
 static void mld_ifc_event(struct inet6_dev *idev);
 static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
-static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *addr);
+static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *addr);
 static void mld_clear_delrec(struct inet6_dev *idev);
+static bool mld_in_v1_mode(const struct inet6_dev *idev);
 static int sf_setstate(struct ifmcaddr6 *pmc);
 static void sf_markstate(struct ifmcaddr6 *pmc);
 static void ip6_mc_clear_src(struct ifmcaddr6 *pmc);
-static int ip6_mc_del_src(struct inet6_dev *idev, struct in6_addr *pmca,
-			  int sfmode, int sfcount, struct in6_addr *psfsrc,
+static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
+			  int sfmode, int sfcount, const struct in6_addr *psfsrc,
 			  int delta);
-static int ip6_mc_add_src(struct inet6_dev *idev, struct in6_addr *pmca,
-			  int sfmode, int sfcount, struct in6_addr *psfsrc,
+static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
+			  int sfmode, int sfcount, const struct in6_addr *psfsrc,
 			  int delta);
 static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 			    struct inet6_dev *idev);
 
-
-#define IGMP6_UNSOLICITED_IVAL	(10*HZ)
 #define MLD_QRV_DEFAULT		2
+/* RFC3810, 9.2. Query Interval */
+#define MLD_QI_DEFAULT		(125 * HZ)
+/* RFC3810, 9.3. Query Response Interval */
+#define MLD_QRI_DEFAULT		(10 * HZ)
 
-#define MLD_V1_SEEN(idev) (ipv6_devconf.force_mld_version == 1 || \
-		(idev)->cnf.force_mld_version == 1 || \
-		((idev)->mc_v1_seen && \
-		time_before(jiffies, (idev)->mc_v1_seen)))
+/* RFC3810, 8.1 Query Version Distinctions */
+#define MLD_V1_QUERY_LEN	24
+#define MLD_V2_QUERY_LEN_MIN	28
 
-#define MLDV2_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value))
-#define MLDV2_EXP(thresh, nbmant, nbexp, value) \
-	((value) < (thresh) ? (value) : \
-	((MLDV2_MASK(value, nbmant) | (1<<(nbmant))) << \
-	(MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp))))
+#define IPV6_MLD_MAX_MSF	64
 
-#define MLDV2_QQIC(value) MLDV2_EXP(0x80, 4, 3, value)
-#define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value)
-
-#define IPV6_MLD_MAX_MSF	10
-
-int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF;
+int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
 
 /*
  *	socket join on multicast group
  */
 
-int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
+#define for_each_pmc_rcu(np, pmc)				\
+	for (pmc = rcu_dereference(np->ipv6_mc_list);		\
+	     pmc != NULL;					\
+	     pmc = rcu_dereference(pmc->next))
+
+static int unsolicited_report_interval(struct inet6_dev *idev)
+{
+	int iv;
+
+	if (mld_in_v1_mode(idev))
+		iv = idev->cnf.mldv1_unsolicited_report_interval;
+	else
+		iv = idev->cnf.mldv2_unsolicited_report_interval;
+
+	return iv > 0 ? iv : 1;
+}
+
+int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 {
 	struct net_device *dev = NULL;
 	struct ipv6_mc_socklist *mc_lst;
 	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct net *net = sock_net(sk);
 	int err;
 
 	if (!ipv6_addr_is_multicast(addr))
 		return -EINVAL;
 
-	read_lock_bh(&ipv6_sk_mc_lock);
-	for (mc_lst=np->ipv6_mc_list; mc_lst; mc_lst=mc_lst->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(np, mc_lst) {
 		if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
 		    ipv6_addr_equal(&mc_lst->addr, addr)) {
-			read_unlock_bh(&ipv6_sk_mc_lock);
+			rcu_read_unlock();
 			return -EADDRINUSE;
 		}
 	}
-	read_unlock_bh(&ipv6_sk_mc_lock);
+	rcu_read_unlock();
 
 	mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
 
@@ -204,26 +170,28 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 		return -ENOMEM;
 
 	mc_lst->next = NULL;
-	ipv6_addr_copy(&mc_lst->addr, addr);
+	mc_lst->addr = *addr;
 
+	rcu_read_lock();
 	if (ifindex == 0) {
 		struct rt6_info *rt;
-		rt = rt6_lookup(addr, NULL, 0, 0);
+		rt = rt6_lookup(net, addr, NULL, 0, 0);
 		if (rt) {
-			dev = rt->rt6i_dev;
-			dev_hold(dev);
-			dst_release(&rt->u.dst);
+			dev = rt->dst.dev;
+			ip6_rt_put(rt);
 		}
 	} else
-		dev = dev_get_by_index(ifindex);
+		dev = dev_get_by_index_rcu(net, ifindex);
 
 	if (dev == NULL) {
+		rcu_read_unlock();
 		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
 		return -ENODEV;
 	}
 
 	mc_lst->ifindex = dev->ifindex;
 	mc_lst->sfmode = MCAST_EXCLUDE;
+	rwlock_init(&mc_lst->sflock);
 	mc_lst->sflist = NULL;
 
 	/*
@@ -233,17 +201,17 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 	err = ipv6_dev_mc_inc(dev, addr);
 
 	if (err) {
+		rcu_read_unlock();
 		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
-		dev_put(dev);
 		return err;
 	}
 
-	write_lock_bh(&ipv6_sk_mc_lock);
+	spin_lock(&ipv6_sk_mc_lock);
 	mc_lst->next = np->ipv6_mc_list;
-	np->ipv6_mc_list = mc_lst;
-	write_unlock_bh(&ipv6_sk_mc_lock);
+	rcu_assign_pointer(np->ipv6_mc_list, mc_lst);
+	spin_unlock(&ipv6_sk_mc_lock);
 
-	dev_put(dev);
+	rcu_read_unlock();
 
 	return 0;
 }
@@ -251,68 +219,75 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 /*
  *	socket leave on multicast group
  */
-int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
+int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct ipv6_mc_socklist *mc_lst, **lnk;
+	struct ipv6_mc_socklist *mc_lst;
+	struct ipv6_mc_socklist __rcu **lnk;
+	struct net *net = sock_net(sk);
 
-	write_lock_bh(&ipv6_sk_mc_lock);
-	for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) {
+	if (!ipv6_addr_is_multicast(addr))
+		return -EINVAL;
+
+	spin_lock(&ipv6_sk_mc_lock);
+	for (lnk = &np->ipv6_mc_list;
+	     (mc_lst = rcu_dereference_protected(*lnk,
+			lockdep_is_held(&ipv6_sk_mc_lock))) !=NULL ;
+	      lnk = &mc_lst->next) {
 		if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
 		    ipv6_addr_equal(&mc_lst->addr, addr)) {
 			struct net_device *dev;
 
 			*lnk = mc_lst->next;
-			write_unlock_bh(&ipv6_sk_mc_lock);
+			spin_unlock(&ipv6_sk_mc_lock);
 
-			if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) {
-				struct inet6_dev *idev = in6_dev_get(dev);
+			rcu_read_lock();
+			dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
+			if (dev != NULL) {
+				struct inet6_dev *idev = __in6_dev_get(dev);
 
-				if (idev) {
-					(void) ip6_mc_leave_src(sk,mc_lst,idev);
+				(void) ip6_mc_leave_src(sk, mc_lst, idev);
+				if (idev)
 					__ipv6_dev_mc_dec(idev, &mc_lst->addr);
-					in6_dev_put(idev);
-				}
-				dev_put(dev);
-			}
-			sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
+			} else
+				(void) ip6_mc_leave_src(sk, mc_lst, NULL);
+			rcu_read_unlock();
+			atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
+			kfree_rcu(mc_lst, rcu);
 			return 0;
 		}
 	}
-	write_unlock_bh(&ipv6_sk_mc_lock);
+	spin_unlock(&ipv6_sk_mc_lock);
 
 	return -EADDRNOTAVAIL;
 }
 
-static struct inet6_dev *ip6_mc_find_dev(struct in6_addr *group, int ifindex)
+/* called with rcu_read_lock() */
+static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
+					     const struct in6_addr *group,
+					     int ifindex)
 {
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev = NULL;
 
 	if (ifindex == 0) {
-		struct rt6_info *rt;
+		struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
 
-		rt = rt6_lookup(group, NULL, 0, 0);
 		if (rt) {
-			dev = rt->rt6i_dev;
-			dev_hold(dev);
-			dst_release(&rt->u.dst);
+			dev = rt->dst.dev;
+			ip6_rt_put(rt);
 		}
 	} else
-		dev = dev_get_by_index(ifindex);
+		dev = dev_get_by_index_rcu(net, ifindex);
 
 	if (!dev)
 		return NULL;
-	idev = in6_dev_get(dev);
-	if (!idev) {
-		dev_put(dev);
+	idev = __in6_dev_get(dev);
+	if (!idev)
 		return NULL;
-	}
 	read_lock_bh(&idev->lock);
 	if (idev->dead) {
 		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
-		dev_put(dev);
 		return NULL;
 	}
 	return idev;
@@ -322,31 +297,37 @@ void ipv6_sock_mc_close(struct sock *sk)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6_mc_socklist *mc_lst;
+	struct net *net = sock_net(sk);
 
-	write_lock_bh(&ipv6_sk_mc_lock);
-	while ((mc_lst = np->ipv6_mc_list) != NULL) {
+	if (!rcu_access_pointer(np->ipv6_mc_list))
+		return;
+
+	spin_lock(&ipv6_sk_mc_lock);
+	while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list,
+				lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) {
 		struct net_device *dev;
 
 		np->ipv6_mc_list = mc_lst->next;
-		write_unlock_bh(&ipv6_sk_mc_lock);
+		spin_unlock(&ipv6_sk_mc_lock);
 
-		dev = dev_get_by_index(mc_lst->ifindex);
+		rcu_read_lock();
+		dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
 		if (dev) {
-			struct inet6_dev *idev = in6_dev_get(dev);
+			struct inet6_dev *idev = __in6_dev_get(dev);
 
-			if (idev) {
-				(void) ip6_mc_leave_src(sk, mc_lst, idev);
+			(void) ip6_mc_leave_src(sk, mc_lst, idev);
+			if (idev)
 				__ipv6_dev_mc_dec(idev, &mc_lst->addr);
-				in6_dev_put(idev);
-			}
-			dev_put(dev);
-		}
+		} else
+			(void) ip6_mc_leave_src(sk, mc_lst, NULL);
+		rcu_read_unlock();
 
-		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
+		atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
+		kfree_rcu(mc_lst, rcu);
 
-		write_lock_bh(&ipv6_sk_mc_lock);
+		spin_lock(&ipv6_sk_mc_lock);
 	}
-	write_unlock_bh(&ipv6_sk_mc_lock);
+	spin_unlock(&ipv6_sk_mc_lock);
 }
 
 int ip6_mc_source(int add, int omode, struct sock *sk,
@@ -354,33 +335,31 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 {
 	struct in6_addr *source, *group;
 	struct ipv6_mc_socklist *pmc;
-	struct net_device *dev;
 	struct inet6_dev *idev;
 	struct ipv6_pinfo *inet6 = inet6_sk(sk);
 	struct ip6_sf_socklist *psl;
+	struct net *net = sock_net(sk);
 	int i, j, rv;
 	int leavegroup = 0;
+	int pmclocked = 0;
 	int err;
 
-	if (pgsr->gsr_group.ss_family != AF_INET6 ||
-	    pgsr->gsr_source.ss_family != AF_INET6)
-		return -EINVAL;
-
 	source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr;
 	group = &((struct sockaddr_in6 *)&pgsr->gsr_group)->sin6_addr;
 
 	if (!ipv6_addr_is_multicast(group))
 		return -EINVAL;
 
-	idev = ip6_mc_find_dev(group, pgsr->gsr_interface);
-	if (!idev)
+	rcu_read_lock();
+	idev = ip6_mc_find_dev_rcu(net, group, pgsr->gsr_interface);
+	if (!idev) {
+		rcu_read_unlock();
 		return -ENODEV;
-	dev = idev->dev;
+	}
 
 	err = -EADDRNOTAVAIL;
 
-	read_lock_bh(&ipv6_sk_mc_lock);
-	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rcu(inet6, pmc) {
 		if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
 			continue;
 		if (ipv6_addr_equal(&pmc->addr, group))
@@ -403,14 +382,16 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 		pmc->sfmode = omode;
 	}
 
+	write_lock(&pmc->sflock);
+	pmclocked = 1;
+
 	psl = pmc->sflist;
 	if (!add) {
 		if (!psl)
 			goto done;	/* err = -EADDRNOTAVAIL */
 		rv = !0;
 		for (i=0; i<psl->sl_count; i++) {
-			rv = memcmp(&psl->sl_addr[i], source,
-				sizeof(struct in6_addr));
+			rv = !ipv6_addr_equal(&psl->sl_addr[i], source);
 			if (rv == 0)
 				break;
 		}
@@ -444,8 +425,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 
 		if (psl)
 			count += psl->sl_max;
-		newpsl = (struct ip6_sf_socklist *)sock_kmalloc(sk,
-			IP6_SFLSIZE(count), GFP_ATOMIC);
+		newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_ATOMIC);
 		if (!newpsl) {
 			err = -ENOBUFS;
 			goto done;
@@ -461,12 +441,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	}
 	rv = 1;	/* > 0 for insert logic below if sl_count is 0 */
 	for (i=0; i<psl->sl_count; i++) {
-		rv = memcmp(&psl->sl_addr[i], source, sizeof(struct in6_addr));
-		if (rv == 0)
-			break;
+		rv = !ipv6_addr_equal(&psl->sl_addr[i], source);
+		if (rv == 0) /* There is an error in the address. */
+			goto done;
 	}
-	if (rv == 0)		/* address already there is an error */
-		goto done;
 	for (j=psl->sl_count-1; j>=i; j--)
 		psl->sl_addr[j+1] = psl->sl_addr[j];
 	psl->sl_addr[i] = *source;
@@ -475,10 +453,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	/* update the interface list */
 	ip6_mc_add_src(idev, group, omode, 1, source, 1);
 done:
-	read_unlock_bh(&ipv6_sk_mc_lock);
+	if (pmclocked)
+		write_unlock(&pmc->sflock);
 	read_unlock_bh(&idev->lock);
-	in6_dev_put(idev);
-	dev_put(dev);
+	rcu_read_unlock();
 	if (leavegroup)
 		return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group);
 	return err;
@@ -486,12 +464,12 @@ done:
 
 int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 {
-	struct in6_addr *group;
+	const struct in6_addr *group;
 	struct ipv6_mc_socklist *pmc;
-	struct net_device *dev;
 	struct inet6_dev *idev;
 	struct ipv6_pinfo *inet6 = inet6_sk(sk);
 	struct ip6_sf_socklist *newpsl, *psl;
+	struct net *net = sock_net(sk);
 	int leavegroup = 0;
 	int i, err;
 
@@ -503,19 +481,22 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 	    gsf->gf_fmode != MCAST_EXCLUDE)
 		return -EINVAL;
 
-	idev = ip6_mc_find_dev(group, gsf->gf_interface);
+	rcu_read_lock();
+	idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface);
 
-	if (!idev)
+	if (!idev) {
+		rcu_read_unlock();
 		return -ENODEV;
-	dev = idev->dev;
+	}
 
 	err = 0;
+
 	if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) {
 		leavegroup = 1;
 		goto done;
 	}
 
-	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rcu(inet6, pmc) {
 		if (pmc->ifindex != gsf->gf_interface)
 			continue;
 		if (ipv6_addr_equal(&pmc->addr, group))
@@ -526,8 +507,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 		goto done;
 	}
 	if (gsf->gf_numsrc) {
-		newpsl = (struct ip6_sf_socklist *)sock_kmalloc(sk,
-				IP6_SFLSIZE(gsf->gf_numsrc), GFP_ATOMIC);
+		newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc),
+							  GFP_ATOMIC);
 		if (!newpsl) {
 			err = -ENOBUFS;
 			goto done;
@@ -549,6 +530,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 		newpsl = NULL;
 		(void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
 	}
+
+	write_lock(&pmc->sflock);
 	psl = pmc->sflist;
 	if (psl) {
 		(void) ip6_mc_del_src(idev, group, pmc->sfmode,
@@ -558,11 +541,11 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 		(void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
 	pmc->sflist = newpsl;
 	pmc->sfmode = gsf->gf_fmode;
+	write_unlock(&pmc->sflock);
 	err = 0;
 done:
 	read_unlock_bh(&idev->lock);
-	in6_dev_put(idev);
-	dev_put(dev);
+	rcu_read_unlock();
 	if (leavegroup)
 		err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group);
 	return err;
@@ -572,28 +555,34 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	struct group_filter __user *optval, int __user *optlen)
 {
 	int err, i, count, copycount;
-	struct in6_addr *group;
+	const struct in6_addr *group;
 	struct ipv6_mc_socklist *pmc;
 	struct inet6_dev *idev;
-	struct net_device *dev;
 	struct ipv6_pinfo *inet6 = inet6_sk(sk);
 	struct ip6_sf_socklist *psl;
+	struct net *net = sock_net(sk);
 
 	group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
 
 	if (!ipv6_addr_is_multicast(group))
 		return -EINVAL;
 
-	idev = ip6_mc_find_dev(group, gsf->gf_interface);
+	rcu_read_lock();
+	idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface);
 
-	if (!idev)
+	if (!idev) {
+		rcu_read_unlock();
 		return -ENODEV;
-
-	dev = idev->dev;
+	}
 
 	err = -EADDRNOTAVAIL;
+	/*
+	 * changes to the ipv6_mc_list require the socket lock and
+	 * a read lock on ip6_sk_mc_lock. We have the socket lock,
+	 * so reading the list is safe.
+	 */
 
-	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rcu(inet6, pmc) {
 		if (pmc->ifindex != gsf->gf_interface)
 			continue;
 		if (ipv6_addr_equal(group, &pmc->addr))
@@ -605,8 +594,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	psl = pmc->sflist;
 	count = psl ? psl->sl_count : 0;
 	read_unlock_bh(&idev->lock);
-	in6_dev_put(idev);
-	dev_put(dev);
+	rcu_read_unlock();
 
 	copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
 	gsf->gf_numsrc = count;
@@ -614,6 +602,10 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	    copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
 		return -EFAULT;
 	}
+	/* changes to psl require the socket lock, a read lock on
+	 * on ipv6_sk_mc_lock and a write lock on pmc->sflock. We
+	 * have the socket lock, so reading here is safe.
+	 */
 	for (i=0; i<copycount; i++) {
 		struct sockaddr_in6 *psin6;
 		struct sockaddr_storage ss;
@@ -622,34 +614,34 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 		memset(&ss, 0, sizeof(ss));
 		psin6->sin6_family = AF_INET6;
 		psin6->sin6_addr = psl->sl_addr[i];
-	    	if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss)))
+		if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss)))
 			return -EFAULT;
 	}
 	return 0;
 done:
 	read_unlock_bh(&idev->lock);
-	in6_dev_put(idev);
-	dev_put(dev);
+	rcu_read_unlock();
 	return err;
 }
 
-int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr,
-	struct in6_addr *src_addr)
+bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
+		    const struct in6_addr *src_addr)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6_mc_socklist *mc;
 	struct ip6_sf_socklist *psl;
-	int rv = 1;
+	bool rv = true;
 
-	read_lock(&ipv6_sk_mc_lock);
-	for (mc = np->ipv6_mc_list; mc; mc = mc->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(np, mc) {
 		if (ipv6_addr_equal(&mc->addr, mc_addr))
 			break;
 	}
 	if (!mc) {
-		read_unlock(&ipv6_sk_mc_lock);
-		return 1;
+		rcu_read_unlock();
+		return true;
 	}
+	read_lock(&mc->sflock);
 	psl = mc->sflist;
 	if (!psl) {
 		rv = mc->sfmode == MCAST_EXCLUDE;
@@ -661,11 +653,12 @@ int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr,
 				break;
 		}
 		if (mc->sfmode == MCAST_INCLUDE && i >= psl->sl_count)
-			rv = 0;
+			rv = false;
 		if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count)
-			rv = 0;
+			rv = false;
 	}
-	read_unlock(&ipv6_sk_mc_lock);
+	read_unlock(&mc->sflock);
+	rcu_read_unlock();
 
 	return rv;
 }
@@ -683,18 +676,22 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
 	struct net_device *dev = mc->idev->dev;
 	char buf[MAX_ADDR_LEN];
 
+	if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) <
+	    IPV6_ADDR_SCOPE_LINKLOCAL)
+		return;
+
 	spin_lock_bh(&mc->mca_lock);
 	if (!(mc->mca_flags&MAF_LOADED)) {
 		mc->mca_flags |= MAF_LOADED;
 		if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
-			dev_mc_add(dev, buf, dev->addr_len, 0);
+			dev_mc_add(dev, buf);
 	}
 	spin_unlock_bh(&mc->mca_lock);
 
 	if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT))
 		return;
 
-	if (MLD_V1_SEEN(mc->idev)) {
+	if (mld_in_v1_mode(mc->idev)) {
 		igmp6_join_group(mc);
 		return;
 	}
@@ -709,11 +706,15 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
 	struct net_device *dev = mc->idev->dev;
 	char buf[MAX_ADDR_LEN];
 
+	if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) <
+	    IPV6_ADDR_SCOPE_LINKLOCAL)
+		return;
+
 	spin_lock_bh(&mc->mca_lock);
 	if (mc->mca_flags&MAF_LOADED) {
 		mc->mca_flags &= ~MAF_LOADED;
 		if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
-			dev_mc_delete(dev, buf, dev->addr_len, 0);
+			dev_mc_del(dev, buf);
 	}
 
 	if (mc->mca_flags & MAF_NOREPORT)
@@ -744,10 +745,10 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	 * for deleted items allows change reports to use common code with
 	 * non-deleted or query-response MCA's.
 	 */
-	pmc = (struct ifmcaddr6 *)kmalloc(sizeof(*pmc), GFP_ATOMIC);
+	pmc = kzalloc(sizeof(*pmc), GFP_ATOMIC);
 	if (!pmc)
 		return;
-	memset(pmc, 0, sizeof(*pmc));
+
 	spin_lock_bh(&im->mca_lock);
 	spin_lock_init(&pmc->mca_lock);
 	pmc->idev = im->idev;
@@ -766,18 +767,18 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	}
 	spin_unlock_bh(&im->mca_lock);
 
-	write_lock_bh(&idev->mc_lock);
+	spin_lock_bh(&idev->mc_lock);
 	pmc->next = idev->mc_tomb;
 	idev->mc_tomb = pmc;
-	write_unlock_bh(&idev->mc_lock);
+	spin_unlock_bh(&idev->mc_lock);
 }
 
-static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *pmca)
+static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca)
 {
 	struct ifmcaddr6 *pmc, *pmc_prev;
 	struct ip6_sf_list *psf, *psf_next;
 
-	write_lock_bh(&idev->mc_lock);
+	spin_lock_bh(&idev->mc_lock);
 	pmc_prev = NULL;
 	for (pmc=idev->mc_tomb; pmc; pmc=pmc->next) {
 		if (ipv6_addr_equal(&pmc->mca_addr, pmca))
@@ -790,7 +791,8 @@ static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *pmca)
 		else
 			idev->mc_tomb = pmc->next;
 	}
-	write_unlock_bh(&idev->mc_lock);
+	spin_unlock_bh(&idev->mc_lock);
+
 	if (pmc) {
 		for (psf=pmc->mca_tomb; psf; psf=psf_next) {
 			psf_next = psf->sf_next;
@@ -805,10 +807,10 @@ static void mld_clear_delrec(struct inet6_dev *idev)
 {
 	struct ifmcaddr6 *pmc, *nextpmc;
 
-	write_lock_bh(&idev->mc_lock);
+	spin_lock_bh(&idev->mc_lock);
 	pmc = idev->mc_tomb;
 	idev->mc_tomb = NULL;
-	write_unlock_bh(&idev->mc_lock);
+	spin_unlock_bh(&idev->mc_lock);
 
 	for (; pmc; pmc = nextpmc) {
 		nextpmc = pmc->next;
@@ -838,11 +840,12 @@ static void mld_clear_delrec(struct inet6_dev *idev)
 /*
  *	device multicast group inc (add if not found)
  */
-int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr)
+int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
 {
 	struct ifmcaddr6 *mc;
 	struct inet6_dev *idev;
 
+	/* we need to take a reference on idev */
 	idev = in6_dev_get(dev);
 
 	if (idev == NULL)
@@ -870,7 +873,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr)
 	 *	not found: create a new one.
 	 */
 
-	mc = kmalloc(sizeof(struct ifmcaddr6), GFP_ATOMIC);
+	mc = kzalloc(sizeof(struct ifmcaddr6), GFP_ATOMIC);
 
 	if (mc == NULL) {
 		write_unlock_bh(&idev->lock);
@@ -878,13 +881,10 @@ int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr)
 		return -ENOMEM;
 	}
 
-	memset(mc, 0, sizeof(struct ifmcaddr6));
-	init_timer(&mc->mca_timer);
-	mc->mca_timer.function = igmp6_timer_handler;
-	mc->mca_timer.data = (unsigned long) mc;
+	setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
 
-	ipv6_addr_copy(&mc->mca_addr, addr);
-	mc->idev = idev;
+	mc->mca_addr = *addr;
+	mc->idev = idev; /* (reference taken) */
 	mc->mca_users = 1;
 	/* mca_stamp should be updated upon changes */
 	mc->mca_cstamp = mc->mca_tstamp = jiffies;
@@ -912,7 +912,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr)
 /*
  *	device multicast group del
  */
-int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr)
+int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 {
 	struct ifmcaddr6 *ma, **map;
 
@@ -937,59 +937,35 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr)
 	return -ENOENT;
 }
 
-int ipv6_dev_mc_dec(struct net_device *dev, struct in6_addr *addr)
+int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
 {
-	struct inet6_dev *idev = in6_dev_get(dev);
+	struct inet6_dev *idev;
 	int err;
 
-	if (!idev)
-		return -ENODEV;
+	rcu_read_lock();
 
-	err = __ipv6_dev_mc_dec(idev, addr);
-
-	in6_dev_put(idev);
+	idev = __in6_dev_get(dev);
+	if (!idev)
+		err = -ENODEV;
+	else
+		err = __ipv6_dev_mc_dec(idev, addr);
 
+	rcu_read_unlock();
 	return err;
 }
 
 /*
- * identify MLD packets for MLD filter exceptions
- */
-int ipv6_is_mld(struct sk_buff *skb, int nexthdr)
-{
-	struct icmp6hdr *pic;
-
-	if (nexthdr != IPPROTO_ICMPV6)
-		return 0;
-
-	if (!pskb_may_pull(skb, sizeof(struct icmp6hdr)))
-		return 0;
-
-	pic = (struct icmp6hdr *)skb->h.raw;
-
-	switch (pic->icmp6_type) {
-	case ICMPV6_MGM_QUERY:
-	case ICMPV6_MGM_REPORT:
-	case ICMPV6_MGM_REDUCTION:
-	case ICMPV6_MLD2_REPORT:
-		return 1;
-	default:
-		break;
-	}
-	return 0;
-}
-
-/*
  *	check if the interface/address pair is valid
  */
-int ipv6_chk_mcast_addr(struct net_device *dev, struct in6_addr *group,
-	struct in6_addr *src_addr)
+bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
+			 const struct in6_addr *src_addr)
 {
 	struct inet6_dev *idev;
 	struct ifmcaddr6 *mc;
-	int rv = 0;
+	bool rv = false;
 
-	idev = in6_dev_get(dev);
+	rcu_read_lock();
+	idev = __in6_dev_get(dev);
 	if (idev) {
 		read_lock_bh(&idev->lock);
 		for (mc = idev->mc_list; mc; mc=mc->next) {
@@ -1013,31 +989,59 @@ int ipv6_chk_mcast_addr(struct net_device *dev, struct in6_addr *group,
 					rv = mc->mca_sfcount[MCAST_EXCLUDE] !=0;
 				spin_unlock_bh(&mc->mca_lock);
 			} else
-				rv = 1; /* don't filter unspecified source */
+				rv = true; /* don't filter unspecified source */
 		}
 		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
 	}
+	rcu_read_unlock();
 	return rv;
 }
 
 static void mld_gq_start_timer(struct inet6_dev *idev)
 {
-	int tv = net_random() % idev->mc_maxdelay;
+	unsigned long tv = prandom_u32() % idev->mc_maxdelay;
 
 	idev->mc_gq_running = 1;
 	if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2))
 		in6_dev_hold(idev);
 }
 
-static void mld_ifc_start_timer(struct inet6_dev *idev, int delay)
+static void mld_gq_stop_timer(struct inet6_dev *idev)
 {
-	int tv = net_random() % delay;
+	idev->mc_gq_running = 0;
+	if (del_timer(&idev->mc_gq_timer))
+		__in6_dev_put(idev);
+}
+
+static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay)
+{
+	unsigned long tv = prandom_u32() % delay;
 
 	if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2))
 		in6_dev_hold(idev);
 }
 
+static void mld_ifc_stop_timer(struct inet6_dev *idev)
+{
+	idev->mc_ifc_count = 0;
+	if (del_timer(&idev->mc_ifc_timer))
+		__in6_dev_put(idev);
+}
+
+static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay)
+{
+	unsigned long tv = prandom_u32() % delay;
+
+	if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2))
+		in6_dev_hold(idev);
+}
+
+static void mld_dad_stop_timer(struct inet6_dev *idev)
+{
+	if (del_timer(&idev->mc_dad_timer))
+		__in6_dev_put(idev);
+}
+
 /*
  *	IGMP handling (alias multicast ICMPv6 messages)
  */
@@ -1056,128 +1060,307 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
 		delay = ma->mca_timer.expires - jiffies;
 	}
 
-	if (delay >= resptime) {
-		if (resptime)
-			delay = net_random() % resptime;
-		else
-			delay = 1;
-	}
+	if (delay >= resptime)
+		delay = prandom_u32() % resptime;
+
 	ma->mca_timer.expires = jiffies + delay;
 	if (!mod_timer(&ma->mca_timer, jiffies + delay))
 		atomic_inc(&ma->mca_refcnt);
 	ma->mca_flags |= MAF_TIMER_RUNNING;
 }
 
-static void mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
-	struct in6_addr *srcs)
+/* mark EXCLUDE-mode sources */
+static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
+			     const struct in6_addr *srcs)
+{
+	struct ip6_sf_list *psf;
+	int i, scount;
+
+	scount = 0;
+	for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+		if (scount == nsrcs)
+			break;
+		for (i=0; i<nsrcs; i++) {
+			/* skip inactive filters */
+			if (psf->sf_count[MCAST_INCLUDE] ||
+			    pmc->mca_sfcount[MCAST_EXCLUDE] !=
+			    psf->sf_count[MCAST_EXCLUDE])
+				break;
+			if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
+				scount++;
+				break;
+			}
+		}
+	}
+	pmc->mca_flags &= ~MAF_GSQUERY;
+	if (scount == nsrcs)	/* all sources excluded */
+		return false;
+	return true;
+}
+
+static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
+			    const struct in6_addr *srcs)
 {
 	struct ip6_sf_list *psf;
 	int i, scount;
 
+	if (pmc->mca_sfmode == MCAST_EXCLUDE)
+		return mld_xmarksources(pmc, nsrcs, srcs);
+
+	/* mark INCLUDE-mode sources */
+
 	scount = 0;
 	for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
 		if (scount == nsrcs)
 			break;
-		for (i=0; i<nsrcs; i++)
+		for (i=0; i<nsrcs; i++) {
 			if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
 				psf->sf_gsresp = 1;
 				scount++;
 				break;
 			}
+		}
+	}
+	if (!scount) {
+		pmc->mca_flags &= ~MAF_GSQUERY;
+		return false;
 	}
+	pmc->mca_flags |= MAF_GSQUERY;
+	return true;
 }
 
+static int mld_force_mld_version(const struct inet6_dev *idev)
+{
+	/* Normally, both are 0 here. If enforcement to a particular is
+	 * being used, individual device enforcement will have a lower
+	 * precedence over 'all' device (.../conf/all/force_mld_version).
+	 */
+
+	if (dev_net(idev->dev)->ipv6.devconf_all->force_mld_version != 0)
+		return dev_net(idev->dev)->ipv6.devconf_all->force_mld_version;
+	else
+		return idev->cnf.force_mld_version;
+}
+
+static bool mld_in_v2_mode_only(const struct inet6_dev *idev)
+{
+	return mld_force_mld_version(idev) == 2;
+}
+
+static bool mld_in_v1_mode_only(const struct inet6_dev *idev)
+{
+	return mld_force_mld_version(idev) == 1;
+}
+
+static bool mld_in_v1_mode(const struct inet6_dev *idev)
+{
+	if (mld_in_v2_mode_only(idev))
+		return false;
+	if (mld_in_v1_mode_only(idev))
+		return true;
+	if (idev->mc_v1_seen && time_before(jiffies, idev->mc_v1_seen))
+		return true;
+
+	return false;
+}
+
+static void mld_set_v1_mode(struct inet6_dev *idev)
+{
+	/* RFC3810, relevant sections:
+	 *  - 9.1. Robustness Variable
+	 *  - 9.2. Query Interval
+	 *  - 9.3. Query Response Interval
+	 *  - 9.12. Older Version Querier Present Timeout
+	 */
+	unsigned long switchback;
+
+	switchback = (idev->mc_qrv * idev->mc_qi) + idev->mc_qri;
+
+	idev->mc_v1_seen = jiffies + switchback;
+}
+
+static void mld_update_qrv(struct inet6_dev *idev,
+			   const struct mld2_query *mlh2)
+{
+	/* RFC3810, relevant sections:
+	 *  - 5.1.8. QRV (Querier's Robustness Variable)
+	 *  - 9.1. Robustness Variable
+	 */
+
+	/* The value of the Robustness Variable MUST NOT be zero,
+	 * and SHOULD NOT be one. Catch this here if we ever run
+	 * into such a case in future.
+	 */
+	WARN_ON(idev->mc_qrv == 0);
+
+	if (mlh2->mld2q_qrv > 0)
+		idev->mc_qrv = mlh2->mld2q_qrv;
+
+	if (unlikely(idev->mc_qrv < 2)) {
+		net_warn_ratelimited("IPv6: MLD: clamping QRV from %u to %u!\n",
+				     idev->mc_qrv, MLD_QRV_DEFAULT);
+		idev->mc_qrv = MLD_QRV_DEFAULT;
+	}
+}
+
+static void mld_update_qi(struct inet6_dev *idev,
+			  const struct mld2_query *mlh2)
+{
+	/* RFC3810, relevant sections:
+	 *  - 5.1.9. QQIC (Querier's Query Interval Code)
+	 *  - 9.2. Query Interval
+	 *  - 9.12. Older Version Querier Present Timeout
+	 *    (the [Query Interval] in the last Query received)
+	 */
+	unsigned long mc_qqi;
+
+	if (mlh2->mld2q_qqic < 128) {
+		mc_qqi = mlh2->mld2q_qqic;
+	} else {
+		unsigned long mc_man, mc_exp;
+
+		mc_exp = MLDV2_QQIC_EXP(mlh2->mld2q_qqic);
+		mc_man = MLDV2_QQIC_MAN(mlh2->mld2q_qqic);
+
+		mc_qqi = (mc_man | 0x10) << (mc_exp + 3);
+	}
+
+	idev->mc_qi = mc_qqi * HZ;
+}
+
+static void mld_update_qri(struct inet6_dev *idev,
+			   const struct mld2_query *mlh2)
+{
+	/* RFC3810, relevant sections:
+	 *  - 5.1.3. Maximum Response Code
+	 *  - 9.3. Query Response Interval
+	 */
+	idev->mc_qri = msecs_to_jiffies(mldv2_mrc(mlh2));
+}
+
+static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
+			  unsigned long *max_delay)
+{
+	unsigned long mldv1_md;
+
+	/* Ignore v1 queries */
+	if (mld_in_v2_mode_only(idev))
+		return -EINVAL;
+
+	/* MLDv1 router present */
+	mldv1_md = ntohs(mld->mld_maxdelay);
+	*max_delay = max(msecs_to_jiffies(mldv1_md), 1UL);
+
+	mld_set_v1_mode(idev);
+
+	/* cancel MLDv2 report timer */
+	mld_gq_stop_timer(idev);
+	/* cancel the interface change timer */
+	mld_ifc_stop_timer(idev);
+	/* clear deleted report items */
+	mld_clear_delrec(idev);
+
+	return 0;
+}
+
+static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
+			  unsigned long *max_delay)
+{
+	/* hosts need to stay in MLDv1 mode, discard MLDv2 queries */
+	if (mld_in_v1_mode(idev))
+		return -EINVAL;
+
+	*max_delay = max(msecs_to_jiffies(mldv2_mrc(mld)), 1UL);
+
+	mld_update_qrv(idev, mld);
+	mld_update_qi(idev, mld);
+	mld_update_qri(idev, mld);
+
+	idev->mc_maxdelay = *max_delay;
+
+	return 0;
+}
+
+/* called with rcu_read_lock() */
 int igmp6_event_query(struct sk_buff *skb)
 {
 	struct mld2_query *mlh2 = NULL;
 	struct ifmcaddr6 *ma;
-	struct in6_addr *group;
+	const struct in6_addr *group;
 	unsigned long max_delay;
 	struct inet6_dev *idev;
-	struct icmp6hdr *hdr;
+	struct mld_msg *mld;
 	int group_type;
 	int mark = 0;
-	int len;
+	int len, err;
 
 	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
 		return -EINVAL;
 
 	/* compute payload length excluding extension headers */
-	len = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
-	len -= (char *)skb->h.raw - (char *)skb->nh.ipv6h; 
-
-	/* Drop queries with not link local source */
-	if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr)&IPV6_ADDR_LINKLOCAL))
+	len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
+	len -= skb_network_header_len(skb);
+
+	/* RFC3810 6.2
+	 * Upon reception of an MLD message that contains a Query, the node
+	 * checks if the source address of the message is a valid link-local
+	 * address, if the Hop Limit is set to 1, and if the Router Alert
+	 * option is present in the Hop-By-Hop Options header of the IPv6
+	 * packet.  If any of these checks fails, the packet is dropped.
+	 */
+	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) ||
+	    ipv6_hdr(skb)->hop_limit != 1 ||
+	    !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) ||
+	    IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD))
 		return -EINVAL;
 
-	idev = in6_dev_get(skb->dev);
-
+	idev = __in6_dev_get(skb->dev);
 	if (idev == NULL)
 		return 0;
 
-	hdr = (struct icmp6hdr *) skb->h.raw;
-	group = (struct in6_addr *) (hdr + 1);
+	mld = (struct mld_msg *)icmp6_hdr(skb);
+	group = &mld->mld_mca;
 	group_type = ipv6_addr_type(group);
 
 	if (group_type != IPV6_ADDR_ANY &&
-	    !(group_type&IPV6_ADDR_MULTICAST)) {
-		in6_dev_put(idev);
+	    !(group_type&IPV6_ADDR_MULTICAST))
 		return -EINVAL;
-	}
 
-	if (len == 24) {
-		int switchback;
-		/* MLDv1 router present */
+	if (len == MLD_V1_QUERY_LEN) {
+		err = mld_process_v1(idev, mld, &max_delay);
+		if (err < 0)
+			return err;
+	} else if (len >= MLD_V2_QUERY_LEN_MIN) {
+		int srcs_offset = sizeof(struct mld2_query) -
+				  sizeof(struct icmp6hdr);
 
-		/* Translate milliseconds to jiffies */
-		max_delay = (ntohs(hdr->icmp6_maxdelay)*HZ)/1000;
+		if (!pskb_may_pull(skb, srcs_offset))
+			return -EINVAL;
 
-		switchback = (idev->mc_qrv + 1) * max_delay;
-		idev->mc_v1_seen = jiffies + switchback;
+		mlh2 = (struct mld2_query *)skb_transport_header(skb);
+
+		err = mld_process_v2(idev, mlh2, &max_delay);
+		if (err < 0)
+			return err;
 
-		/* cancel the interface change timer */
-		idev->mc_ifc_count = 0;
-		if (del_timer(&idev->mc_ifc_timer))
-			__in6_dev_put(idev);
-		/* clear deleted report items */
-		mld_clear_delrec(idev);
-	} else if (len >= 28) {
-		int srcs_offset = sizeof(struct mld2_query) - 
-				  sizeof(struct icmp6hdr);
-		if (!pskb_may_pull(skb, srcs_offset)) {
-			in6_dev_put(idev);
-			return -EINVAL;
-		}
-		mlh2 = (struct mld2_query *) skb->h.raw;
-		max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000;
-		if (!max_delay)
-			max_delay = 1;
-		idev->mc_maxdelay = max_delay;
-		if (mlh2->qrv)
-			idev->mc_qrv = mlh2->qrv;
 		if (group_type == IPV6_ADDR_ANY) { /* general query */
-			if (mlh2->nsrcs) {
-				in6_dev_put(idev);
+			if (mlh2->mld2q_nsrcs)
 				return -EINVAL; /* no sources allowed */
-			}
+
 			mld_gq_start_timer(idev);
-			in6_dev_put(idev);
 			return 0;
 		}
 		/* mark sources to include, if group & source-specific */
-		if (mlh2->nsrcs != 0) {
-			if (!pskb_may_pull(skb, srcs_offset + 
-				mlh2->nsrcs * sizeof(struct in6_addr))) {
-				in6_dev_put(idev);
+		if (mlh2->mld2q_nsrcs != 0) {
+			if (!pskb_may_pull(skb, srcs_offset +
+			    ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr)))
 				return -EINVAL;
-			}
-			mlh2 = (struct mld2_query *) skb->h.raw;
+
+			mlh2 = (struct mld2_query *)skb_transport_header(skb);
 			mark = 1;
 		}
-	} else {
-		in6_dev_put(idev);
+	} else
 		return -EINVAL;
-	}
 
 	read_lock_bh(&idev->lock);
 	if (group_type == IPV6_ADDR_ANY) {
@@ -1188,8 +1371,7 @@ int igmp6_event_query(struct sk_buff *skb)
 		}
 	} else {
 		for (ma = idev->mc_list; ma; ma=ma->next) {
-			if (group_type != IPV6_ADDR_ANY &&
-			    !ipv6_addr_equal(group, &ma->mca_addr))
+			if (!ipv6_addr_equal(group, &ma->mca_addr))
 				continue;
 			spin_lock_bh(&ma->mca_lock);
 			if (ma->mca_flags & MAF_TIMER_RUNNING) {
@@ -1203,48 +1385,47 @@ int igmp6_event_query(struct sk_buff *skb)
 				else
 					ma->mca_flags &= ~MAF_GSQUERY;
 			}
-			if (ma->mca_flags & MAF_GSQUERY)
-				mld_marksources(ma, ntohs(mlh2->nsrcs),
-					mlh2->srcs);
-			igmp6_group_queried(ma, max_delay);
+			if (!(ma->mca_flags & MAF_GSQUERY) ||
+			    mld_marksources(ma, ntohs(mlh2->mld2q_nsrcs), mlh2->mld2q_srcs))
+				igmp6_group_queried(ma, max_delay);
 			spin_unlock_bh(&ma->mca_lock);
-			if (group_type != IPV6_ADDR_ANY)
-				break;
+			break;
 		}
 	}
 	read_unlock_bh(&idev->lock);
-	in6_dev_put(idev);
 
 	return 0;
 }
 
-
+/* called with rcu_read_lock() */
 int igmp6_event_report(struct sk_buff *skb)
 {
 	struct ifmcaddr6 *ma;
-	struct in6_addr *addrp;
 	struct inet6_dev *idev;
-	struct icmp6hdr *hdr;
+	struct mld_msg *mld;
 	int addr_type;
 
 	/* Our own report looped back. Ignore it. */
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		return 0;
 
-	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
+	/* send our report if the MC router may not have heard this report */
+	if (skb->pkt_type != PACKET_MULTICAST &&
+	    skb->pkt_type != PACKET_BROADCAST)
+		return 0;
+
+	if (!pskb_may_pull(skb, sizeof(*mld) - sizeof(struct icmp6hdr)))
 		return -EINVAL;
 
-	hdr = (struct icmp6hdr*) skb->h.raw;
+	mld = (struct mld_msg *)icmp6_hdr(skb);
 
 	/* Drop reports with not link local source */
-	addr_type = ipv6_addr_type(&skb->nh.ipv6h->saddr);
-	if (addr_type != IPV6_ADDR_ANY && 
+	addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
+	if (addr_type != IPV6_ADDR_ANY &&
 	    !(addr_type&IPV6_ADDR_LINKLOCAL))
 		return -EINVAL;
 
-	addrp = (struct in6_addr *) (hdr + 1);
-
-	idev = in6_dev_get(skb->dev);
+	idev = __in6_dev_get(skb->dev);
 	if (idev == NULL)
 		return -ENODEV;
 
@@ -1254,7 +1435,7 @@ int igmp6_event_report(struct sk_buff *skb)
 
 	read_lock_bh(&idev->lock);
 	for (ma = idev->mc_list; ma; ma=ma->next) {
-		if (ipv6_addr_equal(&ma->mca_addr, addrp)) {
+		if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) {
 			spin_lock(&ma->mca_lock);
 			if (del_timer(&ma->mca_timer))
 				atomic_dec(&ma->mca_refcnt);
@@ -1264,41 +1445,51 @@ int igmp6_event_report(struct sk_buff *skb)
 		}
 	}
 	read_unlock_bh(&idev->lock);
-	in6_dev_put(idev);
 	return 0;
 }
 
-static int is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
-	int gdeleted, int sdeleted)
+static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
+		  int gdeleted, int sdeleted)
 {
 	switch (type) {
 	case MLD2_MODE_IS_INCLUDE:
 	case MLD2_MODE_IS_EXCLUDE:
 		if (gdeleted || sdeleted)
-			return 0;
-		return !((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp);
+			return false;
+		if (!((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp)) {
+			if (pmc->mca_sfmode == MCAST_INCLUDE)
+				return true;
+			/* don't include if this source is excluded
+			 * in all filters
+			 */
+			if (psf->sf_count[MCAST_INCLUDE])
+				return type == MLD2_MODE_IS_INCLUDE;
+			return pmc->mca_sfcount[MCAST_EXCLUDE] ==
+				psf->sf_count[MCAST_EXCLUDE];
+		}
+		return false;
 	case MLD2_CHANGE_TO_INCLUDE:
 		if (gdeleted || sdeleted)
-			return 0;
+			return false;
 		return psf->sf_count[MCAST_INCLUDE] != 0;
 	case MLD2_CHANGE_TO_EXCLUDE:
 		if (gdeleted || sdeleted)
-			return 0;
+			return false;
 		if (pmc->mca_sfcount[MCAST_EXCLUDE] == 0 ||
 		    psf->sf_count[MCAST_INCLUDE])
-			return 0;
+			return false;
 		return pmc->mca_sfcount[MCAST_EXCLUDE] ==
 			psf->sf_count[MCAST_EXCLUDE];
 	case MLD2_ALLOW_NEW_SOURCES:
 		if (gdeleted || !psf->sf_crcount)
-			return 0;
+			return false;
 		return (pmc->mca_sfmode == MCAST_INCLUDE) ^ sdeleted;
 	case MLD2_BLOCK_OLD_SOURCES:
 		if (pmc->mca_sfmode == MCAST_INCLUDE)
 			return gdeleted || (psf->sf_crcount && sdeleted);
 		return psf->sf_crcount && !gdeleted && !sdeleted;
 	}
-	return 0;
+	return false;
 }
 
 static int
@@ -1315,97 +1506,142 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
 	return scount;
 }
 
-static struct sk_buff *mld_newpack(struct net_device *dev, int size)
+static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb,
+		       struct net_device *dev,
+		       const struct in6_addr *saddr,
+		       const struct in6_addr *daddr,
+		       int proto, int len)
 {
-	struct sock *sk = igmp6_socket->sk;
+	struct ipv6hdr *hdr;
+
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->dev = dev;
+
+	skb_reset_network_header(skb);
+	skb_put(skb, sizeof(struct ipv6hdr));
+	hdr = ipv6_hdr(skb);
+
+	ip6_flow_hdr(hdr, 0, 0);
+
+	hdr->payload_len = htons(len);
+	hdr->nexthdr = proto;
+	hdr->hop_limit = inet6_sk(sk)->hop_limit;
+
+	hdr->saddr = *saddr;
+	hdr->daddr = *daddr;
+}
+
+static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size)
+{
+	struct net_device *dev = idev->dev;
+	struct net *net = dev_net(dev);
+	struct sock *sk = net->ipv6.igmp_sk;
 	struct sk_buff *skb;
 	struct mld2_report *pmr;
 	struct in6_addr addr_buf;
+	const struct in6_addr *saddr;
+	int hlen = LL_RESERVED_SPACE(dev);
+	int tlen = dev->needed_tailroom;
 	int err;
 	u8 ra[8] = { IPPROTO_ICMPV6, 0,
 		     IPV6_TLV_ROUTERALERT, 2, 0, 0,
 		     IPV6_TLV_PADN, 0 };
 
 	/* we assume size > sizeof(ra) here */
-	skb = sock_alloc_send_skb(sk, size + LL_RESERVED_SPACE(dev), 1, &err);
+	size += hlen + tlen;
+	/* limit our allocations to order-0 page */
+	size = min_t(int, size, SKB_MAX_ORDER(0, 0));
+	skb = sock_alloc_send_skb(sk, size, 1, &err);
 
-	if (skb == 0)
+	if (!skb)
 		return NULL;
 
-	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+	skb->priority = TC_PRIO_CONTROL;
+	skb_reserve(skb, hlen);
 
-	if (ipv6_get_lladdr(dev, &addr_buf)) {
+	if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
 		/* <draft-ietf-magma-mld-source-05.txt>:
-		 * use unspecified address as the source address 
+		 * use unspecified address as the source address
 		 * when a valid link-local address is not available.
 		 */
-		memset(&addr_buf, 0, sizeof(addr_buf));
-	}
+		saddr = &in6addr_any;
+	} else
+		saddr = &addr_buf;
 
-	ip6_nd_hdr(sk, skb, dev, &addr_buf, &mld2_all_mcr, NEXTHDR_HOP, 0);
+	ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0);
 
 	memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
 
-	pmr =(struct mld2_report *)skb_put(skb, sizeof(*pmr));
-	skb->h.raw = (unsigned char *)pmr;
-	pmr->type = ICMPV6_MLD2_REPORT;
-	pmr->resv1 = 0;
-	pmr->csum = 0;
-	pmr->resv2 = 0;
-	pmr->ngrec = 0;
+	skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data);
+	skb_put(skb, sizeof(*pmr));
+	pmr = (struct mld2_report *)skb_transport_header(skb);
+	pmr->mld2r_type = ICMPV6_MLD2_REPORT;
+	pmr->mld2r_resv1 = 0;
+	pmr->mld2r_cksum = 0;
+	pmr->mld2r_resv2 = 0;
+	pmr->mld2r_ngrec = 0;
 	return skb;
 }
 
-static inline int mld_dev_queue_xmit2(struct sk_buff *skb)
-{
-	struct net_device *dev = skb->dev;
-
-	if (dev->hard_header) {
-		unsigned char ha[MAX_ADDR_LEN];
-		int err;
-
-		ndisc_mc_map(&skb->nh.ipv6h->daddr, ha, dev, 1);
-		err = dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, skb->len);
-		if (err < 0) {
-			kfree_skb(skb);
-			return err;
-		}
-	}
-	return dev_queue_xmit(skb);
-}
-
-static inline int mld_dev_queue_xmit(struct sk_buff *skb)
-{
-	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev,
-	               mld_dev_queue_xmit2);
-}
-
 static void mld_sendpack(struct sk_buff *skb)
 {
-	struct ipv6hdr *pip6 = skb->nh.ipv6h;
-	struct mld2_report *pmr = (struct mld2_report *)skb->h.raw;
+	struct ipv6hdr *pip6 = ipv6_hdr(skb);
+	struct mld2_report *pmr =
+			      (struct mld2_report *)skb_transport_header(skb);
 	int payload_len, mldlen;
-	struct inet6_dev *idev = in6_dev_get(skb->dev);
+	struct inet6_dev *idev;
+	struct net *net = dev_net(skb->dev);
 	int err;
+	struct flowi6 fl6;
+	struct dst_entry *dst;
+
+	rcu_read_lock();
+	idev = __in6_dev_get(skb->dev);
+	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
 
-	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
-	payload_len = skb->tail - (unsigned char *)skb->nh.ipv6h -
-		sizeof(struct ipv6hdr);
-	mldlen = skb->tail - skb->h.raw;
+	payload_len = (skb_tail_pointer(skb) - skb_network_header(skb)) -
+		sizeof(*pip6);
+	mldlen = skb_tail_pointer(skb) - skb_transport_header(skb);
 	pip6->payload_len = htons(payload_len);
 
-	pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
-		IPPROTO_ICMPV6, csum_partial(skb->h.raw, mldlen, 0));
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
-		mld_dev_queue_xmit);
+	pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
+					   IPPROTO_ICMPV6,
+					   csum_partial(skb_transport_header(skb),
+							mldlen, 0));
+
+	icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT,
+			 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
+			 skb->dev->ifindex);
+	dst = icmp6_dst_alloc(skb->dev, &fl6);
+
+	err = 0;
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
+		dst = NULL;
+	}
+	skb_dst_set(skb, dst);
+	if (err)
+		goto err_out;
+
+	payload_len = skb->len;
+
+	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
+		      dst_output);
+out:
 	if (!err) {
-		ICMP6_INC_STATS(idev,ICMP6_MIB_OUTMSGS);
-		IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
-	} else
-		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+		ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
+		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
+		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
+	} else {
+		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
+	}
 
-	if (likely(idev != NULL))
-		in6_dev_put(idev);
+	rcu_read_unlock();
+	return;
+
+err_out:
+	kfree_skb(skb);
+	goto out;
 }
 
 static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel)
@@ -1421,7 +1657,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	struct mld2_grec *pgr;
 
 	if (!skb)
-		skb = mld_newpack(dev, dev->mtu);
+		skb = mld_newpack(pmc->idev, dev->mtu);
 	if (!skb)
 		return NULL;
 	pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec));
@@ -1429,8 +1665,8 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	pgr->grec_auxwords = 0;
 	pgr->grec_nsrcs = 0;
 	pgr->grec_mca = pmc->mca_addr;	/* structure copy */
-	pmr = (struct mld2_report *)skb->h.raw;
-	pmr->ngrec = htons(ntohs(pmr->ngrec)+1);
+	pmr = (struct mld2_report *)skb_transport_header(skb);
+	pmr->mld2r_ngrec = htons(ntohs(pmr->mld2r_ngrec)+1);
 	*ppgr = pgr;
 	return skb;
 }
@@ -1439,13 +1675,14 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	skb_tailroom(skb)) : 0)
 
 static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
-	int type, int gdeleted, int sdeleted)
+	int type, int gdeleted, int sdeleted, int crsend)
 {
-	struct net_device *dev = pmc->idev->dev;
+	struct inet6_dev *idev = pmc->idev;
+	struct net_device *dev = idev->dev;
 	struct mld2_report *pmr;
 	struct mld2_grec *pgr = NULL;
 	struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
-	int scount, first, isquery, truncate;
+	int scount, stotal, first, isquery, truncate;
 
 	if (pmc->mca_flags & MAF_NOREPORT)
 		return skb;
@@ -1455,38 +1692,25 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	truncate = type == MLD2_MODE_IS_EXCLUDE ||
 		    type == MLD2_CHANGE_TO_EXCLUDE;
 
+	stotal = scount = 0;
+
 	psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources;
 
-	if (!*psf_list) {
-		if (type == MLD2_ALLOW_NEW_SOURCES ||
-		    type == MLD2_BLOCK_OLD_SOURCES)
-			return skb;
-		if (pmc->mca_crcount || isquery) {
-			/* make sure we have room for group header and at
-			 * least one source.
-			 */
-			if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)+
-			    sizeof(struct in6_addr)) {
-				mld_sendpack(skb);
-				skb = NULL; /* add_grhead will get a new one */
-			}
-			skb = add_grhead(skb, pmc, type, &pgr);
-		}
-		return skb;
-	}
-	pmr = skb ? (struct mld2_report *)skb->h.raw : NULL;
+	if (!*psf_list)
+		goto empty_source;
+
+	pmr = skb ? (struct mld2_report *)skb_transport_header(skb) : NULL;
 
 	/* EX and TO_EX get a fresh packet, if needed */
 	if (truncate) {
-		if (pmr && pmr->ngrec &&
+		if (pmr && pmr->mld2r_ngrec &&
 		    AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
 			if (skb)
 				mld_sendpack(skb);
-			skb = mld_newpack(dev, dev->mtu);
+			skb = mld_newpack(idev, dev->mtu);
 		}
 	}
 	first = 1;
-	scount = 0;
 	psf_prev = NULL;
 	for (psf=*psf_list; psf; psf=psf_next) {
 		struct in6_addr *psrc;
@@ -1510,7 +1734,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 				pgr->grec_nsrcs = htons(scount);
 			if (skb)
 				mld_sendpack(skb);
-			skb = mld_newpack(dev, dev->mtu);
+			skb = mld_newpack(idev, dev->mtu);
 			first = 1;
 			scount = 0;
 		}
@@ -1518,9 +1742,11 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 			skb = add_grhead(skb, pmc, type, &pgr);
 			first = 0;
 		}
+		if (!skb)
+			return NULL;
 		psrc = (struct in6_addr *)skb_put(skb, sizeof(*psrc));
 		*psrc = psf->sf_addr;
-		scount++;
+		scount++; stotal++;
 		if ((type == MLD2_ALLOW_NEW_SOURCES ||
 		     type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
 			psf->sf_crcount--;
@@ -1535,6 +1761,21 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 		}
 		psf_prev = psf;
 	}
+
+empty_source:
+	if (!stotal) {
+		if (type == MLD2_ALLOW_NEW_SOURCES ||
+		    type == MLD2_BLOCK_OLD_SOURCES)
+			return skb;
+		if (pmc->mca_crcount || isquery || crsend) {
+			/* make sure we have room for group header */
+			if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)) {
+				mld_sendpack(skb);
+				skb = NULL; /* add_grhead will get a new one */
+			}
+			skb = add_grhead(skb, pmc, type, &pgr);
+		}
+	}
 	if (pgr)
 		pgr->grec_nsrcs = htons(scount);
 
@@ -1548,8 +1789,8 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
 	struct sk_buff *skb = NULL;
 	int type;
 
+	read_lock_bh(&idev->lock);
 	if (!pmc) {
-		read_lock_bh(&idev->lock);
 		for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
 			if (pmc->mca_flags & MAF_NOREPORT)
 				continue;
@@ -1558,19 +1799,19 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
 				type = MLD2_MODE_IS_EXCLUDE;
 			else
 				type = MLD2_MODE_IS_INCLUDE;
-			skb = add_grec(skb, pmc, type, 0, 0);
+			skb = add_grec(skb, pmc, type, 0, 0, 0);
 			spin_unlock_bh(&pmc->mca_lock);
 		}
-		read_unlock_bh(&idev->lock);
 	} else {
 		spin_lock_bh(&pmc->mca_lock);
 		if (pmc->mca_sfcount[MCAST_EXCLUDE])
 			type = MLD2_MODE_IS_EXCLUDE;
 		else
 			type = MLD2_MODE_IS_INCLUDE;
-		skb = add_grec(skb, pmc, type, 0, 0);
+		skb = add_grec(skb, pmc, type, 0, 0, 0);
 		spin_unlock_bh(&pmc->mca_lock);
 	}
+	read_unlock_bh(&idev->lock);
 	if (skb)
 		mld_sendpack(skb);
 }
@@ -1603,7 +1844,7 @@ static void mld_send_cr(struct inet6_dev *idev)
 	int type, dtype;
 
 	read_lock_bh(&idev->lock);
-	write_lock_bh(&idev->mc_lock);
+	spin_lock(&idev->mc_lock);
 
 	/* deleted MCA's */
 	pmc_prev = NULL;
@@ -1612,15 +1853,15 @@ static void mld_send_cr(struct inet6_dev *idev)
 		if (pmc->mca_sfmode == MCAST_INCLUDE) {
 			type = MLD2_BLOCK_OLD_SOURCES;
 			dtype = MLD2_BLOCK_OLD_SOURCES;
-			skb = add_grec(skb, pmc, type, 1, 0);
-			skb = add_grec(skb, pmc, dtype, 1, 1);
+			skb = add_grec(skb, pmc, type, 1, 0, 0);
+			skb = add_grec(skb, pmc, dtype, 1, 1, 0);
 		}
 		if (pmc->mca_crcount) {
-			pmc->mca_crcount--;
 			if (pmc->mca_sfmode == MCAST_EXCLUDE) {
 				type = MLD2_CHANGE_TO_INCLUDE;
-				skb = add_grec(skb, pmc, type, 1, 0);
+				skb = add_grec(skb, pmc, type, 1, 0, 0);
 			}
+			pmc->mca_crcount--;
 			if (pmc->mca_crcount == 0) {
 				mld_clear_zeros(&pmc->mca_tomb);
 				mld_clear_zeros(&pmc->mca_sources);
@@ -1637,7 +1878,7 @@ static void mld_send_cr(struct inet6_dev *idev)
 		} else
 			pmc_prev = pmc;
 	}
-	write_unlock_bh(&idev->mc_lock);
+	spin_unlock(&idev->mc_lock);
 
 	/* change recs */
 	for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
@@ -1649,17 +1890,17 @@ static void mld_send_cr(struct inet6_dev *idev)
 			type = MLD2_ALLOW_NEW_SOURCES;
 			dtype = MLD2_BLOCK_OLD_SOURCES;
 		}
-		skb = add_grec(skb, pmc, type, 0, 0);
-		skb = add_grec(skb, pmc, dtype, 0, 1);	/* deleted sources */
+		skb = add_grec(skb, pmc, type, 0, 0, 0);
+		skb = add_grec(skb, pmc, dtype, 0, 1, 0);	/* deleted sources */
 
 		/* filter mode changes */
 		if (pmc->mca_crcount) {
-			pmc->mca_crcount--;
 			if (pmc->mca_sfmode == MCAST_EXCLUDE)
 				type = MLD2_CHANGE_TO_EXCLUDE;
 			else
 				type = MLD2_CHANGE_TO_INCLUDE;
-			skb = add_grec(skb, pmc, type, 0, 0);
+			skb = add_grec(skb, pmc, type, 0, 0, 0);
+			pmc->mca_crcount--;
 		}
 		spin_unlock_bh(&pmc->mca_lock);
 	}
@@ -1671,83 +1912,152 @@ static void mld_send_cr(struct inet6_dev *idev)
 
 static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 {
-	struct sock *sk = igmp6_socket->sk;
+	struct net *net = dev_net(dev);
+	struct sock *sk = net->ipv6.igmp_sk;
 	struct inet6_dev *idev;
-        struct sk_buff *skb;
-        struct icmp6hdr *hdr;
-	struct in6_addr *snd_addr;
-	struct in6_addr *addrp;
+	struct sk_buff *skb;
+	struct mld_msg *hdr;
+	const struct in6_addr *snd_addr, *saddr;
 	struct in6_addr addr_buf;
-	struct in6_addr all_routers;
+	int hlen = LL_RESERVED_SPACE(dev);
+	int tlen = dev->needed_tailroom;
 	int err, len, payload_len, full_len;
 	u8 ra[8] = { IPPROTO_ICMPV6, 0,
 		     IPV6_TLV_ROUTERALERT, 2, 0, 0,
 		     IPV6_TLV_PADN, 0 };
+	struct flowi6 fl6;
+	struct dst_entry *dst;
 
-	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
-	snd_addr = addr;
-	if (type == ICMPV6_MGM_REDUCTION) {
-		snd_addr = &all_routers;
-		ipv6_addr_all_routers(&all_routers);
-	}
+	if (type == ICMPV6_MGM_REDUCTION)
+		snd_addr = &in6addr_linklocal_allrouters;
+	else
+		snd_addr = addr;
 
 	len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
 	payload_len = len + sizeof(ra);
 	full_len = sizeof(struct ipv6hdr) + payload_len;
 
-	skb = sock_alloc_send_skb(sk, LL_RESERVED_SPACE(dev) + full_len, 1, &err);
+	rcu_read_lock();
+	IP6_UPD_PO_STATS(net, __in6_dev_get(dev),
+		      IPSTATS_MIB_OUT, full_len);
+	rcu_read_unlock();
+
+	skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err);
 
 	if (skb == NULL) {
-		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+		rcu_read_lock();
+		IP6_INC_STATS(net, __in6_dev_get(dev),
+			      IPSTATS_MIB_OUTDISCARDS);
+		rcu_read_unlock();
 		return;
 	}
+	skb->priority = TC_PRIO_CONTROL;
+	skb_reserve(skb, hlen);
 
-	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-
-	if (ipv6_get_lladdr(dev, &addr_buf)) {
+	if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
 		/* <draft-ietf-magma-mld-source-05.txt>:
-		 * use unspecified address as the source address 
+		 * use unspecified address as the source address
 		 * when a valid link-local address is not available.
 		 */
-		memset(&addr_buf, 0, sizeof(addr_buf));
-	}
+		saddr = &in6addr_any;
+	} else
+		saddr = &addr_buf;
 
-	ip6_nd_hdr(sk, skb, dev, &addr_buf, snd_addr, NEXTHDR_HOP, payload_len);
+	ip6_mc_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len);
 
 	memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
 
-	hdr = (struct icmp6hdr *) skb_put(skb, sizeof(struct icmp6hdr));
-	memset(hdr, 0, sizeof(struct icmp6hdr));
-	hdr->icmp6_type = type;
+	hdr = (struct mld_msg *) skb_put(skb, sizeof(struct mld_msg));
+	memset(hdr, 0, sizeof(struct mld_msg));
+	hdr->mld_type = type;
+	hdr->mld_mca = *addr;
 
-	addrp = (struct in6_addr *) skb_put(skb, sizeof(struct in6_addr));
-	ipv6_addr_copy(addrp, addr);
+	hdr->mld_cksum = csum_ipv6_magic(saddr, snd_addr, len,
+					 IPPROTO_ICMPV6,
+					 csum_partial(hdr, len, 0));
 
-	hdr->icmp6_cksum = csum_ipv6_magic(&addr_buf, snd_addr, len,
-					   IPPROTO_ICMPV6,
-					   csum_partial((__u8 *) hdr, len, 0));
+	rcu_read_lock();
+	idev = __in6_dev_get(skb->dev);
 
-	idev = in6_dev_get(skb->dev);
+	icmpv6_flow_init(sk, &fl6, type,
+			 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
+			 skb->dev->ifindex);
+	dst = icmp6_dst_alloc(skb->dev, &fl6);
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
+		goto err_out;
+	}
 
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
-		mld_dev_queue_xmit);
+	skb_dst_set(skb, dst);
+	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
+		      dst_output);
+out:
 	if (!err) {
-		if (type == ICMPV6_MGM_REDUCTION)
-			ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBREDUCTIONS);
-		else
-			ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBRESPONSES);
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
-		IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+		ICMP6MSGOUT_INC_STATS(net, idev, type);
+		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
+		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len);
 	} else
-		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 
-	if (likely(idev != NULL))
-		in6_dev_put(idev);
+	rcu_read_unlock();
 	return;
+
+err_out:
+	kfree_skb(skb);
+	goto out;
+}
+
+static void mld_send_initial_cr(struct inet6_dev *idev)
+{
+	struct sk_buff *skb;
+	struct ifmcaddr6 *pmc;
+	int type;
+
+	if (mld_in_v1_mode(idev))
+		return;
+
+	skb = NULL;
+	read_lock_bh(&idev->lock);
+	for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+		spin_lock_bh(&pmc->mca_lock);
+		if (pmc->mca_sfcount[MCAST_EXCLUDE])
+			type = MLD2_CHANGE_TO_EXCLUDE;
+		else
+			type = MLD2_CHANGE_TO_INCLUDE;
+		skb = add_grec(skb, pmc, type, 0, 0, 1);
+		spin_unlock_bh(&pmc->mca_lock);
+	}
+	read_unlock_bh(&idev->lock);
+	if (skb)
+		mld_sendpack(skb);
+}
+
+void ipv6_mc_dad_complete(struct inet6_dev *idev)
+{
+	idev->mc_dad_count = idev->mc_qrv;
+	if (idev->mc_dad_count) {
+		mld_send_initial_cr(idev);
+		idev->mc_dad_count--;
+		if (idev->mc_dad_count)
+			mld_dad_start_timer(idev, idev->mc_maxdelay);
+	}
+}
+
+static void mld_dad_timer_expire(unsigned long data)
+{
+	struct inet6_dev *idev = (struct inet6_dev *)data;
+
+	mld_send_initial_cr(idev);
+	if (idev->mc_dad_count) {
+		idev->mc_dad_count--;
+		if (idev->mc_dad_count)
+			mld_dad_start_timer(idev, idev->mc_maxdelay);
+	}
+	in6_dev_put(idev);
 }
 
 static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
-	struct in6_addr *psfsrc)
+	const struct in6_addr *psfsrc)
 {
 	struct ip6_sf_list *psf, *psf_prev;
 	int rv = 0;
@@ -1772,7 +2082,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
 		else
 			pmc->mca_sources = psf->sf_next;
 		if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) &&
-		    !MLD_V1_SEEN(idev)) {
+		    !mld_in_v1_mode(idev)) {
 			psf->sf_crcount = idev->mc_qrv;
 			psf->sf_next = pmc->mca_tomb;
 			pmc->mca_tomb = psf;
@@ -1783,8 +2093,8 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
 	return rv;
 }
 
-static int ip6_mc_del_src(struct inet6_dev *idev, struct in6_addr *pmca,
-			  int sfmode, int sfcount, struct in6_addr *psfsrc,
+static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
+			  int sfmode, int sfcount, const struct in6_addr *psfsrc,
 			  int delta)
 {
 	struct ifmcaddr6 *pmc;
@@ -1844,7 +2154,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, struct in6_addr *pmca,
  * Add multicast single-source filter to the interface list
  */
 static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
-	struct in6_addr *psfsrc, int delta)
+	const struct in6_addr *psfsrc)
 {
 	struct ip6_sf_list *psf, *psf_prev;
 
@@ -1855,10 +2165,10 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
 		psf_prev = psf;
 	}
 	if (!psf) {
-		psf = (struct ip6_sf_list *)kmalloc(sizeof(*psf), GFP_ATOMIC);
+		psf = kzalloc(sizeof(*psf), GFP_ATOMIC);
 		if (!psf)
 			return -ENOBUFS;
-		memset(psf, 0, sizeof(*psf));
+
 		psf->sf_addr = *psfsrc;
 		if (psf_prev) {
 			psf_prev->sf_next = psf;
@@ -1885,7 +2195,7 @@ static void sf_markstate(struct ifmcaddr6 *pmc)
 
 static int sf_setstate(struct ifmcaddr6 *pmc)
 {
-	struct ip6_sf_list *psf;
+	struct ip6_sf_list *psf, *dpsf;
 	int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE];
 	int qrv = pmc->idev->mc_qrv;
 	int new_in, rv;
@@ -1897,8 +2207,47 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 				!psf->sf_count[MCAST_INCLUDE];
 		} else
 			new_in = psf->sf_count[MCAST_INCLUDE] != 0;
-		if (new_in != psf->sf_oldin) {
-			psf->sf_crcount = qrv;
+		if (new_in) {
+			if (!psf->sf_oldin) {
+				struct ip6_sf_list *prev = NULL;
+
+				for (dpsf=pmc->mca_tomb; dpsf;
+				     dpsf=dpsf->sf_next) {
+					if (ipv6_addr_equal(&dpsf->sf_addr,
+					    &psf->sf_addr))
+						break;
+					prev = dpsf;
+				}
+				if (dpsf) {
+					if (prev)
+						prev->sf_next = dpsf->sf_next;
+					else
+						pmc->mca_tomb = dpsf->sf_next;
+					kfree(dpsf);
+				}
+				psf->sf_crcount = qrv;
+				rv++;
+			}
+		} else if (psf->sf_oldin) {
+			psf->sf_crcount = 0;
+			/*
+			 * add or update "delete" records if an active filter
+			 * is now inactive
+			 */
+			for (dpsf=pmc->mca_tomb; dpsf; dpsf=dpsf->sf_next)
+				if (ipv6_addr_equal(&dpsf->sf_addr,
+				    &psf->sf_addr))
+					break;
+			if (!dpsf) {
+				dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC);
+				if (!dpsf)
+					continue;
+				*dpsf = *psf;
+				/* pmc->mca_lock held by callers */
+				dpsf->sf_next = pmc->mca_tomb;
+				pmc->mca_tomb = dpsf;
+			}
+			dpsf->sf_crcount = qrv;
 			rv++;
 		}
 	}
@@ -1908,8 +2257,8 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 /*
  * Add multicast source filter list to the interface list
  */
-static int ip6_mc_add_src(struct inet6_dev *idev, struct in6_addr *pmca,
-			  int sfmode, int sfcount, struct in6_addr *psfsrc,
+static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
+			  int sfmode, int sfcount, const struct in6_addr *psfsrc,
 			  int delta)
 {
 	struct ifmcaddr6 *pmc;
@@ -1936,7 +2285,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, struct in6_addr *pmca,
 		pmc->mca_sfcount[sfmode]++;
 	err = 0;
 	for (i=0; i<sfcount; i++) {
-		err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i], delta);
+		err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i]);
 		if (err)
 			break;
 	}
@@ -1946,9 +2295,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, struct in6_addr *pmca,
 		if (!delta)
 			pmc->mca_sfcount[sfmode]--;
 		for (j=0; j<i; j++)
-			(void) ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]);
+			ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]);
 	} else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) {
-		struct inet6_dev *idev = pmc->idev;
 		struct ip6_sf_list *psf;
 
 		/* filter mode change */
@@ -1999,7 +2347,7 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)
 
 	igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
 
-	delay = net_random() % IGMP6_UNSOLICITED_IVAL;
+	delay = prandom_u32() % unsolicited_report_interval(ma->idev);
 
 	spin_lock_bh(&ma->mca_lock);
 	if (del_timer(&ma->mca_timer)) {
@@ -2018,7 +2366,10 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 {
 	int err;
 
-	if (iml->sflist == 0) {
+	/* callers have the socket lock and a write lock on ipv6_sk_mc_lock,
+	 * so no other readers or writers of iml or its sflist
+	 */
+	if (!iml->sflist) {
 		/* any-source empty exclude case */
 		return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
 	}
@@ -2031,7 +2382,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 
 static void igmp6_leave_group(struct ifmcaddr6 *ma)
 {
-	if (MLD_V1_SEEN(ma->idev)) {
+	if (mld_in_v1_mode(ma->idev)) {
 		if (ma->mca_flags & MAF_LAST_REPORTER)
 			igmp6_send(&ma->mca_addr, ma->idev->dev,
 				ICMPV6_MGM_REDUCTION);
@@ -2047,7 +2398,7 @@ static void mld_gq_timer_expire(unsigned long data)
 
 	idev->mc_gq_running = 0;
 	mld_send_report(idev, NULL);
-	__in6_dev_put(idev);
+	in6_dev_put(idev);
 }
 
 static void mld_ifc_timer_expire(unsigned long data)
@@ -2060,12 +2411,12 @@ static void mld_ifc_timer_expire(unsigned long data)
 		if (idev->mc_ifc_count)
 			mld_ifc_start_timer(idev, idev->mc_maxdelay);
 	}
-	__in6_dev_put(idev);
+	in6_dev_put(idev);
 }
 
 static void mld_ifc_event(struct inet6_dev *idev)
 {
-	if (MLD_V1_SEEN(idev))
+	if (mld_in_v1_mode(idev))
 		return;
 	idev->mc_ifc_count = idev->mc_qrv;
 	mld_ifc_start_timer(idev, 1);
@@ -2076,7 +2427,7 @@ static void igmp6_timer_handler(unsigned long data)
 {
 	struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;
 
-	if (MLD_V1_SEEN(ma->idev))
+	if (mld_in_v1_mode(ma->idev))
 		igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
 	else
 		mld_send_report(ma->idev, ma);
@@ -2088,6 +2439,25 @@ static void igmp6_timer_handler(unsigned long data)
 	ma_put(ma);
 }
 
+/* Device changing type */
+
+void ipv6_mc_unmap(struct inet6_dev *idev)
+{
+	struct ifmcaddr6 *i;
+
+	/* Install multicast list, except for all-nodes (already installed) */
+
+	read_lock_bh(&idev->lock);
+	for (i = idev->mc_list; i; i = i->next)
+		igmp6_group_dropped(i);
+	read_unlock_bh(&idev->lock);
+}
+
+void ipv6_mc_remap(struct inet6_dev *idev)
+{
+	ipv6_mc_up(idev);
+}
+
 /* Device going down */
 
 void ipv6_mc_down(struct inet6_dev *idev)
@@ -2097,12 +2467,9 @@ void ipv6_mc_down(struct inet6_dev *idev)
 	/* Withdraw multicast list */
 
 	read_lock_bh(&idev->lock);
-	idev->mc_ifc_count = 0;
-	if (del_timer(&idev->mc_ifc_timer))
-		__in6_dev_put(idev);
-	idev->mc_gq_running = 0;
-	if (del_timer(&idev->mc_gq_timer))
-		__in6_dev_put(idev);
+	mld_ifc_stop_timer(idev);
+	mld_gq_stop_timer(idev);
+	mld_dad_stop_timer(idev);
 
 	for (i = idev->mc_list; i; i=i->next)
 		igmp6_group_dropped(i);
@@ -2130,27 +2497,25 @@ void ipv6_mc_up(struct inet6_dev *idev)
 
 void ipv6_mc_init_dev(struct inet6_dev *idev)
 {
-	struct in6_addr maddr;
-
 	write_lock_bh(&idev->lock);
-	rwlock_init(&idev->mc_lock);
+	spin_lock_init(&idev->mc_lock);
 	idev->mc_gq_running = 0;
-	init_timer(&idev->mc_gq_timer);
-	idev->mc_gq_timer.data = (unsigned long) idev;
-	idev->mc_gq_timer.function = &mld_gq_timer_expire;
+	setup_timer(&idev->mc_gq_timer, mld_gq_timer_expire,
+			(unsigned long)idev);
 	idev->mc_tomb = NULL;
 	idev->mc_ifc_count = 0;
-	init_timer(&idev->mc_ifc_timer);
-	idev->mc_ifc_timer.data = (unsigned long) idev;
-	idev->mc_ifc_timer.function = &mld_ifc_timer_expire;
+	setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire,
+			(unsigned long)idev);
+	setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire,
+		    (unsigned long)idev);
+
 	idev->mc_qrv = MLD_QRV_DEFAULT;
-	idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL;
+	idev->mc_qi = MLD_QI_DEFAULT;
+	idev->mc_qri = MLD_QRI_DEFAULT;
+
+	idev->mc_maxdelay = unsolicited_report_interval(idev);
 	idev->mc_v1_seen = 0;
 	write_unlock_bh(&idev->lock);
-
-	/* Add all-nodes address. */
-	ipv6_addr_all_nodes(&maddr);
-	ipv6_dev_mc_inc(idev->dev, &maddr);
 }
 
 /*
@@ -2160,24 +2525,19 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
 void ipv6_mc_destroy_dev(struct inet6_dev *idev)
 {
 	struct ifmcaddr6 *i;
-	struct in6_addr maddr;
 
 	/* Deactivate timers */
 	ipv6_mc_down(idev);
 
 	/* Delete all-nodes address. */
-	ipv6_addr_all_nodes(&maddr);
-
 	/* We cannot call ipv6_dev_mc_dec() directly, our caller in
 	 * addrconf.c has NULL'd out dev->ip6_ptr so in6_dev_get() will
 	 * fail.
 	 */
-	__ipv6_dev_mc_dec(idev, &maddr);
+	__ipv6_dev_mc_dec(idev, &in6addr_linklocal_allnodes);
 
-	if (idev->cnf.forwarding) {
-		ipv6_addr_all_routers(&maddr);
-		__ipv6_dev_mc_dec(idev, &maddr);
-	}
+	if (idev->cnf.forwarding)
+		__ipv6_dev_mc_dec(idev, &in6addr_linklocal_allrouters);
 
 	write_lock_bh(&idev->lock);
 	while ((i = idev->mc_list) != NULL) {
@@ -2194,6 +2554,7 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
 
 #ifdef CONFIG_PROC_FS
 struct igmp6_mc_iter_state {
+	struct seq_net_private p;
 	struct net_device *dev;
 	struct inet6_dev *idev;
 };
@@ -2204,12 +2565,12 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
 {
 	struct ifmcaddr6 *im = NULL;
 	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+	struct net *net = seq_file_net(seq);
 
-	for (state->dev = dev_base, state->idev = NULL;
-	     state->dev; 
-	     state->dev = state->dev->next) {
+	state->idev = NULL;
+	for_each_netdev_rcu(net, state->dev) {
 		struct inet6_dev *idev;
-		idev = in6_dev_get(state->dev);
+		idev = __in6_dev_get(state->dev);
 		if (!idev)
 			continue;
 		read_lock_bh(&idev->lock);
@@ -2219,7 +2580,6 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
 			break;
 		}
 		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
 	}
 	return im;
 }
@@ -2230,16 +2590,15 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr
 
 	im = im->next;
 	while (!im) {
-		if (likely(state->idev != NULL)) {
+		if (likely(state->idev != NULL))
 			read_unlock_bh(&state->idev->lock);
-			in6_dev_put(state->idev);
-		}
-		state->dev = state->dev->next;
+
+		state->dev = next_net_device_rcu(state->dev);
 		if (!state->dev) {
 			state->idev = NULL;
 			break;
 		}
-		state->idev = in6_dev_get(state->dev);
+		state->idev = __in6_dev_get(state->dev);
 		if (!state->idev)
 			continue;
 		read_lock_bh(&state->idev->lock);
@@ -2258,29 +2617,31 @@ static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
 {
-	read_lock(&dev_base_lock);
+	rcu_read_lock();
 	return igmp6_mc_get_idx(seq, *pos);
 }
 
 static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct ifmcaddr6 *im;
-	im = igmp6_mc_get_next(seq, v);
+	struct ifmcaddr6 *im = igmp6_mc_get_next(seq, v);
+
 	++*pos;
 	return im;
 }
 
 static void igmp6_mc_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
 {
 	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+
 	if (likely(state->idev != NULL)) {
 		read_unlock_bh(&state->idev->lock);
-		in6_dev_put(state->idev);
 		state->idev = NULL;
 	}
 	state->dev = NULL;
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static int igmp6_mc_seq_show(struct seq_file *seq, void *v)
@@ -2289,16 +2650,16 @@ static int igmp6_mc_seq_show(struct seq_file *seq, void *v)
 	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
 
 	seq_printf(seq,
-		   "%-4d %-15s %04x%04x%04x%04x%04x%04x%04x%04x %5d %08X %ld\n", 
+		   "%-4d %-15s %pi6 %5d %08X %ld\n",
 		   state->dev->ifindex, state->dev->name,
-		   NIP6(im->mca_addr),
+		   &im->mca_addr,
 		   im->mca_users, im->mca_flags,
 		   (im->mca_flags&MAF_TIMER_RUNNING) ?
 		   jiffies_to_clock_t(im->mca_timer.expires-jiffies) : 0);
 	return 0;
 }
 
-static struct seq_operations igmp6_mc_seq_ops = {
+static const struct seq_operations igmp6_mc_seq_ops = {
 	.start	=	igmp6_mc_seq_start,
 	.next	=	igmp6_mc_seq_next,
 	.stop	=	igmp6_mc_seq_stop,
@@ -2307,36 +2668,20 @@ static struct seq_operations igmp6_mc_seq_ops = {
 
 static int igmp6_mc_seq_open(struct inode *inode, struct file *file)
 {
-	struct seq_file *seq;
-	int rc = -ENOMEM;
-	struct igmp6_mc_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
-
-	if (!s)
-		goto out;
-
-	rc = seq_open(file, &igmp6_mc_seq_ops);
-	if (rc)
-		goto out_kfree;
-
-	seq = file->private_data;
-	seq->private = s;
-	memset(s, 0, sizeof(*s));
-out:
-	return rc;
-out_kfree:
-	kfree(s);
-	goto out;
+	return seq_open_net(inode, file, &igmp6_mc_seq_ops,
+			    sizeof(struct igmp6_mc_iter_state));
 }
 
-static struct file_operations igmp6_mc_seq_fops = {
+static const struct file_operations igmp6_mc_seq_fops = {
 	.owner		=	THIS_MODULE,
 	.open		=	igmp6_mc_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
-	.release	=	seq_release_private,
+	.release	=	seq_release_net,
 };
 
 struct igmp6_mcf_iter_state {
+	struct seq_net_private p;
 	struct net_device *dev;
 	struct inet6_dev *idev;
 	struct ifmcaddr6 *im;
@@ -2349,12 +2694,13 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
 	struct ip6_sf_list *psf = NULL;
 	struct ifmcaddr6 *im = NULL;
 	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
+	struct net *net = seq_file_net(seq);
 
-	for (state->dev = dev_base, state->idev = NULL, state->im = NULL;
-	     state->dev; 
-	     state->dev = state->dev->next) {
+	state->idev = NULL;
+	state->im = NULL;
+	for_each_netdev_rcu(net, state->dev) {
 		struct inet6_dev *idev;
-		idev = in6_dev_get(state->dev);
+		idev = __in6_dev_get(state->dev);
 		if (unlikely(idev == NULL))
 			continue;
 		read_lock_bh(&idev->lock);
@@ -2370,7 +2716,6 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
 			spin_unlock_bh(&im->mca_lock);
 		}
 		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
 	}
 	return psf;
 }
@@ -2384,16 +2729,15 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
 		spin_unlock_bh(&state->im->mca_lock);
 		state->im = state->im->next;
 		while (!state->im) {
-			if (likely(state->idev != NULL)) {
+			if (likely(state->idev != NULL))
 				read_unlock_bh(&state->idev->lock);
-				in6_dev_put(state->idev);
-			}
-			state->dev = state->dev->next;
+
+			state->dev = next_net_device_rcu(state->dev);
 			if (!state->dev) {
 				state->idev = NULL;
 				goto out;
 			}
-			state->idev = in6_dev_get(state->dev);
+			state->idev = __in6_dev_get(state->dev);
 			if (!state->idev)
 				continue;
 			read_lock_bh(&state->idev->lock);
@@ -2418,8 +2762,9 @@ static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
 {
-	read_lock(&dev_base_lock);
+	rcu_read_lock();
 	return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 }
 
@@ -2435,6 +2780,7 @@ static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
 {
 	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
 	if (likely(state->im != NULL)) {
@@ -2443,11 +2789,10 @@ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
 	}
 	if (likely(state->idev != NULL)) {
 		read_unlock_bh(&state->idev->lock);
-		in6_dev_put(state->idev);
 		state->idev = NULL;
 	}
 	state->dev = NULL;
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
@@ -2456,27 +2801,24 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
 	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
 
 	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq, 
+		seq_printf(seq,
 			   "%3s %6s "
 			   "%32s %32s %6s %6s\n", "Idx",
 			   "Device", "Multicast Address",
 			   "Source Address", "INC", "EXC");
 	} else {
 		seq_printf(seq,
-			   "%3d %6.6s "
-			   "%04x%04x%04x%04x%04x%04x%04x%04x "
-			   "%04x%04x%04x%04x%04x%04x%04x%04x "
-			   "%6lu %6lu\n",
+			   "%3d %6.6s %pi6 %pi6 %6lu %6lu\n",
 			   state->dev->ifindex, state->dev->name,
-			   NIP6(state->im->mca_addr),
-			   NIP6(psf->sf_addr),
+			   &state->im->mca_addr,
+			   &psf->sf_addr,
 			   psf->sf_count[MCAST_INCLUDE],
 			   psf->sf_count[MCAST_EXCLUDE]);
 	}
 	return 0;
 }
 
-static struct seq_operations igmp6_mcf_seq_ops = {
+static const struct seq_operations igmp6_mcf_seq_ops = {
 	.start	=	igmp6_mcf_seq_start,
 	.next	=	igmp6_mcf_seq_next,
 	.stop	=	igmp6_mcf_seq_stop,
@@ -2485,73 +2827,95 @@ static struct seq_operations igmp6_mcf_seq_ops = {
 
 static int igmp6_mcf_seq_open(struct inode *inode, struct file *file)
 {
-	struct seq_file *seq;
-	int rc = -ENOMEM;
-	struct igmp6_mcf_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
-	
-	if (!s)
-		goto out;
-
-	rc = seq_open(file, &igmp6_mcf_seq_ops);
-	if (rc)
-		goto out_kfree;
-
-	seq = file->private_data;
-	seq->private = s;
-	memset(s, 0, sizeof(*s));
-out:
-	return rc;
-out_kfree:
-	kfree(s);
-	goto out;
+	return seq_open_net(inode, file, &igmp6_mcf_seq_ops,
+			    sizeof(struct igmp6_mcf_iter_state));
 }
 
-static struct file_operations igmp6_mcf_seq_fops = {
+static const struct file_operations igmp6_mcf_seq_fops = {
 	.owner		=	THIS_MODULE,
 	.open		=	igmp6_mcf_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
-	.release	=	seq_release_private,
+	.release	=	seq_release_net,
 };
+
+static int __net_init igmp6_proc_init(struct net *net)
+{
+	int err;
+
+	err = -ENOMEM;
+	if (!proc_create("igmp6", S_IRUGO, net->proc_net, &igmp6_mc_seq_fops))
+		goto out;
+	if (!proc_create("mcfilter6", S_IRUGO, net->proc_net,
+			 &igmp6_mcf_seq_fops))
+		goto out_proc_net_igmp6;
+
+	err = 0;
+out:
+	return err;
+
+out_proc_net_igmp6:
+	remove_proc_entry("igmp6", net->proc_net);
+	goto out;
+}
+
+static void __net_exit igmp6_proc_exit(struct net *net)
+{
+	remove_proc_entry("mcfilter6", net->proc_net);
+	remove_proc_entry("igmp6", net->proc_net);
+}
+#else
+static inline int igmp6_proc_init(struct net *net)
+{
+	return 0;
+}
+static inline void igmp6_proc_exit(struct net *net)
+{
+}
 #endif
 
-int __init igmp6_init(struct net_proto_family *ops)
+static int __net_init igmp6_net_init(struct net *net)
 {
-	struct ipv6_pinfo *np;
-	struct sock *sk;
 	int err;
 
-	err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &igmp6_socket);
+	err = inet_ctl_sock_create(&net->ipv6.igmp_sk, PF_INET6,
+				   SOCK_RAW, IPPROTO_ICMPV6, net);
 	if (err < 0) {
-		printk(KERN_ERR
-		       "Failed to initialize the IGMP6 control socket (err %d).\n",
+		pr_err("Failed to initialize the IGMP6 control socket (err %d)\n",
 		       err);
-		igmp6_socket = NULL; /* For safety. */
-		return err;
+		goto out;
 	}
 
-	sk = igmp6_socket->sk;
-	sk->sk_allocation = GFP_ATOMIC;
-	sk->sk_prot->unhash(sk);
+	inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1;
 
-	np = inet6_sk(sk);
-	np->hop_limit = 1;
+	err = igmp6_proc_init(net);
+	if (err)
+		goto out_sock_create;
+out:
+	return err;
 
-#ifdef CONFIG_PROC_FS
-	proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops);
-	proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
-#endif
+out_sock_create:
+	inet_ctl_sock_destroy(net->ipv6.igmp_sk);
+	goto out;
+}
 
-	return 0;
+static void __net_exit igmp6_net_exit(struct net *net)
+{
+	inet_ctl_sock_destroy(net->ipv6.igmp_sk);
+	igmp6_proc_exit(net);
 }
 
-void igmp6_cleanup(void)
+static struct pernet_operations igmp6_net_ops = {
+	.init = igmp6_net_init,
+	.exit = igmp6_net_exit,
+};
+
+int __init igmp6_init(void)
 {
-	sock_release(igmp6_socket);
-	igmp6_socket = NULL; /* for safety */
+	return register_pernet_subsys(&igmp6_net_ops);
+}
 
-#ifdef CONFIG_PROC_FS
-	proc_net_remove("mcfilter6");
-	proc_net_remove("igmp6");
-#endif
+void igmp6_cleanup(void)
+{
+	unregister_pernet_subsys(&igmp6_net_ops);
 }
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
new file mode 100644
index 00000000000..db9b6cbc9db
--- /dev/null
+++ b/net/ipv6/mip6.c
@@ -0,0 +1,528 @@
+/*
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Authors:
+ *	Noriaki TAKAMIYA @USAGI
+ *	Masahide NAKAMURA @USAGI
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/time.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <net/sock.h>
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <net/rawv6.h>
+#include <net/xfrm.h>
+#include <net/mip6.h>
+
+static inline unsigned int calc_padlen(unsigned int len, unsigned int n)
+{
+	return (n - len + 16) & 0x7;
+}
+
+static inline void *mip6_padn(__u8 *data, __u8 padlen)
+{
+	if (!data)
+		return NULL;
+	if (padlen == 1) {
+		data[0] = IPV6_TLV_PAD1;
+	} else if (padlen > 1) {
+		data[0] = IPV6_TLV_PADN;
+		data[1] = padlen - 2;
+		if (padlen > 2)
+			memset(data+2, 0, data[1]);
+	}
+	return data + padlen;
+}
+
+static inline void mip6_param_prob(struct sk_buff *skb, u8 code, int pos)
+{
+	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
+}
+
+static int mip6_mh_len(int type)
+{
+	int len = 0;
+
+	switch (type) {
+	case IP6_MH_TYPE_BRR:
+		len = 0;
+		break;
+	case IP6_MH_TYPE_HOTI:
+	case IP6_MH_TYPE_COTI:
+	case IP6_MH_TYPE_BU:
+	case IP6_MH_TYPE_BACK:
+		len = 1;
+		break;
+	case IP6_MH_TYPE_HOT:
+	case IP6_MH_TYPE_COT:
+	case IP6_MH_TYPE_BERROR:
+		len = 2;
+		break;
+	}
+	return len;
+}
+
+static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
+{
+	struct ip6_mh _hdr;
+	const struct ip6_mh *mh;
+
+	mh = skb_header_pointer(skb, skb_transport_offset(skb),
+				sizeof(_hdr), &_hdr);
+	if (!mh)
+		return -1;
+
+	if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len)
+		return -1;
+
+	if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
+		LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
+			       mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
+		mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) +
+				skb_network_header_len(skb));
+		return -1;
+	}
+
+	if (mh->ip6mh_proto != IPPROTO_NONE) {
+		LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
+			       mh->ip6mh_proto);
+		mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) +
+				skb_network_header_len(skb));
+		return -1;
+	}
+
+	return 0;
+}
+
+struct mip6_report_rate_limiter {
+	spinlock_t lock;
+	struct timeval stamp;
+	int iif;
+	struct in6_addr src;
+	struct in6_addr dst;
+};
+
+static struct mip6_report_rate_limiter mip6_report_rl = {
+	.lock = __SPIN_LOCK_UNLOCKED(mip6_report_rl.lock)
+};
+
+static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
+	int err = destopt->nexthdr;
+
+	spin_lock(&x->lock);
+	if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
+	    !ipv6_addr_any((struct in6_addr *)x->coaddr))
+		err = -ENOENT;
+	spin_unlock(&x->lock);
+
+	return err;
+}
+
+/* Destination Option Header is inserted.
+ * IP Header's src address is replaced with Home Address Option in
+ * Destination Option Header.
+ */
+static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph;
+	struct ipv6_destopt_hdr *dstopt;
+	struct ipv6_destopt_hao *hao;
+	u8 nexthdr;
+	int len;
+
+	skb_push(skb, -skb_network_offset(skb));
+	iph = ipv6_hdr(skb);
+
+	nexthdr = *skb_mac_header(skb);
+	*skb_mac_header(skb) = IPPROTO_DSTOPTS;
+
+	dstopt = (struct ipv6_destopt_hdr *)skb_transport_header(skb);
+	dstopt->nexthdr = nexthdr;
+
+	hao = mip6_padn((char *)(dstopt + 1),
+			calc_padlen(sizeof(*dstopt), 6));
+
+	hao->type = IPV6_TLV_HAO;
+	BUILD_BUG_ON(sizeof(*hao) != 18);
+	hao->length = sizeof(*hao) - 2;
+
+	len = ((char *)hao - (char *)dstopt) + sizeof(*hao);
+
+	memcpy(&hao->addr, &iph->saddr, sizeof(hao->addr));
+	spin_lock_bh(&x->lock);
+	memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr));
+	spin_unlock_bh(&x->lock);
+
+	WARN_ON(len != x->props.header_len);
+	dstopt->hdrlen = (x->props.header_len >> 3) - 1;
+
+	return 0;
+}
+
+static inline int mip6_report_rl_allow(struct timeval *stamp,
+				       const struct in6_addr *dst,
+				       const struct in6_addr *src, int iif)
+{
+	int allow = 0;
+
+	spin_lock_bh(&mip6_report_rl.lock);
+	if (mip6_report_rl.stamp.tv_sec != stamp->tv_sec ||
+	    mip6_report_rl.stamp.tv_usec != stamp->tv_usec ||
+	    mip6_report_rl.iif != iif ||
+	    !ipv6_addr_equal(&mip6_report_rl.src, src) ||
+	    !ipv6_addr_equal(&mip6_report_rl.dst, dst)) {
+		mip6_report_rl.stamp.tv_sec = stamp->tv_sec;
+		mip6_report_rl.stamp.tv_usec = stamp->tv_usec;
+		mip6_report_rl.iif = iif;
+		mip6_report_rl.src = *src;
+		mip6_report_rl.dst = *dst;
+		allow = 1;
+	}
+	spin_unlock_bh(&mip6_report_rl.lock);
+	return allow;
+}
+
+static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
+			       const struct flowi *fl)
+{
+	struct net *net = xs_net(x);
+	struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
+	const struct flowi6 *fl6 = &fl->u.ip6;
+	struct ipv6_destopt_hao *hao = NULL;
+	struct xfrm_selector sel;
+	int offset;
+	struct timeval stamp;
+	int err = 0;
+
+	if (unlikely(fl6->flowi6_proto == IPPROTO_MH &&
+		     fl6->fl6_mh_type <= IP6_MH_TYPE_MAX))
+		goto out;
+
+	if (likely(opt->dsthao)) {
+		offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
+		if (likely(offset >= 0))
+			hao = (struct ipv6_destopt_hao *)
+					(skb_network_header(skb) + offset);
+	}
+
+	skb_get_timestamp(skb, &stamp);
+
+	if (!mip6_report_rl_allow(&stamp, &ipv6_hdr(skb)->daddr,
+				  hao ? &hao->addr : &ipv6_hdr(skb)->saddr,
+				  opt->iif))
+		goto out;
+
+	memset(&sel, 0, sizeof(sel));
+	memcpy(&sel.daddr, (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
+	       sizeof(sel.daddr));
+	sel.prefixlen_d = 128;
+	memcpy(&sel.saddr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
+	       sizeof(sel.saddr));
+	sel.prefixlen_s = 128;
+	sel.family = AF_INET6;
+	sel.proto = fl6->flowi6_proto;
+	sel.dport = xfrm_flowi_dport(fl, &fl6->uli);
+	if (sel.dport)
+		sel.dport_mask = htons(~0);
+	sel.sport = xfrm_flowi_sport(fl, &fl6->uli);
+	if (sel.sport)
+		sel.sport_mask = htons(~0);
+	sel.ifindex = fl6->flowi6_oif;
+
+	err = km_report(net, IPPROTO_DSTOPTS, &sel,
+			(hao ? (xfrm_address_t *)&hao->addr : NULL));
+
+ out:
+	return err;
+}
+
+static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
+			       u8 **nexthdr)
+{
+	u16 offset = sizeof(struct ipv6hdr);
+	struct ipv6_opt_hdr *exthdr =
+				   (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+	const unsigned char *nh = skb_network_header(skb);
+	unsigned int packet_len = skb_tail_pointer(skb) -
+		skb_network_header(skb);
+	int found_rhdr = 0;
+
+	*nexthdr = &ipv6_hdr(skb)->nexthdr;
+
+	while (offset + 1 <= packet_len) {
+
+		switch (**nexthdr) {
+		case NEXTHDR_HOP:
+			break;
+		case NEXTHDR_ROUTING:
+			found_rhdr = 1;
+			break;
+		case NEXTHDR_DEST:
+			/*
+			 * HAO MUST NOT appear more than once.
+			 * XXX: It is better to try to find by the end of
+			 * XXX: packet if HAO exists.
+			 */
+			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
+				LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n");
+				return offset;
+			}
+
+			if (found_rhdr)
+				return offset;
+
+			break;
+		default:
+			return offset;
+		}
+
+		offset += ipv6_optlen(exthdr);
+		*nexthdr = &exthdr->nexthdr;
+		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
+	}
+
+	return offset;
+}
+
+static int mip6_destopt_init_state(struct xfrm_state *x)
+{
+	if (x->id.spi) {
+		pr_info("%s: spi is not 0: %u\n", __func__, x->id.spi);
+		return -EINVAL;
+	}
+	if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
+		pr_info("%s: state's mode is not %u: %u\n",
+			__func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
+		return -EINVAL;
+	}
+
+	x->props.header_len = sizeof(struct ipv6_destopt_hdr) +
+		calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) +
+		sizeof(struct ipv6_destopt_hao);
+	WARN_ON(x->props.header_len != 24);
+
+	return 0;
+}
+
+/*
+ * Do nothing about destroying since it has no specific operation for
+ * destination options header unlike IPsec protocols.
+ */
+static void mip6_destopt_destroy(struct xfrm_state *x)
+{
+}
+
+static const struct xfrm_type mip6_destopt_type =
+{
+	.description	= "MIP6DESTOPT",
+	.owner		= THIS_MODULE,
+	.proto	     	= IPPROTO_DSTOPTS,
+	.flags		= XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR,
+	.init_state	= mip6_destopt_init_state,
+	.destructor	= mip6_destopt_destroy,
+	.input		= mip6_destopt_input,
+	.output		= mip6_destopt_output,
+	.reject		= mip6_destopt_reject,
+	.hdr_offset	= mip6_destopt_offset,
+};
+
+static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
+	int err = rt2->rt_hdr.nexthdr;
+
+	spin_lock(&x->lock);
+	if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) &&
+	    !ipv6_addr_any((struct in6_addr *)x->coaddr))
+		err = -ENOENT;
+	spin_unlock(&x->lock);
+
+	return err;
+}
+
+/* Routing Header type 2 is inserted.
+ * IP Header's dst address is replaced with Routing Header's Home Address.
+ */
+static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph;
+	struct rt2_hdr *rt2;
+	u8 nexthdr;
+
+	skb_push(skb, -skb_network_offset(skb));
+	iph = ipv6_hdr(skb);
+
+	nexthdr = *skb_mac_header(skb);
+	*skb_mac_header(skb) = IPPROTO_ROUTING;
+
+	rt2 = (struct rt2_hdr *)skb_transport_header(skb);
+	rt2->rt_hdr.nexthdr = nexthdr;
+	rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1;
+	rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2;
+	rt2->rt_hdr.segments_left = 1;
+	memset(&rt2->reserved, 0, sizeof(rt2->reserved));
+
+	WARN_ON(rt2->rt_hdr.hdrlen != 2);
+
+	memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr));
+	spin_lock_bh(&x->lock);
+	memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr));
+	spin_unlock_bh(&x->lock);
+
+	return 0;
+}
+
+static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
+			     u8 **nexthdr)
+{
+	u16 offset = sizeof(struct ipv6hdr);
+	struct ipv6_opt_hdr *exthdr =
+				   (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+	const unsigned char *nh = skb_network_header(skb);
+	unsigned int packet_len = skb_tail_pointer(skb) -
+		skb_network_header(skb);
+	int found_rhdr = 0;
+
+	*nexthdr = &ipv6_hdr(skb)->nexthdr;
+
+	while (offset + 1 <= packet_len) {
+
+		switch (**nexthdr) {
+		case NEXTHDR_HOP:
+			break;
+		case NEXTHDR_ROUTING:
+			if (offset + 3 <= packet_len) {
+				struct ipv6_rt_hdr *rt;
+				rt = (struct ipv6_rt_hdr *)(nh + offset);
+				if (rt->type != 0)
+					return offset;
+			}
+			found_rhdr = 1;
+			break;
+		case NEXTHDR_DEST:
+			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+				return offset;
+
+			if (found_rhdr)
+				return offset;
+
+			break;
+		default:
+			return offset;
+		}
+
+		offset += ipv6_optlen(exthdr);
+		*nexthdr = &exthdr->nexthdr;
+		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
+	}
+
+	return offset;
+}
+
+static int mip6_rthdr_init_state(struct xfrm_state *x)
+{
+	if (x->id.spi) {
+		pr_info("%s: spi is not 0: %u\n", __func__, x->id.spi);
+		return -EINVAL;
+	}
+	if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
+		pr_info("%s: state's mode is not %u: %u\n",
+			__func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
+		return -EINVAL;
+	}
+
+	x->props.header_len = sizeof(struct rt2_hdr);
+
+	return 0;
+}
+
+/*
+ * Do nothing about destroying since it has no specific operation for routing
+ * header type 2 unlike IPsec protocols.
+ */
+static void mip6_rthdr_destroy(struct xfrm_state *x)
+{
+}
+
+static const struct xfrm_type mip6_rthdr_type =
+{
+	.description	= "MIP6RT",
+	.owner		= THIS_MODULE,
+	.proto	     	= IPPROTO_ROUTING,
+	.flags		= XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR,
+	.init_state	= mip6_rthdr_init_state,
+	.destructor	= mip6_rthdr_destroy,
+	.input		= mip6_rthdr_input,
+	.output		= mip6_rthdr_output,
+	.hdr_offset	= mip6_rthdr_offset,
+};
+
+static int __init mip6_init(void)
+{
+	pr_info("Mobile IPv6\n");
+
+	if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) {
+		pr_info("%s: can't add xfrm type(destopt)\n", __func__);
+		goto mip6_destopt_xfrm_fail;
+	}
+	if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) {
+		pr_info("%s: can't add xfrm type(rthdr)\n", __func__);
+		goto mip6_rthdr_xfrm_fail;
+	}
+	if (rawv6_mh_filter_register(mip6_mh_filter) < 0) {
+		pr_info("%s: can't add rawv6 mh filter\n", __func__);
+		goto mip6_rawv6_mh_fail;
+	}
+
+
+	return 0;
+
+ mip6_rawv6_mh_fail:
+	xfrm_unregister_type(&mip6_rthdr_type, AF_INET6);
+ mip6_rthdr_xfrm_fail:
+	xfrm_unregister_type(&mip6_destopt_type, AF_INET6);
+ mip6_destopt_xfrm_fail:
+	return -EAGAIN;
+}
+
+static void __exit mip6_fini(void)
+{
+	if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0)
+		pr_info("%s: can't remove rawv6 mh filter\n", __func__);
+	if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
+		pr_info("%s: can't remove xfrm type(rthdr)\n", __func__);
+	if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
+		pr_info("%s: can't remove xfrm type(destopt)\n", __func__);
+}
+
+module_init(mip6_init);
+module_exit(mip6_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS);
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 305d9ee6d7d..ca8d4ea48a5 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1,9 +1,9 @@
 /*
  *	Neighbour Discovery for IPv6
- *	Linux INET6 implementation 
+ *	Linux INET6 implementation
  *
  *	Authors:
- *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *	Pedro Roque		<roque@di.fc.ul.pt>
  *	Mike Shaver		<shaver@ingenia.com>
  *
  *	This program is free software; you can redistribute it and/or
@@ -15,9 +15,11 @@
 /*
  *	Changes:
  *
+ *	Alexey I. Froloff		:	RFC6106 (DNSSL) support
+ *	Pierre Ynard			:	export userland ND options
+ *						through netlink (RDNSS support)
  *	Lars Fenneberg			:	fixed MTU setting on receipt
  *						of an RA.
- *
  *	Janos Farkas			:	kmalloc failure checks
  *	Alexey Kuznetsov		:	state machine reworked
  *						and moved to net/core.
@@ -25,30 +27,9 @@
  *	YOSHIFUJI Hideaki @USAGI	:	Verify ND options properly
  */
 
-/* Set to 3 to get tracing... */
-#define ND_DEBUG 1
-
-#define ND_PRINTK(fmt, args...) do { if (net_ratelimit()) { printk(fmt, ## args); } } while(0)
-#define ND_NOPRINTK(x...) do { ; } while(0)
-#define ND_PRINTK0 ND_PRINTK
-#define ND_PRINTK1 ND_NOPRINTK
-#define ND_PRINTK2 ND_NOPRINTK
-#define ND_PRINTK3 ND_NOPRINTK
-#if ND_DEBUG >= 1
-#undef ND_PRINTK1
-#define ND_PRINTK1 ND_PRINTK
-#endif
-#if ND_DEBUG >= 2
-#undef ND_PRINTK2
-#define ND_PRINTK2 ND_PRINTK
-#endif
-#if ND_DEBUG >= 3
-#undef ND_PRINTK3
-#define ND_PRINTK3 ND_PRINTK
-#endif
+#define pr_fmt(fmt) "ICMPv6: " fmt
 
 #include <linux/module.h>
-#include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
@@ -59,10 +40,12 @@
 #include <linux/route.h>
 #include <linux/init.h>
 #include <linux/rcupdate.h>
+#include <linux/slab.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
 
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
@@ -78,16 +61,29 @@
 #include <net/addrconf.h>
 #include <net/icmp.h>
 
+#include <net/netlink.h>
+#include <linux/rtnetlink.h>
+
 #include <net/flow.h>
 #include <net/ip6_checksum.h>
+#include <net/inet_common.h>
 #include <linux/proc_fs.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
 
-static struct socket *ndisc_socket;
+/* Set to 3 to get tracing... */
+#define ND_DEBUG 1
+
+#define ND_PRINTK(val, level, fmt, ...)				\
+do {								\
+	if (val <= ND_DEBUG)					\
+		net_##level##_ratelimited(fmt, ##__VA_ARGS__);	\
+} while (0)
 
-static u32 ndisc_hash(const void *pkey, const struct net_device *dev);
+static u32 ndisc_hash(const void *pkey,
+		      const struct net_device *dev,
+		      __u32 *hash_rnd);
 static int ndisc_constructor(struct neighbour *neigh);
 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
 static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -95,38 +91,31 @@ static int pndisc_constructor(struct pneigh_entry *n);
 static void pndisc_destructor(struct pneigh_entry *n);
 static void pndisc_redo(struct sk_buff *skb);
 
-static struct neigh_ops ndisc_generic_ops = {
+static const struct neigh_ops ndisc_generic_ops = {
 	.family =		AF_INET6,
 	.solicit =		ndisc_solicit,
 	.error_report =		ndisc_error_report,
 	.output =		neigh_resolve_output,
 	.connected_output =	neigh_connected_output,
-	.hh_output =		dev_queue_xmit,
-	.queue_xmit =		dev_queue_xmit,
 };
 
-static struct neigh_ops ndisc_hh_ops = {
+static const struct neigh_ops ndisc_hh_ops = {
 	.family =		AF_INET6,
 	.solicit =		ndisc_solicit,
 	.error_report =		ndisc_error_report,
 	.output =		neigh_resolve_output,
 	.connected_output =	neigh_resolve_output,
-	.hh_output =		dev_queue_xmit,
-	.queue_xmit =		dev_queue_xmit,
 };
 
 
-static struct neigh_ops ndisc_direct_ops = {
+static const struct neigh_ops ndisc_direct_ops = {
 	.family =		AF_INET6,
-	.output =		dev_queue_xmit,
-	.connected_output =	dev_queue_xmit,
-	.hh_output =		dev_queue_xmit,
-	.queue_xmit =		dev_queue_xmit,
+	.output =		neigh_direct_output,
+	.connected_output =	neigh_direct_output,
 };
 
 struct neigh_table nd_tbl = {
 	.family =	AF_INET6,
-	.entry_size =	sizeof(struct neighbour) + sizeof(struct in6_addr),
 	.key_len =	sizeof(struct in6_addr),
 	.hash =		ndisc_hash,
 	.constructor =	ndisc_constructor,
@@ -135,18 +124,20 @@ struct neigh_table nd_tbl = {
 	.proxy_redo =	pndisc_redo,
 	.id =		"ndisc_cache",
 	.parms = {
-		.tbl =			&nd_tbl,
-		.base_reachable_time =	30 * HZ,
-		.retrans_time =	 1 * HZ,
-		.gc_staletime =	60 * HZ,
-		.reachable_time =		30 * HZ,
-		.delay_probe_time =	 5 * HZ,
-		.queue_len =		 3,
-		.ucast_probes =	 3,
-		.mcast_probes =	 3,
-		.anycast_delay =	 1 * HZ,
-		.proxy_delay =		(8 * HZ) / 10,
-		.proxy_qlen =		64,
+		.tbl			= &nd_tbl,
+		.reachable_time		= ND_REACHABLE_TIME,
+		.data = {
+			[NEIGH_VAR_MCAST_PROBES] = 3,
+			[NEIGH_VAR_UCAST_PROBES] = 3,
+			[NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER,
+			[NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME,
+			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
+			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
+			[NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,
+			[NEIGH_VAR_PROXY_QLEN] = 64,
+			[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
+			[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
+		},
 	},
 	.gc_interval =	  30 * HZ,
 	.gc_thresh1 =	 128,
@@ -154,44 +145,12 @@ struct neigh_table nd_tbl = {
 	.gc_thresh3 =	1024,
 };
 
-/* ND options */
-struct ndisc_options {
-	struct nd_opt_hdr *nd_opt_array[__ND_OPT_MAX];
-};
-
-#define nd_opts_src_lladdr	nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
-#define nd_opts_tgt_lladdr	nd_opt_array[ND_OPT_TARGET_LL_ADDR]
-#define nd_opts_pi		nd_opt_array[ND_OPT_PREFIX_INFO]
-#define nd_opts_pi_end		nd_opt_array[__ND_OPT_PREFIX_INFO_END]
-#define nd_opts_rh		nd_opt_array[ND_OPT_REDIRECT_HDR]
-#define nd_opts_mtu		nd_opt_array[ND_OPT_MTU]
-
-#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
-
-/*
- * Return the padding between the option length and the start of the
- * link addr.  Currently only IP-over-InfiniBand needs this, although
- * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
- * also need a pad of 2.
- */
-static int ndisc_addr_option_pad(unsigned short type)
-{
-	switch (type) {
-	case ARPHRD_INFINIBAND: return 2;
-	default:                return 0;
-	}
-}
-
-static inline int ndisc_opt_addr_space(struct net_device *dev)
-{
-	return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
-}
-
-static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
-				  unsigned short addr_type)
+static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data)
 {
-	int space = NDISC_OPT_SPACE(data_len);
-	int pad   = ndisc_addr_option_pad(addr_type);
+	int pad   = ndisc_addr_option_pad(skb->dev->type);
+	int data_len = skb->dev->addr_len;
+	int space = ndisc_opt_addr_space(skb->dev);
+	u8 *opt = skb_put(skb, space);
 
 	opt[0] = type;
 	opt[1] = space>>3;
@@ -205,7 +164,6 @@ static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
 	opt += data_len;
 	if ((space -= data_len) > 0)
 		memset(opt, 0, space);
-	return opt + space;
 }
 
 static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
@@ -218,11 +176,28 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
 	do {
 		cur = ((void *)cur) + (cur->nd_opt_len << 3);
 	} while(cur < end && cur->nd_opt_type != type);
-	return (cur <= end && cur->nd_opt_type == type ? cur : NULL);
+	return cur <= end && cur->nd_opt_type == type ? cur : NULL;
 }
 
-static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
-						 struct ndisc_options *ndopts)
+static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
+{
+	return opt->nd_opt_type == ND_OPT_RDNSS ||
+		opt->nd_opt_type == ND_OPT_DNSSL;
+}
+
+static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
+					     struct nd_opt_hdr *end)
+{
+	if (!cur || !end || cur >= end)
+		return NULL;
+	do {
+		cur = ((void *)cur) + (cur->nd_opt_len << 3);
+	} while(cur < end && !ndisc_is_useropt(cur));
+	return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
+}
+
+struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
+					  struct ndisc_options *ndopts)
 {
 	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
 
@@ -242,28 +217,42 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
 		case ND_OPT_MTU:
 		case ND_OPT_REDIRECT_HDR:
 			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
-				ND_PRINTK2(KERN_WARNING
-					   "%s(): duplicated ND6 option found: type=%d\n",
-					   __FUNCTION__,
-					   nd_opt->nd_opt_type);
+				ND_PRINTK(2, warn,
+					  "%s: duplicated ND6 option found: type=%d\n",
+					  __func__, nd_opt->nd_opt_type);
 			} else {
 				ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
 			}
 			break;
 		case ND_OPT_PREFIX_INFO:
 			ndopts->nd_opts_pi_end = nd_opt;
-			if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0)
+			if (!ndopts->nd_opt_array[nd_opt->nd_opt_type])
 				ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
 			break;
+#ifdef CONFIG_IPV6_ROUTE_INFO
+		case ND_OPT_ROUTE_INFO:
+			ndopts->nd_opts_ri_end = nd_opt;
+			if (!ndopts->nd_opts_ri)
+				ndopts->nd_opts_ri = nd_opt;
+			break;
+#endif
 		default:
-			/*
-			 * Unknown options must be silently ignored,
-			 * to accommodate future extension to the protocol.
-			 */
-			ND_PRINTK2(KERN_NOTICE
-				   "%s(): ignored unsupported option; type=%d, len=%d\n",
-				   __FUNCTION__,
-				   nd_opt->nd_opt_type, nd_opt->nd_opt_len);
+			if (ndisc_is_useropt(nd_opt)) {
+				ndopts->nd_useropts_end = nd_opt;
+				if (!ndopts->nd_useropts)
+					ndopts->nd_useropts = nd_opt;
+			} else {
+				/*
+				 * Unknown options must be silently ignored,
+				 * to accommodate future extension to the
+				 * protocol.
+				 */
+				ND_PRINTK(2, notice,
+					  "%s: ignored unsupported option; type=%d, len=%d\n",
+					  __func__,
+					  nd_opt->nd_opt_type,
+					  nd_opt->nd_opt_len);
+			}
 		}
 		opt_len -= l;
 		nd_opt = ((void *)nd_opt) + l;
@@ -271,18 +260,7 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
 	return ndopts;
 }
 
-static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
-				      struct net_device *dev)
-{
-	u8 *lladdr = (u8 *)(p + 1);
-	int lladdrlen = p->nd_opt_len << 3;
-	int prepad = ndisc_addr_option_pad(dev->type);
-	if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
-		return NULL;
-	return (lladdr + prepad);
-}
-
-int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
+int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
 {
 	switch (dev->type) {
 	case ARPHRD_ETHER:
@@ -290,15 +268,14 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d
 	case ARPHRD_FDDI:
 		ipv6_eth_mc_map(addr, buf);
 		return 0;
-	case ARPHRD_IEEE802_TR:
-		ipv6_tr_mc_map(addr,buf);
-		return 0;
 	case ARPHRD_ARCNET:
 		ipv6_arcnet_mc_map(addr, buf);
 		return 0;
 	case ARPHRD_INFINIBAND:
-		ipv6_ib_mc_map(addr, buf);
+		ipv6_ib_mc_map(addr, dev->broadcast, buf);
 		return 0;
+	case ARPHRD_IPGRE:
+		return ipv6_ipgre_mc_map(addr, dev->broadcast, buf);
 	default:
 		if (dir) {
 			memcpy(buf, dev->broadcast, dev->addr_len);
@@ -308,16 +285,13 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d
 	return -EINVAL;
 }
 
-static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
-{
-	const u32 *p32 = pkey;
-	u32 addr_hash, i;
-
-	addr_hash = 0;
-	for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++)
-		addr_hash ^= *p32++;
+EXPORT_SYMBOL(ndisc_mc_map);
 
-	return jhash_2words(addr_hash, dev->ifindex, nd_tbl.hash_rnd);
+static u32 ndisc_hash(const void *pkey,
+		      const struct net_device *dev,
+		      __u32 *hash_rnd)
+{
+	return ndisc_hashfn(pkey, dev, hash_rnd);
 }
 
 static int ndisc_constructor(struct neighbour *neigh)
@@ -326,25 +300,22 @@ static int ndisc_constructor(struct neighbour *neigh)
 	struct net_device *dev = neigh->dev;
 	struct inet6_dev *in6_dev;
 	struct neigh_parms *parms;
-	int is_multicast = ipv6_addr_is_multicast(addr);
+	bool is_multicast = ipv6_addr_is_multicast(addr);
 
-	rcu_read_lock();
 	in6_dev = in6_dev_get(dev);
 	if (in6_dev == NULL) {
-		rcu_read_unlock();
 		return -EINVAL;
 	}
 
 	parms = in6_dev->nd_parms;
 	__neigh_parms_put(neigh->parms);
 	neigh->parms = neigh_parms_clone(parms);
-	rcu_read_unlock();
 
 	neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
-	if (dev->hard_header == NULL) {
+	if (!dev->header_ops) {
 		neigh->nud_state = NUD_NOARP;
 		neigh->ops = &ndisc_direct_ops;
-		neigh->output = neigh->ops->queue_xmit;
+		neigh->output = neigh_direct_output;
 	} else {
 		if (is_multicast) {
 			neigh->nud_state = NUD_NOARP;
@@ -358,7 +329,7 @@ static int ndisc_constructor(struct neighbour *neigh)
 			neigh->nud_state = NUD_NOARP;
 			memcpy(neigh->ha, dev->broadcast, dev->addr_len);
 		}
-		if (dev->hard_header_cache)
+		if (dev->header_ops->cache)
 			neigh->ops = &ndisc_hh_ops;
 		else
 			neigh->ops = &ndisc_generic_ops;
@@ -396,274 +367,273 @@ static void pndisc_destructor(struct pneigh_entry *n)
 	ipv6_dev_mc_dec(dev, &maddr);
 }
 
-/*
- *	Send a Neighbour Advertisement
- */
+static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
+				       int len)
+{
+	int hlen = LL_RESERVED_SPACE(dev);
+	int tlen = dev->needed_tailroom;
+	struct sock *sk = dev_net(dev)->ipv6.ndisc_sk;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
+	if (!skb) {
+		ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n",
+			  __func__);
+		return NULL;
+	}
+
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->dev = dev;
+
+	skb_reserve(skb, hlen + sizeof(struct ipv6hdr));
+	skb_reset_transport_header(skb);
 
-static inline void ndisc_flow_init(struct flowi *fl, u8 type,
-			    struct in6_addr *saddr, struct in6_addr *daddr)
+	/* Manually assign socket ownership as we avoid calling
+	 * sock_alloc_send_pskb() to bypass wmem buffer limits
+	 */
+	skb_set_owner_w(skb, sk);
+
+	return skb;
+}
+
+static void ip6_nd_hdr(struct sk_buff *skb,
+		       const struct in6_addr *saddr,
+		       const struct in6_addr *daddr,
+		       int hop_limit, int len)
 {
-	memset(fl, 0, sizeof(*fl));
-	ipv6_addr_copy(&fl->fl6_src, saddr);
-	ipv6_addr_copy(&fl->fl6_dst, daddr);
-	fl->proto	 	= IPPROTO_ICMPV6;
-	fl->fl_icmp_type	= type;
-	fl->fl_icmp_code	= 0;
+	struct ipv6hdr *hdr;
+
+	skb_push(skb, sizeof(*hdr));
+	skb_reset_network_header(skb);
+	hdr = ipv6_hdr(skb);
+
+	ip6_flow_hdr(hdr, 0, 0);
+
+	hdr->payload_len = htons(len);
+	hdr->nexthdr = IPPROTO_ICMPV6;
+	hdr->hop_limit = hop_limit;
+
+	hdr->saddr = *saddr;
+	hdr->daddr = *daddr;
 }
 
-static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
-		   struct in6_addr *daddr, struct in6_addr *solicited_addr,
-	 	   int router, int solicited, int override, int inc_opt) 
+static void ndisc_send_skb(struct sk_buff *skb,
+			   const struct in6_addr *daddr,
+			   const struct in6_addr *saddr)
 {
-	struct in6_addr tmpaddr;
-	struct inet6_ifaddr *ifp;
+	struct dst_entry *dst = skb_dst(skb);
+	struct net *net = dev_net(skb->dev);
+	struct sock *sk = net->ipv6.ndisc_sk;
 	struct inet6_dev *idev;
-	struct flowi fl;
-	struct dst_entry* dst;
-        struct sock *sk = ndisc_socket->sk;
-	struct in6_addr *src_addr;
-        struct nd_msg *msg;
-        int len;
-        struct sk_buff *skb;
 	int err;
+	struct icmp6hdr *icmp6h = icmp6_hdr(skb);
+	u8 type;
+
+	type = icmp6h->icmp6_type;
+
+	if (!dst) {
+		struct flowi6 fl6;
 
-	len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
+		icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex);
+		dst = icmp6_dst_alloc(skb->dev, &fl6);
+		if (IS_ERR(dst)) {
+			kfree_skb(skb);
+			return;
+		}
+
+		skb_dst_set(skb, dst);
+	}
+
+	icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len,
+					      IPPROTO_ICMPV6,
+					      csum_partial(icmp6h,
+							   skb->len, 0));
+
+	ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len);
+
+	rcu_read_lock();
+	idev = __in6_dev_get(dst->dev);
+	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
+
+	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
+		      dst_output);
+	if (!err) {
+		ICMP6MSGOUT_INC_STATS(net, idev, type);
+		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
+	}
+
+	rcu_read_unlock();
+}
+
+void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
+		   const struct in6_addr *daddr,
+		   const struct in6_addr *solicited_addr,
+		   bool router, bool solicited, bool override, bool inc_opt)
+{
+	struct sk_buff *skb;
+	struct in6_addr tmpaddr;
+	struct inet6_ifaddr *ifp;
+	const struct in6_addr *src_addr;
+	struct nd_msg *msg;
+	int optlen = 0;
 
 	/* for anycast or proxy, solicited_addr != src_addr */
-	ifp = ipv6_get_ifaddr(solicited_addr, dev, 1);
- 	if (ifp) {
+	ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
+	if (ifp) {
 		src_addr = solicited_addr;
+		if (ifp->flags & IFA_F_OPTIMISTIC)
+			override = false;
+		inc_opt |= ifp->idev->cnf.force_tllao;
 		in6_ifa_put(ifp);
 	} else {
-		if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr))
+		if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
+				       inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
+				       &tmpaddr))
 			return;
 		src_addr = &tmpaddr;
 	}
 
-	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr);
-
-	dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
-	if (!dst)
-		return;
+	if (!dev->addr_len)
+		inc_opt = 0;
+	if (inc_opt)
+		optlen += ndisc_opt_addr_space(dev);
 
-	err = xfrm_lookup(&dst, &fl, NULL, 0);
-	if (err < 0)
+	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
+	if (!skb)
 		return;
 
-	if (inc_opt) {
-		if (dev->addr_len)
-			len += ndisc_opt_addr_space(dev);
-		else
-			inc_opt = 0;
-	}
-
-	skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
-				  1, &err);
-
-	if (skb == NULL) {
-		ND_PRINTK0(KERN_ERR
-			   "ICMPv6 NA: %s() failed to allocate an skb.\n", 
-			   __FUNCTION__);
-		dst_release(dst);
-		return;
-	}
+	msg = (struct nd_msg *)skb_put(skb, sizeof(*msg));
+	*msg = (struct nd_msg) {
+		.icmph = {
+			.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
+			.icmp6_router = router,
+			.icmp6_solicited = solicited,
+			.icmp6_override = override,
+		},
+		.target = *solicited_addr,
+	};
 
-	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	ip6_nd_hdr(sk, skb, dev, src_addr, daddr, IPPROTO_ICMPV6, len);
+	if (inc_opt)
+		ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
+				       dev->dev_addr);
 
-	msg = (struct nd_msg *)skb_put(skb, len);
-	skb->h.raw = (unsigned char*)msg;
 
-        msg->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
-        msg->icmph.icmp6_code = 0;
-        msg->icmph.icmp6_cksum = 0;
+	ndisc_send_skb(skb, daddr, src_addr);
+}
 
-        msg->icmph.icmp6_unused = 0;
-        msg->icmph.icmp6_router    = router;
-        msg->icmph.icmp6_solicited = solicited;
-        msg->icmph.icmp6_override  = !!override;
+static void ndisc_send_unsol_na(struct net_device *dev)
+{
+	struct inet6_dev *idev;
+	struct inet6_ifaddr *ifa;
 
-        /* Set the target address. */
-	ipv6_addr_copy(&msg->target, solicited_addr);
+	idev = in6_dev_get(dev);
+	if (!idev)
+		return;
 
-	if (inc_opt)
-		ndisc_fill_addr_option(msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr,
-				       dev->addr_len, dev->type);
-
-	/* checksum */
-	msg->icmph.icmp6_cksum = csum_ipv6_magic(src_addr, daddr, len, 
-						 IPPROTO_ICMPV6,
-						 csum_partial((__u8 *) msg, 
-							      len, 0));
-
-	skb->dst = dst;
-	idev = in6_dev_get(dst->dev);
-	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
-	if (!err) {
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS);
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+	read_lock_bh(&idev->lock);
+	list_for_each_entry(ifa, &idev->addr_list, if_list) {
+		ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &ifa->addr,
+			      /*router=*/ !!idev->cnf.forwarding,
+			      /*solicited=*/ false, /*override=*/ true,
+			      /*inc_opt=*/ true);
 	}
+	read_unlock_bh(&idev->lock);
 
-	if (likely(idev != NULL))
-		in6_dev_put(idev);
-}        
+	in6_dev_put(idev);
+}
 
 void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
-		   struct in6_addr *solicit,
-		   struct in6_addr *daddr, struct in6_addr *saddr) 
+		   const struct in6_addr *solicit,
+		   const struct in6_addr *daddr, const struct in6_addr *saddr)
 {
-	struct flowi fl;
-	struct dst_entry* dst;
-	struct inet6_dev *idev;
-        struct sock *sk = ndisc_socket->sk;
-        struct sk_buff *skb;
-        struct nd_msg *msg;
+	struct sk_buff *skb;
 	struct in6_addr addr_buf;
-        int len;
-	int err;
-	int send_llinfo;
+	int inc_opt = dev->addr_len;
+	int optlen = 0;
+	struct nd_msg *msg;
 
 	if (saddr == NULL) {
-		if (ipv6_get_lladdr(dev, &addr_buf))
+		if (ipv6_get_lladdr(dev, &addr_buf,
+				   (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
 			return;
 		saddr = &addr_buf;
 	}
 
-	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr);
-
-	dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
-	if (!dst)
-		return;
+	if (ipv6_addr_any(saddr))
+		inc_opt = false;
+	if (inc_opt)
+		optlen += ndisc_opt_addr_space(dev);
 
-	err = xfrm_lookup(&dst, &fl, NULL, 0);
-	if (err < 0)
+	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
+	if (!skb)
 		return;
 
-	len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
-	send_llinfo = dev->addr_len && !ipv6_addr_any(saddr);
-	if (send_llinfo)
-		len += ndisc_opt_addr_space(dev);
-
-	skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
-				  1, &err);
-	if (skb == NULL) {
-		ND_PRINTK0(KERN_ERR
-			   "ICMPv6 NA: %s() failed to allocate an skb.\n", 
-			   __FUNCTION__);
-		dst_release(dst);
-		return;
-	}
+	msg = (struct nd_msg *)skb_put(skb, sizeof(*msg));
+	*msg = (struct nd_msg) {
+		.icmph = {
+			.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
+		},
+		.target = *solicit,
+	};
 
-	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
-
-	msg = (struct nd_msg *)skb_put(skb, len);
-	skb->h.raw = (unsigned char*)msg;
-	msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION;
-	msg->icmph.icmp6_code = 0;
-	msg->icmph.icmp6_cksum = 0;
-	msg->icmph.icmp6_unused = 0;
-
-	/* Set the target address. */
-	ipv6_addr_copy(&msg->target, solicit);
-
-	if (send_llinfo)
-		ndisc_fill_addr_option(msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
-				       dev->addr_len, dev->type);
-
-	/* checksum */
-	msg->icmph.icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr,
-						 daddr, len, 
-						 IPPROTO_ICMPV6,
-						 csum_partial((__u8 *) msg, 
-							      len, 0));
-	/* send it! */
-	skb->dst = dst;
-	idev = in6_dev_get(dst->dev);
-	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
-	if (!err) {
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORSOLICITS);
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
-	}
+	if (inc_opt)
+		ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
+				       dev->dev_addr);
 
-	if (likely(idev != NULL))
-		in6_dev_put(idev);
+	ndisc_send_skb(skb, daddr, saddr);
 }
 
-void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
-		   struct in6_addr *daddr)
+void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
+		   const struct in6_addr *daddr)
 {
-	struct flowi fl;
-	struct dst_entry* dst;
-	struct inet6_dev *idev;
-	struct sock *sk = ndisc_socket->sk;
-        struct sk_buff *skb;
-        struct icmp6hdr *hdr;
-	__u8 * opt;
-        int len;
-	int err;
-
-	ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr);
-
-	dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output);
-	if (!dst)
-		return;
-
-	err = xfrm_lookup(&dst, &fl, NULL, 0);
-	if (err < 0)
-		return;
-
-	len = sizeof(struct icmp6hdr);
-	if (dev->addr_len)
-		len += ndisc_opt_addr_space(dev);
+	struct sk_buff *skb;
+	struct rs_msg *msg;
+	int send_sllao = dev->addr_len;
+	int optlen = 0;
 
-        skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
-				  1, &err);
-	if (skb == NULL) {
-		ND_PRINTK0(KERN_ERR
-			   "ICMPv6 RS: %s() failed to allocate an skb.\n", 
-			   __FUNCTION__);
-		dst_release(dst);
-		return;
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	/*
+	 * According to section 2.2 of RFC 4429, we must not
+	 * send router solicitations with a sllao from
+	 * optimistic addresses, but we may send the solicitation
+	 * if we don't include the sllao.  So here we check
+	 * if our address is optimistic, and if so, we
+	 * suppress the inclusion of the sllao.
+	 */
+	if (send_sllao) {
+		struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr,
+							   dev, 1);
+		if (ifp) {
+			if (ifp->flags & IFA_F_OPTIMISTIC)  {
+				send_sllao = 0;
+			}
+			in6_ifa_put(ifp);
+		} else {
+			send_sllao = 0;
+		}
 	}
+#endif
+	if (send_sllao)
+		optlen += ndisc_opt_addr_space(dev);
 
-	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
-
-        hdr = (struct icmp6hdr *)skb_put(skb, len);
-        skb->h.raw = (unsigned char*)hdr;
-        hdr->icmp6_type = NDISC_ROUTER_SOLICITATION;
-        hdr->icmp6_code = 0;
-        hdr->icmp6_cksum = 0;
-        hdr->icmp6_unused = 0;
-
-	opt = (u8*) (hdr + 1);
-
-	if (dev->addr_len)
-		ndisc_fill_addr_option(opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
-				       dev->addr_len, dev->type);
+	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
+	if (!skb)
+		return;
 
-	/* checksum */
-	hdr->icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr, daddr, len,
-					   IPPROTO_ICMPV6,
-					   csum_partial((__u8 *) hdr, len, 0));
+	msg = (struct rs_msg *)skb_put(skb, sizeof(*msg));
+	*msg = (struct rs_msg) {
+		.icmph = {
+			.icmp6_type = NDISC_ROUTER_SOLICITATION,
+		},
+	};
 
-	/* send it! */
-	skb->dst = dst;
-	idev = in6_dev_get(dst->dev);
-	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);	
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
-	if (!err) {
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTROUTERSOLICITS);
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
-	}
+	if (send_sllao)
+		ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
+				       dev->dev_addr);
 
-	if (likely(idev != NULL))
-		in6_dev_put(idev);
+	ndisc_send_skb(skb, daddr, saddr);
 }
-		   
+
 
 static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
 {
@@ -685,46 +655,63 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
 	int probes = atomic_read(&neigh->probes);
 
-	if (skb && ipv6_chk_addr(&skb->nh.ipv6h->saddr, dev, 1))
-		saddr = &skb->nh.ipv6h->saddr;
+	if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1))
+		saddr = &ipv6_hdr(skb)->saddr;
 
-	if ((probes -= neigh->parms->ucast_probes) < 0) {
+	if ((probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES)) < 0) {
 		if (!(neigh->nud_state & NUD_VALID)) {
-			ND_PRINTK1(KERN_DEBUG
-				   "%s(): trying to ucast probe in NUD_INVALID: "
-				   "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
-				   __FUNCTION__,
-				   NIP6(*target));
+			ND_PRINTK(1, dbg,
+				  "%s: trying to ucast probe in NUD_INVALID: %pI6\n",
+				  __func__, target);
 		}
 		ndisc_send_ns(dev, neigh, target, target, saddr);
-	} else if ((probes -= neigh->parms->app_probes) < 0) {
-#ifdef CONFIG_ARPD
+	} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
 		neigh_app_ns(neigh);
-#endif
 	} else {
 		addrconf_addr_solict_mult(target, &mcaddr);
 		ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
 	}
 }
 
+static int pndisc_is_router(const void *pkey,
+			    struct net_device *dev)
+{
+	struct pneigh_entry *n;
+	int ret = -1;
+
+	read_lock_bh(&nd_tbl.lock);
+	n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev);
+	if (n)
+		ret = !!(n->flags & NTF_ROUTER);
+	read_unlock_bh(&nd_tbl.lock);
+
+	return ret;
+}
+
 static void ndisc_recv_ns(struct sk_buff *skb)
 {
-	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
-	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
-	struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+	const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+	const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
 	u8 *lladdr = NULL;
-	u32 ndoptlen = skb->tail - msg->opt;
+	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
+				    offsetof(struct nd_msg, opt));
 	struct ndisc_options ndopts;
 	struct net_device *dev = skb->dev;
 	struct inet6_ifaddr *ifp;
 	struct inet6_dev *idev = NULL;
 	struct neighbour *neigh;
 	int dad = ipv6_addr_any(saddr);
-	int inc;
+	bool inc;
+	int is_router = -1;
+
+	if (skb->len < sizeof(struct nd_msg)) {
+		ND_PRINTK(2, warn, "NS: packet too short\n");
+		return;
+	}
 
 	if (ipv6_addr_is_multicast(&msg->target)) {
-		ND_PRINTK2(KERN_WARNING 
-			   "ICMPv6 NS: multicast target address");
+		ND_PRINTK(2, warn, "NS: multicast target address\n");
 		return;
 	}
 
@@ -732,88 +719,84 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 	 * RFC2461 7.1.1:
 	 * DAD has to be destined for solicited node multicast address.
 	 */
-	if (dad &&
-	    !(daddr->s6_addr32[0] == htonl(0xff020000) &&
-	      daddr->s6_addr32[1] == htonl(0x00000000) &&
-	      daddr->s6_addr32[2] == htonl(0x00000001) &&
-	      daddr->s6_addr [12] == 0xff )) {
-		ND_PRINTK2(KERN_WARNING
-			   "ICMPv6 NS: bad DAD packet (wrong destination)\n");
+	if (dad && !ipv6_addr_is_solict_mult(daddr)) {
+		ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n");
 		return;
 	}
 
 	if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
-		ND_PRINTK2(KERN_WARNING 
-			   "ICMPv6 NS: invalid ND options\n");
+		ND_PRINTK(2, warn, "NS: invalid ND options\n");
 		return;
 	}
 
 	if (ndopts.nd_opts_src_lladdr) {
 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
 		if (!lladdr) {
-			ND_PRINTK2(KERN_WARNING
-				   "ICMPv6 NS: invalid link-layer address length\n");
+			ND_PRINTK(2, warn,
+				  "NS: invalid link-layer address length\n");
 			return;
 		}
 
 		/* RFC2461 7.1.1:
-	 	 *	If the IP source address is the unspecified address, 
-		 *	there MUST NOT be source link-layer address option 
+		 *	If the IP source address is the unspecified address,
+		 *	there MUST NOT be source link-layer address option
 		 *	in the message.
 		 */
 		if (dad) {
-			ND_PRINTK2(KERN_WARNING 
-				   "ICMPv6 NS: bad DAD packet (link-layer address option)\n");
+			ND_PRINTK(2, warn,
+				  "NS: bad DAD packet (link-layer address option)\n");
 			return;
 		}
 	}
 
 	inc = ipv6_addr_is_multicast(daddr);
 
-	if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1)) != NULL) {
-		if (ifp->flags & IFA_F_TENTATIVE) {
-			/* Address is tentative. If the source
-			   is unspecified address, it is someone
-			   does DAD, otherwise we ignore solicitations
-			   until DAD timer expires.
-			 */
-			if (!dad)
-				goto out;
-			if (dev->type == ARPHRD_IEEE802_TR) {
-				unsigned char *sadr = skb->mac.raw;
-				if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
-				    sadr[9] == dev->dev_addr[1] &&
-				    sadr[10] == dev->dev_addr[2] &&
-				    sadr[11] == dev->dev_addr[3] &&
-				    sadr[12] == dev->dev_addr[4] &&
-				    sadr[13] == dev->dev_addr[5]) {
-					/* looped-back to us */
+	ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
+	if (ifp) {
+
+		if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
+			if (dad) {
+				/*
+				 * We are colliding with another node
+				 * who is doing DAD
+				 * so fail our DAD process
+				 */
+				addrconf_dad_failure(ifp);
+				return;
+			} else {
+				/*
+				 * This is not a dad solicitation.
+				 * If we are an optimistic node,
+				 * we should respond.
+				 * Otherwise, we should ignore it.
+				 */
+				if (!(ifp->flags & IFA_F_OPTIMISTIC))
 					goto out;
-				}
 			}
-			addrconf_dad_failure(ifp); 
-			return;
 		}
 
 		idev = ifp->idev;
 	} else {
+		struct net *net = dev_net(dev);
+
 		idev = in6_dev_get(dev);
 		if (!idev) {
 			/* XXX: count this drop? */
 			return;
 		}
 
-		if (ipv6_chk_acast_addr(dev, &msg->target) ||
-		    (idev->cnf.forwarding && 
-		     pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) {
+		if (ipv6_chk_acast_addr(net, dev, &msg->target) ||
+		    (idev->cnf.forwarding &&
+		     (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) &&
+		     (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
 			if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
 			    skb->pkt_type != PACKET_HOST &&
-			    inc != 0 &&
-			    idev->nd_parms->proxy_delay != 0) {
+			    inc &&
+			    NEIGH_VAR(idev->nd_parms, PROXY_DELAY) != 0) {
 				/*
 				 * for anycast or proxy,
-				 * sender should delay its response 
-				 * by a random time between 0 and 
+				 * sender should delay its response
+				 * by a random time between 0 and
 				 * MAX_ANYCAST_DELAY_TIME seconds.
 				 * (RFC2461) -- yoshfuji
 				 */
@@ -826,12 +809,12 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 			goto out;
 	}
 
-	if (dad) {
-		struct in6_addr maddr;
+	if (is_router < 0)
+		is_router = idev->cnf.forwarding;
 
-		ipv6_addr_all_nodes(&maddr);
-		ndisc_send_na(dev, NULL, &maddr, &msg->target,
-			      idev->cnf.forwarding, 0, (ifp != NULL), 1);
+	if (dad) {
+		ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target,
+			      !!is_router, false, (ifp != NULL), true);
 		goto out;
 	}
 
@@ -840,20 +823,20 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 	else
 		NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);
 
-	/* 
+	/*
 	 *	update / create cache entry
 	 *	for the source address
 	 */
 	neigh = __neigh_lookup(&nd_tbl, saddr, dev,
 			       !inc || lladdr || !dev->addr_len);
 	if (neigh)
-		neigh_update(neigh, lladdr, NUD_STALE, 
+		neigh_update(neigh, lladdr, NUD_STALE,
 			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
 			     NEIGH_UPDATE_F_OVERRIDE);
-	if (neigh || !dev->hard_header) {
+	if (neigh || !dev->header_ops) {
 		ndisc_send_na(dev, neigh, saddr, &msg->target,
-			      idev->cnf.forwarding, 
-			      1, (ifp != NULL && inc), inc);
+			      !!is_router,
+			      true, (ifp != NULL && inc), inc);
 		if (neigh)
 			neigh_release(neigh);
 	}
@@ -863,67 +846,69 @@ out:
 		in6_ifa_put(ifp);
 	else
 		in6_dev_put(idev);
-
-	return;
 }
 
 static void ndisc_recv_na(struct sk_buff *skb)
 {
-	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
-	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
-	struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+	const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
 	u8 *lladdr = NULL;
-	u32 ndoptlen = skb->tail - msg->opt;
+	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
+				    offsetof(struct nd_msg, opt));
 	struct ndisc_options ndopts;
 	struct net_device *dev = skb->dev;
 	struct inet6_ifaddr *ifp;
 	struct neighbour *neigh;
 
 	if (skb->len < sizeof(struct nd_msg)) {
-		ND_PRINTK2(KERN_WARNING
-			   "ICMPv6 NA: packet too short\n");
+		ND_PRINTK(2, warn, "NA: packet too short\n");
 		return;
 	}
 
 	if (ipv6_addr_is_multicast(&msg->target)) {
-		ND_PRINTK2(KERN_WARNING
-			   "ICMPv6 NA: target address is multicast.\n");
+		ND_PRINTK(2, warn, "NA: target address is multicast\n");
 		return;
 	}
 
 	if (ipv6_addr_is_multicast(daddr) &&
 	    msg->icmph.icmp6_solicited) {
-		ND_PRINTK2(KERN_WARNING
-			   "ICMPv6 NA: solicited NA is multicasted.\n");
+		ND_PRINTK(2, warn, "NA: solicited NA is multicasted\n");
 		return;
 	}
-		
+
 	if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
-		ND_PRINTK2(KERN_WARNING
-			   "ICMPv6 NS: invalid ND option\n");
+		ND_PRINTK(2, warn, "NS: invalid ND option\n");
 		return;
 	}
 	if (ndopts.nd_opts_tgt_lladdr) {
 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
 		if (!lladdr) {
-			ND_PRINTK2(KERN_WARNING
-				   "ICMPv6 NA: invalid link-layer address length\n");
+			ND_PRINTK(2, warn,
+				  "NA: invalid link-layer address length\n");
 			return;
 		}
 	}
-	if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1))) {
-		if (ifp->flags & IFA_F_TENTATIVE) {
-			addrconf_dad_failure(ifp);
-			return;
+	ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
+	if (ifp) {
+		if (skb->pkt_type != PACKET_LOOPBACK
+		    && (ifp->flags & IFA_F_TENTATIVE)) {
+				addrconf_dad_failure(ifp);
+				return;
 		}
 		/* What should we make now? The advertisement
 		   is invalid, but ndisc specs say nothing
 		   about it. It could be misconfiguration, or
 		   an smart proxy agent tries to help us :-)
+
+		   We should not print the error if NA has been
+		   received from loopback - it is just our own
+		   unsolicited advertisement.
 		 */
-		ND_PRINTK1(KERN_WARNING
-			   "ICMPv6 NA: someone advertises our address on %s!\n",
-			   ifp->idev->dev->name);
+		if (skb->pkt_type != PACKET_LOOPBACK)
+			ND_PRINTK(1, warn,
+				  "NA: someone advertises our address %pI6 on %s!\n",
+				  &ifp->addr, ifp->idev->dev->name);
 		in6_ifa_put(ifp);
 		return;
 	}
@@ -931,10 +916,23 @@ static void ndisc_recv_na(struct sk_buff *skb)
 
 	if (neigh) {
 		u8 old_flags = neigh->flags;
+		struct net *net = dev_net(dev);
 
 		if (neigh->nud_state & NUD_FAILED)
 			goto out;
 
+		/*
+		 * Don't update the neighbor cache entry on a proxy NA from
+		 * ourselves because either the proxied node is off link or it
+		 * has already sent a NA to us.
+		 */
+		if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
+		    net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp &&
+		    pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
+			/* XXX: idev->cnf.proxy_ndp */
+			goto out;
+		}
+
 		neigh_update(neigh, lladdr,
 			     msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
 			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
@@ -946,10 +944,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
 			/*
 			 * Change: router to host
 			 */
-			struct rt6_info *rt;
-			rt = rt6_get_dflt_router(saddr, dev);
-			if (rt)
-				ip6_del_rt(rt, NULL, NULL, NULL);
+			rt6_clean_tohost(dev_net(dev),  saddr);
 		}
 
 out:
@@ -959,21 +954,20 @@ out:
 
 static void ndisc_recv_rs(struct sk_buff *skb)
 {
-	struct rs_msg *rs_msg = (struct rs_msg *) skb->h.raw;
+	struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
 	unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
 	struct neighbour *neigh;
 	struct inet6_dev *idev;
-	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+	const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
 	struct ndisc_options ndopts;
 	u8 *lladdr = NULL;
 
 	if (skb->len < sizeof(*rs_msg))
 		return;
 
-	idev = in6_dev_get(skb->dev);
+	idev = __in6_dev_get(skb->dev);
 	if (!idev) {
-		if (net_ratelimit())
-			ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
+		ND_PRINTK(1, err, "RS: can't find in6 device\n");
 		return;
 	}
 
@@ -990,8 +984,7 @@ static void ndisc_recv_rs(struct sk_buff *skb)
 
 	/* Parse ND options */
 	if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
-		if (net_ratelimit())
-			ND_PRINTK2("ICMP6 NS: invalid ND option, ignored\n");
+		ND_PRINTK(2, notice, "NS: invalid ND option, ignored\n");
 		goto out;
 	}
 
@@ -1011,57 +1004,113 @@ static void ndisc_recv_rs(struct sk_buff *skb)
 		neigh_release(neigh);
 	}
 out:
-	in6_dev_put(idev);
+	return;
+}
+
+static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
+{
+	struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra);
+	struct sk_buff *skb;
+	struct nlmsghdr *nlh;
+	struct nduseroptmsg *ndmsg;
+	struct net *net = dev_net(ra->dev);
+	int err;
+	int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg)
+				    + (opt->nd_opt_len << 3));
+	size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr));
+
+	skb = nlmsg_new(msg_size, GFP_ATOMIC);
+	if (skb == NULL) {
+		err = -ENOBUFS;
+		goto errout;
+	}
+
+	nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0);
+	if (nlh == NULL) {
+		goto nla_put_failure;
+	}
+
+	ndmsg = nlmsg_data(nlh);
+	ndmsg->nduseropt_family = AF_INET6;
+	ndmsg->nduseropt_ifindex = ra->dev->ifindex;
+	ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
+	ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
+	ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;
+
+	memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
+
+	if (nla_put(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr),
+		    &ipv6_hdr(ra)->saddr))
+		goto nla_put_failure;
+	nlmsg_end(skb, nlh);
+
+	rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC);
+	return;
+
+nla_put_failure:
+	nlmsg_free(skb);
+	err = -EMSGSIZE;
+errout:
+	rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
 }
 
 static void ndisc_router_discovery(struct sk_buff *skb)
 {
-        struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw;
+	struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
 	struct neighbour *neigh = NULL;
 	struct inet6_dev *in6_dev;
-	struct rt6_info *rt;
+	struct rt6_info *rt = NULL;
 	int lifetime;
 	struct ndisc_options ndopts;
 	int optlen;
+	unsigned int pref = 0;
 
 	__u8 * opt = (__u8 *)(ra_msg + 1);
 
-	optlen = (skb->tail - skb->h.raw) - sizeof(struct ra_msg);
+	optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) -
+		sizeof(struct ra_msg);
 
-	if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
-		ND_PRINTK2(KERN_WARNING
-			   "ICMPv6 RA: source address is not link-local.\n");
+	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
+		ND_PRINTK(2, warn, "RA: source address is not link-local\n");
 		return;
 	}
 	if (optlen < 0) {
-		ND_PRINTK2(KERN_WARNING 
-			   "ICMPv6 RA: packet too short\n");
+		ND_PRINTK(2, warn, "RA: packet too short\n");
 		return;
 	}
 
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+	if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
+		ND_PRINTK(2, warn, "RA: from host or unauthorized router\n");
+		return;
+	}
+#endif
+
 	/*
 	 *	set the RA_RECV flag in the interface
 	 */
 
-	in6_dev = in6_dev_get(skb->dev);
+	in6_dev = __in6_dev_get(skb->dev);
 	if (in6_dev == NULL) {
-		ND_PRINTK0(KERN_ERR
-			   "ICMPv6 RA: can't find inet6 device for %s.\n",
-			   skb->dev->name);
-		return;
-	}
-	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra) {
-		in6_dev_put(in6_dev);
+		ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n",
+			  skb->dev->name);
 		return;
 	}
 
 	if (!ndisc_parse_options(opt, optlen, &ndopts)) {
-		in6_dev_put(in6_dev);
-		ND_PRINTK2(KERN_WARNING
-			   "ICMP6 RA: invalid ND options\n");
+		ND_PRINTK(2, warn, "RA: invalid ND options\n");
 		return;
 	}
 
+	if (!ipv6_accept_ra(in6_dev))
+		goto skip_linkparms;
+
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+	/* skip link-specific parameters from interior routers */
+	if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
+		goto skip_linkparms;
+#endif
+
 	if (in6_dev->if_flags & IF_RS_SENT) {
 		/*
 		 *	flag that an RA was received after an RS was sent
@@ -1081,53 +1130,74 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 				(ra_msg->icmph.icmp6_addrconf_other ?
 					IF_RA_OTHERCONF : 0);
 
+	if (!in6_dev->cnf.accept_ra_defrtr)
+		goto skip_defrtr;
+
+	if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
+		goto skip_defrtr;
+
 	lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
 
-	rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
+#ifdef CONFIG_IPV6_ROUTER_PREF
+	pref = ra_msg->icmph.icmp6_router_pref;
+	/* 10b is handled as if it were 00b (medium) */
+	if (pref == ICMPV6_ROUTER_PREF_INVALID ||
+	    !in6_dev->cnf.accept_ra_rtr_pref)
+		pref = ICMPV6_ROUTER_PREF_MEDIUM;
+#endif
 
-	if (rt)
-		neigh = rt->rt6i_nexthop;
+	rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
 
+	if (rt) {
+		neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr);
+		if (!neigh) {
+			ND_PRINTK(0, err,
+				  "RA: %s got default router without neighbour\n",
+				  __func__);
+			ip6_rt_put(rt);
+			return;
+		}
+	}
 	if (rt && lifetime == 0) {
-		neigh_clone(neigh);
-		ip6_del_rt(rt, NULL, NULL, NULL);
+		ip6_del_rt(rt);
 		rt = NULL;
 	}
 
 	if (rt == NULL && lifetime) {
-		ND_PRINTK3(KERN_DEBUG
-			   "ICMPv6 RA: adding default router.\n");
+		ND_PRINTK(3, dbg, "RA: adding default router\n");
 
-		rt = rt6_add_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
+		rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
 		if (rt == NULL) {
-			ND_PRINTK0(KERN_ERR
-				   "ICMPv6 RA: %s() failed to add default route.\n",
-				   __FUNCTION__);
-			in6_dev_put(in6_dev);
+			ND_PRINTK(0, err,
+				  "RA: %s failed to add default route\n",
+				  __func__);
 			return;
 		}
 
-		neigh = rt->rt6i_nexthop;
+		neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr);
 		if (neigh == NULL) {
-			ND_PRINTK0(KERN_ERR
-				   "ICMPv6 RA: %s() got default router without neighbour.\n",
-				   __FUNCTION__);
-			dst_release(&rt->u.dst);
-			in6_dev_put(in6_dev);
+			ND_PRINTK(0, err,
+				  "RA: %s got default router without neighbour\n",
+				  __func__);
+			ip6_rt_put(rt);
 			return;
 		}
 		neigh->flags |= NTF_ROUTER;
+	} else if (rt) {
+		rt->rt6i_flags = (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 	}
 
 	if (rt)
-		rt->rt6i_expires = jiffies + (HZ * lifetime);
-
+		rt6_set_expires(rt, jiffies + (HZ * lifetime));
 	if (ra_msg->icmph.icmp6_hop_limit) {
 		in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
 		if (rt)
-			rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
+			dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
+				       ra_msg->icmph.icmp6_hop_limit);
 	}
 
+skip_defrtr:
+
 	/*
 	 *	Update Reachable Time and Retrans Timer
 	 */
@@ -1139,7 +1209,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 			rtime = (rtime*HZ)/1000;
 			if (rtime < HZ/10)
 				rtime = HZ/10;
-			in6_dev->nd_parms->retrans_time = rtime;
+			NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime);
 			in6_dev->tstamp = jiffies;
 			inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
 		}
@@ -1151,9 +1221,11 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 			if (rtime < HZ/10)
 				rtime = HZ/10;
 
-			if (rtime != in6_dev->nd_parms->base_reachable_time) {
-				in6_dev->nd_parms->base_reachable_time = rtime;
-				in6_dev->nd_parms->gc_staletime = 3 * rtime;
+			if (rtime != NEIGH_VAR(in6_dev->nd_parms, BASE_REACHABLE_TIME)) {
+				NEIGH_VAR_SET(in6_dev->nd_parms,
+					      BASE_REACHABLE_TIME, rtime);
+				NEIGH_VAR_SET(in6_dev->nd_parms,
+					      GC_STALETIME, 3 * rtime);
 				in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
 				in6_dev->tstamp = jiffies;
 				inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
@@ -1161,12 +1233,14 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 		}
 	}
 
+skip_linkparms:
+
 	/*
 	 *	Process options.
 	 */
 
 	if (!neigh)
-		neigh = __neigh_lookup(&nd_tbl, &skb->nh.ipv6h->saddr,
+		neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
 				       skb->dev, 1);
 	if (neigh) {
 		u8 *lladdr = NULL;
@@ -1174,8 +1248,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 			lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
 						     skb->dev);
 			if (!lladdr) {
-				ND_PRINTK2(KERN_WARNING
-					   "ICMPv6 RA: invalid link-layer address length\n");
+				ND_PRINTK(2, warn,
+					  "RA: invalid link-layer address length\n");
 				goto out;
 			}
 		}
@@ -1186,266 +1260,261 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 			     NEIGH_UPDATE_F_ISROUTER);
 	}
 
-	if (ndopts.nd_opts_pi) {
+	if (!ipv6_accept_ra(in6_dev))
+		goto out;
+
+#ifdef CONFIG_IPV6_ROUTE_INFO
+	if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
+		goto skip_routeinfo;
+
+	if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
+		struct nd_opt_hdr *p;
+		for (p = ndopts.nd_opts_ri;
+		     p;
+		     p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) {
+			struct route_info *ri = (struct route_info *)p;
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+			if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT &&
+			    ri->prefix_len == 0)
+				continue;
+#endif
+			if (ri->prefix_len == 0 &&
+			    !in6_dev->cnf.accept_ra_defrtr)
+				continue;
+			if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
+				continue;
+			rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
+				      &ipv6_hdr(skb)->saddr);
+		}
+	}
+
+skip_routeinfo:
+#endif
+
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+	/* skip link-specific ndopts from interior routers */
+	if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
+		goto out;
+#endif
+
+	if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
 		struct nd_opt_hdr *p;
 		for (p = ndopts.nd_opts_pi;
 		     p;
 		     p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
-			addrconf_prefix_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3);
+			addrconf_prefix_rcv(skb->dev, (u8 *)p,
+					    (p->nd_opt_len) << 3,
+					    ndopts.nd_opts_src_lladdr != NULL);
 		}
 	}
 
 	if (ndopts.nd_opts_mtu) {
+		__be32 n;
 		u32 mtu;
 
-		memcpy(&mtu, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
-		mtu = ntohl(mtu);
+		memcpy(&n, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
+		mtu = ntohl(n);
 
 		if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
-			ND_PRINTK2(KERN_WARNING
-				   "ICMPv6 RA: invalid mtu: %d\n",
-				   mtu);
+			ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
 		} else if (in6_dev->cnf.mtu6 != mtu) {
 			in6_dev->cnf.mtu6 = mtu;
 
 			if (rt)
-				rt->u.dst.metrics[RTAX_MTU-1] = mtu;
+				dst_metric_set(&rt->dst, RTAX_MTU, mtu);
 
 			rt6_mtu_change(skb->dev, mtu);
 		}
 	}
-			
+
+	if (ndopts.nd_useropts) {
+		struct nd_opt_hdr *p;
+		for (p = ndopts.nd_useropts;
+		     p;
+		     p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
+			ndisc_ra_useropt(skb, p);
+		}
+	}
+
 	if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
-		ND_PRINTK2(KERN_WARNING
-			   "ICMPv6 RA: invalid RA options");
+		ND_PRINTK(2, warn, "RA: invalid RA options\n");
 	}
 out:
-	if (rt)
-		dst_release(&rt->u.dst);
-	else if (neigh)
+	ip6_rt_put(rt);
+	if (neigh)
 		neigh_release(neigh);
-	in6_dev_put(in6_dev);
 }
 
 static void ndisc_redirect_rcv(struct sk_buff *skb)
 {
-	struct inet6_dev *in6_dev;
-	struct icmp6hdr *icmph;
-	struct in6_addr *dest;
-	struct in6_addr *target;	/* new first hop to destination */
-	struct neighbour *neigh;
-	int on_link = 0;
+	u8 *hdr;
 	struct ndisc_options ndopts;
-	int optlen;
-	u8 *lladdr = NULL;
-
-	if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
-		ND_PRINTK2(KERN_WARNING
-			   "ICMPv6 Redirect: source address is not link-local.\n");
+	struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb);
+	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
+				    offsetof(struct rd_msg, opt));
+
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+	switch (skb->ndisc_nodetype) {
+	case NDISC_NODETYPE_HOST:
+	case NDISC_NODETYPE_NODEFAULT:
+		ND_PRINTK(2, warn,
+			  "Redirect: from host or unauthorized router\n");
 		return;
 	}
+#endif
 
-	optlen = skb->tail - skb->h.raw;
-	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
-
-	if (optlen < 0) {
-		ND_PRINTK2(KERN_WARNING
-			   "ICMPv6 Redirect: packet too short\n");
+	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
+		ND_PRINTK(2, warn,
+			  "Redirect: source address is not link-local\n");
 		return;
 	}
 
-	icmph = (struct icmp6hdr *) skb->h.raw;
-	target = (struct in6_addr *) (icmph + 1);
-	dest = target + 1;
-
-	if (ipv6_addr_is_multicast(dest)) {
-		ND_PRINTK2(KERN_WARNING
-			   "ICMPv6 Redirect: destination address is multicast.\n");
+	if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts))
 		return;
-	}
 
-	if (ipv6_addr_equal(dest, target)) {
-		on_link = 1;
-	} else if (!(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) {
-		ND_PRINTK2(KERN_WARNING 
-			   "ICMPv6 Redirect: target address is not link-local.\n");
+	if (!ndopts.nd_opts_rh) {
+		ip6_redirect_no_header(skb, dev_net(skb->dev),
+					skb->dev->ifindex, 0);
 		return;
 	}
 
-	in6_dev = in6_dev_get(skb->dev);
-	if (!in6_dev)
-		return;
-	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) {
-		in6_dev_put(in6_dev);
+	hdr = (u8 *)ndopts.nd_opts_rh;
+	hdr += 8;
+	if (!pskb_pull(skb, hdr - skb_transport_header(skb)))
 		return;
-	}
 
-	/* RFC2461 8.1: 
-	 *	The IP source address of the Redirect MUST be the same as the current
-	 *	first-hop router for the specified ICMP Destination Address.
-	 */
-		
-	if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
-		ND_PRINTK2(KERN_WARNING
-			   "ICMPv6 Redirect: invalid ND options\n");
-		in6_dev_put(in6_dev);
-		return;
-	}
-	if (ndopts.nd_opts_tgt_lladdr) {
-		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
-					     skb->dev);
-		if (!lladdr) {
-			ND_PRINTK2(KERN_WARNING
-				   "ICMPv6 Redirect: invalid link-layer address length\n");
-			in6_dev_put(in6_dev);
-			return;
-		}
-	}
+	icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
+}
 
-	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
-	if (neigh) {
-		rt6_redirect(dest, &skb->nh.ipv6h->saddr, neigh, lladdr, 
-			     on_link);
-		neigh_release(neigh);
-	}
-	in6_dev_put(in6_dev);
+static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
+					   struct sk_buff *orig_skb,
+					   int rd_len)
+{
+	u8 *opt = skb_put(skb, rd_len);
+
+	memset(opt, 0, 8);
+	*(opt++) = ND_OPT_REDIRECT_HDR;
+	*(opt++) = (rd_len >> 3);
+	opt += 6;
+
+	memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8);
 }
 
-void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
-			 struct in6_addr *target)
+void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 {
-	struct sock *sk = ndisc_socket->sk;
-	int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
+	struct net_device *dev = skb->dev;
+	struct net *net = dev_net(dev);
+	struct sock *sk = net->ipv6.ndisc_sk;
+	int optlen = 0;
+	struct inet_peer *peer;
 	struct sk_buff *buff;
-	struct icmp6hdr *icmph;
+	struct rd_msg *msg;
 	struct in6_addr saddr_buf;
-	struct in6_addr *addrp;
-	struct net_device *dev;
 	struct rt6_info *rt;
 	struct dst_entry *dst;
-	struct inet6_dev *idev;
-	struct flowi fl;
-	u8 *opt;
+	struct flowi6 fl6;
 	int rd_len;
-	int err;
-	int hlen;
 	u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+	bool ret;
 
-	dev = skb->dev;
-
-	if (ipv6_get_lladdr(dev, &saddr_buf)) {
-		ND_PRINTK2(KERN_WARNING
-			   "ICMPv6 Redirect: no link-local address on %s\n",
-			   dev->name);
- 		return;
- 	}
-
-	ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr);
-
-	dst = ip6_route_output(NULL, &fl);
-	if (dst == NULL)
+	if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
+		ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n",
+			  dev->name);
 		return;
+	}
 
-	err = xfrm_lookup(&dst, &fl, NULL, 0);
-	if (err)
+	if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
+	    ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
+		ND_PRINTK(2, warn,
+			  "Redirect: target address is not link-local unicast\n");
 		return;
+	}
 
-	rt = (struct rt6_info *) dst;
+	icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
+			 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
 
-	if (rt->rt6i_flags & RTF_GATEWAY) {
-		ND_PRINTK2(KERN_WARNING
-			   "ICMPv6 Redirect: destination is not a neighbour.\n");
+	dst = ip6_route_output(net, NULL, &fl6);
+	if (dst->error) {
 		dst_release(dst);
 		return;
 	}
-	if (!xrlim_allow(dst, 1*HZ)) {
-		dst_release(dst);
+	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+	if (IS_ERR(dst))
 		return;
+
+	rt = (struct rt6_info *) dst;
+
+	if (rt->rt6i_flags & RTF_GATEWAY) {
+		ND_PRINTK(2, warn,
+			  "Redirect: destination is not a neighbour\n");
+		goto release;
 	}
+	peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+	ret = inet_peer_xrlim_allow(peer, 1*HZ);
+	if (peer)
+		inet_putpeer(peer);
+	if (!ret)
+		goto release;
 
 	if (dev->addr_len) {
+		struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target);
+		if (!neigh) {
+			ND_PRINTK(2, warn,
+				  "Redirect: no neigh for target address\n");
+			goto release;
+		}
+
 		read_lock_bh(&neigh->lock);
 		if (neigh->nud_state & NUD_VALID) {
 			memcpy(ha_buf, neigh->ha, dev->addr_len);
 			read_unlock_bh(&neigh->lock);
 			ha = ha_buf;
-			len += ndisc_opt_addr_space(dev);
+			optlen += ndisc_opt_addr_space(dev);
 		} else
 			read_unlock_bh(&neigh->lock);
+
+		neigh_release(neigh);
 	}
 
 	rd_len = min_t(unsigned int,
-		     IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
+		       IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen,
+		       skb->len + 8);
 	rd_len &= ~0x7;
-	len += rd_len;
-
-	buff = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
-				   1, &err);
-	if (buff == NULL) {
-		ND_PRINTK0(KERN_ERR
-			   "ICMPv6 Redirect: %s() failed to allocate an skb.\n",
-			   __FUNCTION__);
-		dst_release(dst);
-		return;
-	}
-
-	hlen = 0;
-
-	skb_reserve(buff, LL_RESERVED_SPACE(dev));
-	ip6_nd_hdr(sk, buff, dev, &saddr_buf, &skb->nh.ipv6h->saddr,
-		   IPPROTO_ICMPV6, len);
-
-	icmph = (struct icmp6hdr *)skb_put(buff, len);
-	buff->h.raw = (unsigned char*)icmph;
-
-	memset(icmph, 0, sizeof(struct icmp6hdr));
-	icmph->icmp6_type = NDISC_REDIRECT;
-
-	/*
-	 *	copy target and destination addresses
-	 */
-
-	addrp = (struct in6_addr *)(icmph + 1);
-	ipv6_addr_copy(addrp, target);
-	addrp++;
-	ipv6_addr_copy(addrp, &skb->nh.ipv6h->daddr);
-
-	opt = (u8*) (addrp + 1);
+	optlen += rd_len;
+
+	buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
+	if (!buff)
+		goto release;
+
+	msg = (struct rd_msg *)skb_put(buff, sizeof(*msg));
+	*msg = (struct rd_msg) {
+		.icmph = {
+			.icmp6_type = NDISC_REDIRECT,
+		},
+		.target = *target,
+		.dest = ipv6_hdr(skb)->daddr,
+	};
 
 	/*
 	 *	include target_address option
 	 */
 
 	if (ha)
-		opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha,
-					     dev->addr_len, dev->type);
+		ndisc_fill_addr_option(buff, ND_OPT_TARGET_LL_ADDR, ha);
 
 	/*
 	 *	build redirect option and copy skb over to the new packet.
 	 */
 
-	memset(opt, 0, 8);	
-	*(opt++) = ND_OPT_REDIRECT_HDR;
-	*(opt++) = (rd_len >> 3);
-	opt += 6;
-
-	memcpy(opt, skb->nh.ipv6h, rd_len - 8);
+	if (rd_len)
+		ndisc_fill_redirect_hdr_option(buff, skb, rd_len);
 
-	icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &skb->nh.ipv6h->saddr,
-					     len, IPPROTO_ICMPV6,
-					     csum_partial((u8 *) icmph, len, 0));
-
-	buff->dst = dst;
-	idev = in6_dev_get(dst->dev);
-	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, buff, NULL, dst->dev, dst_output);
-	if (!err) {
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTREDIRECTS);
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
-	}
+	skb_dst_set(buff, dst);
+	ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf);
+	return;
 
-	if (likely(idev != NULL))
-		in6_dev_put(idev);
+release:
+	dst_release(dst);
 }
 
 static void pndisc_redo(struct sk_buff *skb)
@@ -1454,28 +1523,43 @@ static void pndisc_redo(struct sk_buff *skb)
 	kfree_skb(skb);
 }
 
+static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
+{
+	struct inet6_dev *idev = __in6_dev_get(skb->dev);
+
+	if (!idev)
+		return true;
+	if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED &&
+	    idev->cnf.suppress_frag_ndisc) {
+		net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n");
+		return true;
+	}
+	return false;
+}
+
 int ndisc_rcv(struct sk_buff *skb)
 {
 	struct nd_msg *msg;
 
-	if (!pskb_may_pull(skb, skb->len))
+	if (ndisc_suppress_frag_ndisc(skb))
+		return 0;
+
+	if (skb_linearize(skb))
 		return 0;
 
-	msg = (struct nd_msg *) skb->h.raw;
+	msg = (struct nd_msg *)skb_transport_header(skb);
 
-	__skb_push(skb, skb->data-skb->h.raw);
+	__skb_push(skb, skb->data - skb_transport_header(skb));
 
-	if (skb->nh.ipv6h->hop_limit != 255) {
-		ND_PRINTK2(KERN_WARNING
-			   "ICMPv6 NDISC: invalid hop-limit: %d\n",
-			   skb->nh.ipv6h->hop_limit);
+	if (ipv6_hdr(skb)->hop_limit != 255) {
+		ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n",
+			  ipv6_hdr(skb)->hop_limit);
 		return 0;
 	}
 
 	if (msg->icmph.icmp6_code != 0) {
-		ND_PRINTK2(KERN_WARNING 
-			   "ICMPv6 NDISC: invalid ICMPv6 code: %d\n",
-			   msg->icmph.icmp6_code);
+		ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n",
+			  msg->icmph.icmp6_code);
 		return 0;
 	}
 
@@ -1501,23 +1585,34 @@ int ndisc_rcv(struct sk_buff *skb)
 	case NDISC_REDIRECT:
 		ndisc_redirect_rcv(skb);
 		break;
-	};
+	}
 
 	return 0;
 }
 
 static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net *net = dev_net(dev);
+	struct inet6_dev *idev;
 
 	switch (event) {
 	case NETDEV_CHANGEADDR:
 		neigh_changeaddr(&nd_tbl, dev);
-		fib6_run_gc(~0UL);
+		fib6_run_gc(0, net, false);
+		idev = in6_dev_get(dev);
+		if (!idev)
+			break;
+		if (idev->cnf.ndisc_notify)
+			ndisc_send_unsol_na(dev);
+		in6_dev_put(idev);
 		break;
 	case NETDEV_DOWN:
 		neigh_ifdown(&nd_tbl, dev);
-		fib6_run_gc(~0UL);
+		fib6_run_gc(0, net, false);
+		break;
+	case NETDEV_NOTIFY_PEERS:
+		ndisc_send_unsol_na(dev);
 		break;
 	default:
 		break;
@@ -1538,11 +1633,7 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
 	static int warned;
 	if (strcmp(warncomm, current->comm) && warned < 5) {
 		strcpy(warncomm, current->comm);
-		printk(KERN_WARNING
-			"process `%s' is using deprecated sysctl (%s) "
-			"net.ipv6.neigh.%s.%s; "
-			"Use net.ipv6.neigh.%s.%s_ms "
-			"instead.\n",
+		pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n",
 			warncomm, func,
 			dev_name, ctl->procname,
 			dev_name, ctl->procname);
@@ -1550,37 +1641,34 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
 	}
 }
 
-int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct net_device *dev = ctl->extra1;
 	struct inet6_dev *idev;
 	int ret;
 
-	if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME ||
-	    ctl->ctl_name == NET_NEIGH_REACHABLE_TIME)
+	if ((strcmp(ctl->procname, "retrans_time") == 0) ||
+	    (strcmp(ctl->procname, "base_reachable_time") == 0))
 		ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
 
-	switch (ctl->ctl_name) {
-	case NET_NEIGH_RETRANS_TIME:
-		ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-		break;
-	case NET_NEIGH_REACHABLE_TIME:
-		ret = proc_dointvec_jiffies(ctl, write,
-					    filp, buffer, lenp, ppos);
-		break;
-	case NET_NEIGH_RETRANS_TIME_MS:
-	case NET_NEIGH_REACHABLE_TIME_MS:
-		ret = proc_dointvec_ms_jiffies(ctl, write,
-					       filp, buffer, lenp, ppos);
-		break;
-	default:
+	if (strcmp(ctl->procname, "retrans_time") == 0)
+		ret = neigh_proc_dointvec(ctl, write, buffer, lenp, ppos);
+
+	else if (strcmp(ctl->procname, "base_reachable_time") == 0)
+		ret = neigh_proc_dointvec_jiffies(ctl, write,
+						  buffer, lenp, ppos);
+
+	else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
+		 (strcmp(ctl->procname, "base_reachable_time_ms") == 0))
+		ret = neigh_proc_dointvec_ms_jiffies(ctl, write,
+						     buffer, lenp, ppos);
+	else
 		ret = -1;
-	}
 
 	if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
-		if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME ||
-		    ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS)
-			idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
+		if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME))
+			idev->nd_parms->reachable_time =
+					neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME));
 		idev->tstamp = jiffies;
 		inet6_ifinfo_notify(RTM_NEWLINK, idev);
 		in6_dev_put(idev);
@@ -1588,89 +1676,80 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
 	return ret;
 }
 
-static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
-					int nlen, void __user *oldval,
-					size_t __user *oldlenp,
-					void __user *newval, size_t newlen,
-					void **context)
-{
-	struct net_device *dev = ctl->extra1;
-	struct inet6_dev *idev;
-	int ret;
-
-	if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME ||
-	    ctl->ctl_name == NET_NEIGH_REACHABLE_TIME)
-		ndisc_warn_deprecated_sysctl(ctl, "procfs", dev ? dev->name : "default");
-
-	switch (ctl->ctl_name) {
-	case NET_NEIGH_REACHABLE_TIME:
-		ret = sysctl_jiffies(ctl, name, nlen,
-				     oldval, oldlenp, newval, newlen,
-				     context);
-		break;
-	case NET_NEIGH_RETRANS_TIME_MS:
-	case NET_NEIGH_REACHABLE_TIME_MS:
-		 ret = sysctl_ms_jiffies(ctl, name, nlen,
-					 oldval, oldlenp, newval, newlen,
-					 context);
-		 break;
-	default:
-		ret = 0;
-	}
-
-	if (newval && newlen && ret > 0 &&
-	    dev && (idev = in6_dev_get(dev)) != NULL) {
-		if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME ||
-		    ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS)
-			idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
-		idev->tstamp = jiffies;
-		inet6_ifinfo_notify(RTM_NEWLINK, idev);
-		in6_dev_put(idev);
-	}
-
-	return ret;
-}
 
 #endif
 
-int __init ndisc_init(struct net_proto_family *ops)
+static int __net_init ndisc_net_init(struct net *net)
 {
 	struct ipv6_pinfo *np;
 	struct sock *sk;
-        int err;
+	int err;
 
-	err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &ndisc_socket);
+	err = inet_ctl_sock_create(&sk, PF_INET6,
+				   SOCK_RAW, IPPROTO_ICMPV6, net);
 	if (err < 0) {
-		ND_PRINTK0(KERN_ERR
-			   "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n", 
-			   err);
-		ndisc_socket = NULL; /* For safety. */
+		ND_PRINTK(0, err,
+			  "NDISC: Failed to initialize the control socket (err %d)\n",
+			  err);
 		return err;
 	}
 
-	sk = ndisc_socket->sk;
+	net->ipv6.ndisc_sk = sk;
+
 	np = inet6_sk(sk);
-	sk->sk_allocation = GFP_ATOMIC;
 	np->hop_limit = 255;
 	/* Do not loopback ndisc messages */
 	np->mc_loop = 0;
-	sk->sk_prot->unhash(sk);
 
-        /*
-         * Initialize the neighbour table
-         */
-	
+	return 0;
+}
+
+static void __net_exit ndisc_net_exit(struct net *net)
+{
+	inet_ctl_sock_destroy(net->ipv6.ndisc_sk);
+}
+
+static struct pernet_operations ndisc_net_ops = {
+	.init = ndisc_net_init,
+	.exit = ndisc_net_exit,
+};
+
+int __init ndisc_init(void)
+{
+	int err;
+
+	err = register_pernet_subsys(&ndisc_net_ops);
+	if (err)
+		return err;
+	/*
+	 * Initialize the neighbour table
+	 */
 	neigh_table_init(&nd_tbl);
 
 #ifdef CONFIG_SYSCTL
-	neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, 
-			      "ipv6",
-			      &ndisc_ifinfo_sysctl_change,
-			      &ndisc_ifinfo_sysctl_strategy);
+	err = neigh_sysctl_register(NULL, &nd_tbl.parms,
+				    &ndisc_ifinfo_sysctl_change);
+	if (err)
+		goto out_unregister_pernet;
+out:
 #endif
+	return err;
 
-	register_netdevice_notifier(&ndisc_netdev_notifier);
-	return 0;
+#ifdef CONFIG_SYSCTL
+out_unregister_pernet:
+	unregister_pernet_subsys(&ndisc_net_ops);
+	goto out;
+#endif
+}
+
+int __init ndisc_late_init(void)
+{
+	return register_netdevice_notifier(&ndisc_netdev_notifier);
+}
+
+void ndisc_late_cleanup(void)
+{
+	unregister_netdevice_notifier(&ndisc_netdev_notifier);
 }
 
 void ndisc_cleanup(void)
@@ -1679,6 +1758,5 @@ void ndisc_cleanup(void)
 	neigh_sysctl_unregister(&nd_tbl.parms);
 #endif
 	neigh_table_clear(&nd_tbl);
-	sock_release(ndisc_socket);
-	ndisc_socket = NULL; /* For safety. */
+	unregister_pernet_subsys(&ndisc_net_ops);
 }
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index f8626ebf90f..d38e6a8d8b9 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -1,42 +1,69 @@
-#include <linux/config.h>
-#include <linux/init.h>
-
-#ifdef CONFIG_NETFILTER
-
+/*
+ * IPv6 specific functions of netfilter core
+ *
+ * Rusty Russell (C) 2000 -- This code is GPL.
+ * Patrick McHardy (C) 2006-2012
+ */
 #include <linux/kernel.h>
+#include <linux/init.h>
 #include <linux/ipv6.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
+#include <linux/export.h>
+#include <net/addrconf.h>
 #include <net/dst.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
+#include <net/xfrm.h>
+#include <net/ip6_checksum.h>
+#include <net/netfilter/nf_queue.h>
 
 int ip6_route_me_harder(struct sk_buff *skb)
 {
-	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct net *net = dev_net(skb_dst(skb)->dev);
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	unsigned int hh_len;
 	struct dst_entry *dst;
-	struct flowi fl = {
-		.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
-		.nl_u =
-		{ .ip6_u =
-		  { .daddr = iph->daddr,
-		    .saddr = iph->saddr, } },
-		.proto = iph->nexthdr,
+	struct flowi6 fl6 = {
+		.flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
+		.flowi6_mark = skb->mark,
+		.daddr = iph->daddr,
+		.saddr = iph->saddr,
 	};
+	int err;
 
-	dst = ip6_route_output(skb->sk, &fl);
-
-	if (dst->error) {
-		IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
+	dst = ip6_route_output(net, skb->sk, &fl6);
+	err = dst->error;
+	if (err) {
+		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 		LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
 		dst_release(dst);
-		return -EINVAL;
+		return err;
 	}
 
 	/* Drop old route. */
-	dst_release(skb->dst);
+	skb_dst_drop(skb);
+
+	skb_dst_set(skb, dst);
+
+#ifdef CONFIG_XFRM
+	if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+	    xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
+		skb_dst_set(skb, NULL);
+		dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0);
+		if (IS_ERR(dst))
+			return PTR_ERR(dst);
+		skb_dst_set(skb, dst);
+	}
+#endif
+
+	/* Change in oif may mean change in hh_len. */
+	hh_len = skb_dst(skb)->dev->hard_header_len;
+	if (skb_headroom(skb) < hh_len &&
+	    pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
+			     0, GFP_ATOMIC))
+		return -ENOMEM;
 
-	skb->dst = dst;
 	return 0;
 }
 EXPORT_SYMBOL(ip6_route_me_harder);
@@ -49,56 +76,144 @@ EXPORT_SYMBOL(ip6_route_me_harder);
 struct ip6_rt_info {
 	struct in6_addr daddr;
 	struct in6_addr saddr;
+	u_int32_t mark;
 };
 
-static void save(const struct sk_buff *skb, struct nf_info *info)
+static void nf_ip6_saveroute(const struct sk_buff *skb,
+			     struct nf_queue_entry *entry)
 {
-	struct ip6_rt_info *rt_info = nf_info_reroute(info);
+	struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
 
-	if (info->hook == NF_IP6_LOCAL_OUT) {
-		struct ipv6hdr *iph = skb->nh.ipv6h;
+	if (entry->hook == NF_INET_LOCAL_OUT) {
+		const struct ipv6hdr *iph = ipv6_hdr(skb);
 
 		rt_info->daddr = iph->daddr;
 		rt_info->saddr = iph->saddr;
+		rt_info->mark = skb->mark;
 	}
 }
 
-static int reroute(struct sk_buff **pskb, const struct nf_info *info)
+static int nf_ip6_reroute(struct sk_buff *skb,
+			  const struct nf_queue_entry *entry)
 {
-	struct ip6_rt_info *rt_info = nf_info_reroute(info);
+	struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
 
-	if (info->hook == NF_IP6_LOCAL_OUT) {
-		struct ipv6hdr *iph = (*pskb)->nh.ipv6h;
+	if (entry->hook == NF_INET_LOCAL_OUT) {
+		const struct ipv6hdr *iph = ipv6_hdr(skb);
 		if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
-		    !ipv6_addr_equal(&iph->saddr, &rt_info->saddr))
-			return ip6_route_me_harder(*pskb);
+		    !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
+		    skb->mark != rt_info->mark)
+			return ip6_route_me_harder(skb);
 	}
 	return 0;
 }
 
-static struct nf_queue_rerouter ip6_reroute = {
-	.rer_size	= sizeof(struct ip6_rt_info),
-	.save 		= &save,
-	.reroute	= &reroute,
-};
-
-int __init ipv6_netfilter_init(void)
+static int nf_ip6_route(struct net *net, struct dst_entry **dst,
+			struct flowi *fl, bool strict)
 {
-	return nf_register_queue_rerouter(PF_INET6, &ip6_reroute);
+	static const struct ipv6_pinfo fake_pinfo;
+	static const struct inet_sock fake_sk = {
+		/* makes ip6_route_output set RT6_LOOKUP_F_IFACE: */
+		.sk.sk_bound_dev_if = 1,
+		.pinet6 = (struct ipv6_pinfo *) &fake_pinfo,
+	};
+	const void *sk = strict ? &fake_sk : NULL;
+	struct dst_entry *result;
+	int err;
+
+	result = ip6_route_output(net, sk, &fl->u.ip6);
+	err = result->error;
+	if (err)
+		dst_release(result);
+	else
+		*dst = result;
+	return err;
 }
 
-void ipv6_netfilter_fini(void)
+__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
+			     unsigned int dataoff, u_int8_t protocol)
 {
-	nf_unregister_queue_rerouter(PF_INET6);
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
+			break;
+		if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+				     skb->len - dataoff, protocol,
+				     csum_sub(skb->csum,
+					      skb_checksum(skb, 0,
+							   dataoff, 0)))) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			break;
+		}
+		/* fall through */
+	case CHECKSUM_NONE:
+		skb->csum = ~csum_unfold(
+				csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+					     skb->len - dataoff,
+					     protocol,
+					     csum_sub(0,
+						      skb_checksum(skb, 0,
+								   dataoff, 0))));
+		csum = __skb_checksum_complete(skb);
+	}
+	return csum;
 }
+EXPORT_SYMBOL(nf_ip6_checksum);
+
+static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
+				       unsigned int dataoff, unsigned int len,
+				       u_int8_t protocol)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	__wsum hsum;
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (len == skb->len - dataoff)
+			return nf_ip6_checksum(skb, hook, dataoff, protocol);
+		/* fall through */
+	case CHECKSUM_NONE:
+		hsum = skb_checksum(skb, 0, dataoff, 0);
+		skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
+							 &ip6h->daddr,
+							 skb->len - dataoff,
+							 protocol,
+							 csum_sub(0, hsum)));
+		skb->ip_summed = CHECKSUM_NONE;
+		return __skb_checksum_complete_head(skb, dataoff + len);
+	}
+	return csum;
+};
+
+static const struct nf_ipv6_ops ipv6ops = {
+	.chk_addr	= ipv6_chk_addr,
+};
+
+static const struct nf_afinfo nf_ip6_afinfo = {
+	.family			= AF_INET6,
+	.checksum		= nf_ip6_checksum,
+	.checksum_partial	= nf_ip6_checksum_partial,
+	.route			= nf_ip6_route,
+	.saveroute		= nf_ip6_saveroute,
+	.reroute		= nf_ip6_reroute,
+	.route_key_size		= sizeof(struct ip6_rt_info),
+};
 
-#else /* CONFIG_NETFILTER */
 int __init ipv6_netfilter_init(void)
 {
-	return 0;
+	RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
+	return nf_register_afinfo(&nf_ip6_afinfo);
 }
 
+/* This can be called from inet6_init() on errors, so it cannot
+ * be marked __exit. -DaveM
+ */
 void ipv6_netfilter_fini(void)
 {
+	RCU_INIT_POINTER(nf_ipv6_ops, NULL);
+	nf_unregister_afinfo(&nf_ip6_afinfo);
 }
-#endif /* CONFIG_NETFILTER */
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index bb7ccfe33f2..4bff1f297e3 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -2,35 +2,64 @@
 # IP netfilter configuration
 #
 
-menu "IPv6: Netfilter Configuration (EXPERIMENTAL)"
-	depends on INET && IPV6 && NETFILTER && EXPERIMENTAL
-
-#tristate 'Connection tracking (required for masq/NAT)' CONFIG_IP6_NF_CONNTRACK
-#if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then
-#  dep_tristate '  FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK
-#fi
-config IP6_NF_QUEUE
-	tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)"
+menu "IPv6: Netfilter Configuration"
+	depends on INET && IPV6 && NETFILTER
+
+config NF_DEFRAG_IPV6
+	tristate
+	default n
+
+config NF_CONNTRACK_IPV6
+	tristate "IPv6 connection tracking support"
+	depends on INET && IPV6 && NF_CONNTRACK
+	default m if NETFILTER_ADVANCED=n
+	select NF_DEFRAG_IPV6
 	---help---
+	  Connection tracking keeps a record of what packets have passed
+	  through your machine, in order to figure out how they are related
+	  into connections.
 
-	  This option adds a queue handler to the kernel for IPv6
-	  packets which enables users to receive the filtered packets
-	  with QUEUE target using libipq.
+	  This is IPv6 support on Layer 3 independent connection tracking.
+	  Layer 3 independent connection tracking is experimental scheme
+	  which generalize ip_conntrack to support other layer 3 protocols.
 
-	  THis option enables the old IPv6-only "ip6_queue" implementation
-	  which has been obsoleted by the new "nfnetlink_queue" code (see
-	  CONFIG_NETFILTER_NETLINK_QUEUE).
+	  To compile it as a module, choose M here.  If unsure, say N.
 
-	  (C) Fernando Anton 2001
-	  IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
-	  Universidad Carlos III de Madrid
-	  Universidad Politecnica de Alcala de Henares
-	  email: <fanton@it.uc3m.es>.
+config NF_TABLES_IPV6
+	depends on NF_TABLES
+	tristate "IPv6 nf_tables support"
+	help
+	  This option enables the IPv6 support for nf_tables.
 
-	  To compile it as a module, choose M here.  If unsure, say N.
+config NFT_CHAIN_ROUTE_IPV6
+	depends on NF_TABLES_IPV6
+	tristate "IPv6 nf_tables route chain support"
+	help
+	  This option enables the "route" chain for IPv6 in nf_tables. This
+	  chain type is used to force packet re-routing after mangling header
+	  fields such as the source, destination, flowlabel, hop-limit and
+	  the packet mark.
+
+config NFT_CHAIN_NAT_IPV6
+	depends on NF_TABLES_IPV6
+	depends on NF_NAT_IPV6 && NFT_NAT
+	tristate "IPv6 nf_tables nat chain support"
+	help
+	  This option enables the "nat" chain for IPv6 in nf_tables. This
+	  chain type is used to perform Network Address Translation (NAT)
+	  packet transformations such as the source, destination address and
+	  source and destination ports.
+
+config NFT_REJECT_IPV6
+	depends on NF_TABLES_IPV6
+	default NFT_REJECT
+	tristate
 
 config IP6_NF_IPTABLES
-	tristate "IP6 tables support (required for filtering/masq/NAT)"
+	tristate "IP6 tables support (required for filtering)"
+	depends on INET && IPV6
+	select NETFILTER_XTABLES
+	default m if NETFILTER_ADVANCED=n
 	help
 	  ip6tables is a general, extensible packet identification framework.
 	  Currently only the packet filtering and packet mangling subsystem
@@ -39,150 +68,103 @@ config IP6_NF_IPTABLES
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+if IP6_NF_IPTABLES
+
 # The simple matches.
-config IP6_NF_MATCH_LIMIT
-	tristate "limit match support"
-	depends on IP6_NF_IPTABLES
+config IP6_NF_MATCH_AH
+	tristate '"ah" match support'
+	depends on NETFILTER_ADVANCED
 	help
-	  limit matching allows you to control the rate at which a rule can be
-	  matched: mainly useful in combination with the LOG target ("LOG
-	  target support", below) and to avoid some Denial of Service attacks.
+	  This module allows one to match AH packets.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_MATCH_MAC
-	tristate "MAC address match support"
-	depends on IP6_NF_IPTABLES
+config IP6_NF_MATCH_EUI64
+	tristate '"eui64" address check'
+	depends on NETFILTER_ADVANCED
 	help
-	  mac matching allows you to match packets based on the source
-	  Ethernet address of the packet.
+	  This module performs checking on the IPv6 source address
+	  Compares the last 64 bits with the EUI64 (delivered
+	  from the MAC address) address
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_MATCH_RT
-	tristate "Routing header match support"
-	depends on IP6_NF_IPTABLES
+config IP6_NF_MATCH_FRAG
+	tristate '"frag" Fragmentation header match support'
+	depends on NETFILTER_ADVANCED
 	help
-	  rt matching allows you to match packets based on the routing
+	  frag matching allows you to match packets based on the fragmentation
 	  header of the packet.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP6_NF_MATCH_OPTS
-	tristate "Hop-by-hop and Dst opts header match support"
-	depends on IP6_NF_IPTABLES
+	tristate '"hbh" hop-by-hop and "dst" opts header match support'
+	depends on NETFILTER_ADVANCED
 	help
 	  This allows one to match packets based on the hop-by-hop
 	  and destination options headers of a packet.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_MATCH_FRAG
-	tristate "Fragmentation header match support"
-	depends on IP6_NF_IPTABLES
-	help
-	  frag matching allows you to match packets based on the fragmentation
-	  header of the packet.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP6_NF_MATCH_HL
-	tristate "HL match support"
-	depends on IP6_NF_IPTABLES
-	help
-	  HL matching allows you to match packets based on the hop
-	  limit of the packet.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-config IP6_NF_MATCH_MULTIPORT
-	tristate "Multiple port match support"
-	depends on IP6_NF_IPTABLES
-	help
-	  Multiport matching allows you to match TCP or UDP packets based on
-	  a series of source or destination ports: normally a rule can only
-	  match a single range of ports.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-config IP6_NF_MATCH_OWNER
-	tristate "Owner match support"
-	depends on IP6_NF_IPTABLES
-	help
-	  Packet owner matching allows you to match locally-generated packets
-	  based on who created them: the user, group, process or session.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-#  dep_tristate '  MAC address match support' CONFIG_IP6_NF_MATCH_MAC $CONFIG_IP6_NF_IPTABLES
-config IP6_NF_MATCH_MARK
-	tristate "netfilter MARK match support"
-	depends on IP6_NF_IPTABLES
-	help
-	  Netfilter mark matching allows you to match packets based on the
-	  `nfmark' value in the packet.  This can be set by the MARK target
-	  (see below).
-
-	  To compile it as a module, choose M here.  If unsure, say N.
+	tristate '"hl" hoplimit match support'
+	depends on NETFILTER_ADVANCED
+	select NETFILTER_XT_MATCH_HL
+	---help---
+	This is a backwards-compat option for the user's convenience
+	(e.g. when running oldconfig). It selects
+	CONFIG_NETFILTER_XT_MATCH_HL.
 
 config IP6_NF_MATCH_IPV6HEADER
-	tristate "IPv6 Extension Headers Match"
-	depends on IP6_NF_IPTABLES
+	tristate '"ipv6header" IPv6 Extension Headers Match'
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This module allows one to match packets based upon
 	  the ipv6 extension headers.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_MATCH_AHESP
-	tristate "AH/ESP match support"
-	depends on IP6_NF_IPTABLES
+config IP6_NF_MATCH_MH
+	tristate '"mh" match support'
+	depends on NETFILTER_ADVANCED
 	help
-	  This module allows one to match AH and ESP packets.
+	  This module allows one to match MH packets.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_MATCH_LENGTH
-	tristate "Packet Length match support"
-	depends on IP6_NF_IPTABLES
-	help
-	  This option allows you to match the length of a packet against a
-	  specific value or range of values.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-config IP6_NF_MATCH_EUI64
-	tristate "EUI64 address check"
-	depends on IP6_NF_IPTABLES
-	help
-	  This module performs checking on the IPv6 source address
-	  Compares the last 64 bits with the EUI64 (delivered
-	  from the MAC address) address
+config IP6_NF_MATCH_RPFILTER
+	tristate '"rpfilter" reverse path filter match support'
+	depends on NETFILTER_ADVANCED && (IP6_NF_MANGLE || IP6_NF_RAW)
+	---help---
+	  This option allows you to match packets whose replies would
+	  go out via the interface the packet came in.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
+	  The module will be called ip6t_rpfilter.
 
-config IP6_NF_MATCH_PHYSDEV
-	tristate "Physdev match support"
-	depends on IP6_NF_IPTABLES && BRIDGE_NETFILTER
+config IP6_NF_MATCH_RT
+	tristate '"rt" Routing header match support'
+	depends on NETFILTER_ADVANCED
 	help
-	  Physdev packet matching matches against the physical bridge ports
-	  the IP packet arrived on or will leave by.
+	  rt matching allows you to match packets based on the routing
+	  header of the packet.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-#  dep_tristate '  Multiple port match support' CONFIG_IP6_NF_MATCH_MULTIPORT $CONFIG_IP6_NF_IPTABLES
-#  dep_tristate '  TOS match support' CONFIG_IP6_NF_MATCH_TOS $CONFIG_IP6_NF_IPTABLES
-#  if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then
-#    dep_tristate '  Connection state match support' CONFIG_IP6_NF_MATCH_STATE $CONFIG_IP6_NF_CONNTRACK $CONFIG_IP6_NF_IPTABLES 
-#  fi
-#  if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
-#    dep_tristate '  Unclean match support (EXPERIMENTAL)' CONFIG_IP6_NF_MATCH_UNCLEAN $CONFIG_IP6_NF_IPTABLES
-#    dep_tristate '  Owner match support (EXPERIMENTAL)' CONFIG_IP6_NF_MATCH_OWNER $CONFIG_IP6_NF_IPTABLES
-#  fi
 # The targets
+config IP6_NF_TARGET_HL
+	tristate '"HL" hoplimit target support'
+	depends on NETFILTER_ADVANCED && IP6_NF_MANGLE
+	select NETFILTER_XT_TARGET_HL
+	---help---
+	This is a backwards-compatible option for the user's convenience
+	(e.g. when running oldconfig). It selects
+	CONFIG_NETFILTER_XT_TARGET_HL.
+
 config IP6_NF_FILTER
 	tristate "Packet filtering"
-	depends on IP6_NF_IPTABLES
+	default m if NETFILTER_ADVANCED=n
 	help
 	  Packet filtering defines a table `filter', which has a series of
 	  rules for simple packet filtering at local input, forwarding and
@@ -190,18 +172,10 @@ config IP6_NF_FILTER
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_TARGET_LOG
-	tristate "LOG target support"
-	depends on IP6_NF_FILTER
-	help
-	  This option adds a `LOG' target, which allows you to create rules in
-	  any iptables table which records the packet header to the syslog.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP6_NF_TARGET_REJECT
 	tristate "REJECT target support"
 	depends on IP6_NF_FILTER
+	default m if NETFILTER_ADVANCED=n
 	help
 	  The REJECT target allows a filtering rule to specify that an ICMPv6
 	  error should be issued in response to an incoming packet, rather
@@ -209,26 +183,22 @@ config IP6_NF_TARGET_REJECT
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_TARGET_NFQUEUE
-	tristate "NFQUEUE Target Support"
-	depends on IP_NF_IPTABLES
+config IP6_NF_TARGET_SYNPROXY
+	tristate "SYNPROXY target support"
+	depends on NF_CONNTRACK && NETFILTER_ADVANCED
+	select NETFILTER_SYNPROXY
+	select SYN_COOKIES
 	help
-	  This Target replaced the old obsolete QUEUE target.
+	  The SYNPROXY target allows you to intercept TCP connections and
+	  establish them using syncookies before they are passed on to the
+	  server. This allows to avoid conntrack and server resource usage
+	  during SYN-flood attacks.
 
-	  As opposed to QUEUE, it supports 65535 different queues,
-	  not just one.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
+	  To compile it as a module, choose M here. If unsure, say N.
 
-#  if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then
-#    dep_tristate '    REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER
-#    if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
-#      dep_tristate '    MIRROR target support (EXPERIMENTAL)' CONFIG_IP6_NF_TARGET_MIRROR $CONFIG_IP6_NF_FILTER
-#    fi
-#  fi
 config IP6_NF_MANGLE
 	tristate "Packet mangling"
-	depends on IP6_NF_IPTABLES
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This option adds a `mangle' table to iptables: see the man page for
 	  iptables(8).  This table is used for various packet alterations
@@ -236,47 +206,63 @@ config IP6_NF_MANGLE
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-#    dep_tristate '    TOS target support' CONFIG_IP6_NF_TARGET_TOS $CONFIG_IP_NF_MANGLE
-config IP6_NF_TARGET_MARK
-	tristate "MARK target support"
-	depends on IP6_NF_MANGLE
+config IP6_NF_RAW
+	tristate  'raw table support (required for TRACE)'
+	help
+	  This option adds a `raw' table to ip6tables. This table is the very
+	  first in the netfilter framework and hooks in at the PREROUTING
+	  and OUTPUT chains.
+
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+
+# security table for MAC policy
+config IP6_NF_SECURITY
+       tristate "Security table"
+       depends on SECURITY
+       depends on NETFILTER_ADVANCED
+       help
+         This option adds a `security' table to iptables, for use
+         with Mandatory Access Control (MAC) policy.
+
+         If unsure, say N.
+
+config NF_NAT_IPV6
+	tristate "IPv6 NAT"
+	depends on NF_CONNTRACK_IPV6
+	depends on NETFILTER_ADVANCED
+	select NF_NAT
 	help
-	  This option adds a `MARK' target, which allows you to create rules
-	  in the `mangle' table which alter the netfilter mark (nfmark) field
-	  associated with the packet packet prior to routing. This can change
-	  the routing method (see `Use netfilter MARK value as routing
-	  key') and can also be used by other subsystems to change their
-	  behavior.
+	  The IPv6 NAT option allows masquerading, port forwarding and other
+	  forms of full Network Address Port Translation. It is controlled by
+	  the `nat' table in ip6tables, see the man page for ip6tables(8).
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_TARGET_HL
-	tristate  'HL (hoplimit) target support'
-	depends on IP6_NF_MANGLE
+if NF_NAT_IPV6
+
+config IP6_NF_TARGET_MASQUERADE
+	tristate "MASQUERADE target support"
 	help
-	  This option adds a `HL' target, which enables the user to decrement
-	  the hoplimit value of the IPv6 header or set it to a given (lower)
-	  value.
-	
-	  While it is safe to decrement the hoplimit value, this option also
-	  enables functionality to increment and set the hoplimit value of the
-	  IPv6 header to arbitrary values.  This is EXTREMELY DANGEROUS since
-	  you can easily create immortal packets that loop forever on the
-	  network.  
+	  Masquerading is a special case of NAT: all outgoing connections are
+	  changed to seem to come from a particular interface's address, and
+	  if the interface goes down, those connections are lost.  This is
+	  only useful for dialup accounts with dynamic IP address (ie. your IP
+	  address will be different on next dialup).
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-#dep_tristate '  LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES
-config IP6_NF_RAW
-	tristate  'raw table support (required for TRACE)'
-	depends on IP6_NF_IPTABLES
+config IP6_NF_TARGET_NPT
+	tristate "NPT (Network Prefix translation) target support"
 	help
-	  This option adds a `raw' table to ip6tables. This table is the very
-	  first in the netfilter framework and hooks in at the PREROUTING
-	  and OUTPUT chains.
-	
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  If unsure, say `N'.
+	  This option adds the `SNPT' and `DNPT' target, which perform
+	  stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+endif # NF_NAT_IPV6
+
+endif # IP6_NF_IPTABLES
 
 endmenu
 
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2b2c370e8b1..70d3dd66f2c 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -4,26 +4,43 @@
 
 # Link order matters here.
 obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
-obj-$(CONFIG_IP6_NF_MATCH_LIMIT) += ip6t_limit.o
-obj-$(CONFIG_IP6_NF_MATCH_MARK) += ip6t_mark.o
-obj-$(CONFIG_IP6_NF_MATCH_LENGTH) += ip6t_length.o
-obj-$(CONFIG_IP6_NF_MATCH_MAC) += ip6t_mac.o
-obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
-obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o
-obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
-obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
-obj-$(CONFIG_IP6_NF_MATCH_AHESP) += ip6t_esp.o ip6t_ah.o
-obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
-obj-$(CONFIG_IP6_NF_MATCH_MULTIPORT) += ip6t_multiport.o
-obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o
-obj-$(CONFIG_IP6_NF_MATCH_PHYSDEV) += ip6t_physdev.o
 obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
 obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
-obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o
-obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o
-obj-$(CONFIG_IP6_NF_TARGET_NFQUEUE) += ip6t_NFQUEUE.o
-obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
-obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
 obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
-obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
+obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
+obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o
+
+# objects for l3 independent conntrack
+nf_conntrack_ipv6-y  :=  nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
+
+# l3 independent conntrack
+obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
+
+nf_nat_ipv6-y		:= nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
+obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
+
+# defrag
+nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
+obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
+
+# nf_tables
+obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
+obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
+obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
+obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
+
+# matches
+obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
+obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
+obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
+obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
+obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o
+obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
+obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o
+obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
+
+# targets
+obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
+obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
 obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
+obj-$(CONFIG_IP6_NF_TARGET_SYNPROXY) += ip6t_SYNPROXY.o
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
deleted file mode 100644
index 5027bbe6415..00000000000
--- a/net/ipv6/netfilter/ip6_queue.c
+++ /dev/null
@@ -1,730 +0,0 @@
-/*
- * This is a module which is used for queueing IPv6 packets and
- * communicating with userspace via netlink.
- *
- * (C) 2001 Fernando Anton, this code is GPL.
- *     IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
- *     Universidad Carlos III de Madrid - Leganes (Madrid) - Spain
- *     Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain
- *     email: fanton@it.uc3m.es
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 2001-11-06: First try. Working with ip_queue.c for IPv4 and trying
- *             to adapt it to IPv6
- *             HEAVILY based in ipqueue.c by James Morris. It's just
- *             a little modified version of it, so he's nearly the
- *             real coder of this.
- *             Few changes needed, mainly the hard_routing code and
- *             the netlink socket protocol (we're NETLINK_IP6_FW).
- * 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
- * 2005-02-04: Added /proc counter for dropped packets; fixed so
- *             packets aren't delivered to user space if they're going
- *             to be dropped.
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/init.h>
-#include <linux/ipv6.h>
-#include <linux/notifier.h>
-#include <linux/netdevice.h>
-#include <linux/netfilter.h>
-#include <linux/netlink.h>
-#include <linux/spinlock.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <net/sock.h>
-#include <net/ipv6.h>
-#include <net/ip6_route.h>
-#include <linux/netfilter_ipv4/ip_queue.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-
-#define IPQ_QMAX_DEFAULT 1024
-#define IPQ_PROC_FS_NAME "ip6_queue"
-#define NET_IPQ_QMAX 2088
-#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
-
-struct ipq_queue_entry {
-	struct list_head list;
-	struct nf_info *info;
-	struct sk_buff *skb;
-};
-
-typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
-
-static unsigned char copy_mode = IPQ_COPY_NONE;
-static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
-static DEFINE_RWLOCK(queue_lock);
-static int peer_pid;
-static unsigned int copy_range;
-static unsigned int queue_total;
-static unsigned int queue_dropped = 0;
-static unsigned int queue_user_dropped = 0;
-static struct sock *ipqnl;
-static LIST_HEAD(queue_list);
-static DECLARE_MUTEX(ipqnl_sem);
-
-static void
-ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
-{
-	local_bh_disable();
-	nf_reinject(entry->skb, entry->info, verdict);
-	local_bh_enable();
-	kfree(entry);
-}
-
-static inline void
-__ipq_enqueue_entry(struct ipq_queue_entry *entry)
-{
-       list_add(&entry->list, &queue_list);
-       queue_total++;
-}
-
-/*
- * Find and return a queued entry matched by cmpfn, or return the last
- * entry if cmpfn is NULL.
- */
-static inline struct ipq_queue_entry *
-__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
-{
-	struct list_head *p;
-
-	list_for_each_prev(p, &queue_list) {
-		struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;
-		
-		if (!cmpfn || cmpfn(entry, data))
-			return entry;
-	}
-	return NULL;
-}
-
-static inline void
-__ipq_dequeue_entry(struct ipq_queue_entry *entry)
-{
-	list_del(&entry->list);
-	queue_total--;
-}
-
-static inline struct ipq_queue_entry *
-__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
-{
-	struct ipq_queue_entry *entry;
-
-	entry = __ipq_find_entry(cmpfn, data);
-	if (entry == NULL)
-		return NULL;
-
-	__ipq_dequeue_entry(entry);
-	return entry;
-}
-
-
-static inline void
-__ipq_flush(int verdict)
-{
-	struct ipq_queue_entry *entry;
-	
-	while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
-		ipq_issue_verdict(entry, verdict);
-}
-
-static inline int
-__ipq_set_mode(unsigned char mode, unsigned int range)
-{
-	int status = 0;
-	
-	switch(mode) {
-	case IPQ_COPY_NONE:
-	case IPQ_COPY_META:
-		copy_mode = mode;
-		copy_range = 0;
-		break;
-		
-	case IPQ_COPY_PACKET:
-		copy_mode = mode;
-		copy_range = range;
-		if (copy_range > 0xFFFF)
-			copy_range = 0xFFFF;
-		break;
-		
-	default:
-		status = -EINVAL;
-
-	}
-	return status;
-}
-
-static inline void
-__ipq_reset(void)
-{
-	peer_pid = 0;
-	net_disable_timestamp();
-	__ipq_set_mode(IPQ_COPY_NONE, 0);
-	__ipq_flush(NF_DROP);
-}
-
-static struct ipq_queue_entry *
-ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
-{
-	struct ipq_queue_entry *entry;
-	
-	write_lock_bh(&queue_lock);
-	entry = __ipq_find_dequeue_entry(cmpfn, data);
-	write_unlock_bh(&queue_lock);
-	return entry;
-}
-
-static void
-ipq_flush(int verdict)
-{
-	write_lock_bh(&queue_lock);
-	__ipq_flush(verdict);
-	write_unlock_bh(&queue_lock);
-}
-
-static struct sk_buff *
-ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
-{
-	unsigned char *old_tail;
-	size_t size = 0;
-	size_t data_len = 0;
-	struct sk_buff *skb;
-	struct ipq_packet_msg *pmsg;
-	struct nlmsghdr *nlh;
-
-	read_lock_bh(&queue_lock);
-	
-	switch (copy_mode) {
-	case IPQ_COPY_META:
-	case IPQ_COPY_NONE:
-		size = NLMSG_SPACE(sizeof(*pmsg));
-		data_len = 0;
-		break;
-	
-	case IPQ_COPY_PACKET:
-		if (entry->skb->ip_summed == CHECKSUM_HW &&
-		    (*errp = skb_checksum_help(entry->skb,
-		                               entry->info->outdev == NULL))) {
-			read_unlock_bh(&queue_lock);
-			return NULL;
-		}
-		if (copy_range == 0 || copy_range > entry->skb->len)
-			data_len = entry->skb->len;
-		else
-			data_len = copy_range;
-		
-		size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
-		break;
-	
-	default:
-		*errp = -EINVAL;
-		read_unlock_bh(&queue_lock);
-		return NULL;
-	}
-
-	read_unlock_bh(&queue_lock);
-
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb)
-		goto nlmsg_failure;
-		
-	old_tail= skb->tail;
-	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
-	pmsg = NLMSG_DATA(nlh);
-	memset(pmsg, 0, sizeof(*pmsg));
-
-	pmsg->packet_id       = (unsigned long )entry;
-	pmsg->data_len        = data_len;
-	pmsg->timestamp_sec   = entry->skb->tstamp.off_sec;
-	pmsg->timestamp_usec  = entry->skb->tstamp.off_usec;
-	pmsg->mark            = entry->skb->nfmark;
-	pmsg->hook            = entry->info->hook;
-	pmsg->hw_protocol     = entry->skb->protocol;
-	
-	if (entry->info->indev)
-		strcpy(pmsg->indev_name, entry->info->indev->name);
-	else
-		pmsg->indev_name[0] = '\0';
-	
-	if (entry->info->outdev)
-		strcpy(pmsg->outdev_name, entry->info->outdev->name);
-	else
-		pmsg->outdev_name[0] = '\0';
-	
-	if (entry->info->indev && entry->skb->dev) {
-		pmsg->hw_type = entry->skb->dev->type;
-		if (entry->skb->dev->hard_header_parse)
-			pmsg->hw_addrlen =
-				entry->skb->dev->hard_header_parse(entry->skb,
-				                                   pmsg->hw_addr);
-	}
-	
-	if (data_len)
-		if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
-			BUG();
-		
-	nlh->nlmsg_len = skb->tail - old_tail;
-	return skb;
-
-nlmsg_failure:
-	if (skb)
-		kfree_skb(skb);
-	*errp = -EINVAL;
-	printk(KERN_ERR "ip6_queue: error creating packet message\n");
-	return NULL;
-}
-
-static int
-ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, 
-		   unsigned int queuenum, void *data)
-{
-	int status = -EINVAL;
-	struct sk_buff *nskb;
-	struct ipq_queue_entry *entry;
-
-	if (copy_mode == IPQ_COPY_NONE)
-		return -EAGAIN;
-
-	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
-	if (entry == NULL) {
-		printk(KERN_ERR "ip6_queue: OOM in ipq_enqueue_packet()\n");
-		return -ENOMEM;
-	}
-
-	entry->info = info;
-	entry->skb = skb;
-
-	nskb = ipq_build_packet_message(entry, &status);
-	if (nskb == NULL)
-		goto err_out_free;
-		
-	write_lock_bh(&queue_lock);
-	
-	if (!peer_pid)
-		goto err_out_free_nskb; 
-
-	if (queue_total >= queue_maxlen) {
-                queue_dropped++;
-		status = -ENOSPC;
-		if (net_ratelimit())
-		        printk (KERN_WARNING "ip6_queue: fill at %d entries, "
-				"dropping packet(s).  Dropped: %d\n", queue_total,
-				queue_dropped);
-		goto err_out_free_nskb;
-	}
-
- 	/* netlink_unicast will either free the nskb or attach it to a socket */ 
-	status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
-	if (status < 0) {
- 	        queue_user_dropped++;
-		goto err_out_unlock;
-	}
-	
-	__ipq_enqueue_entry(entry);
-
-	write_unlock_bh(&queue_lock);
-	return status;
-	
-err_out_free_nskb:
-	kfree_skb(nskb); 
-	
-err_out_unlock:
-	write_unlock_bh(&queue_lock);
-
-err_out_free:
-	kfree(entry);
-	return status;
-}
-
-static int
-ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
-{
-	int diff;
-	struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
-
-	if (v->data_len < sizeof(*user_iph))
-		return 0;
-	diff = v->data_len - e->skb->len;
-	if (diff < 0)
-		skb_trim(e->skb, v->data_len);
-	else if (diff > 0) {
-		if (v->data_len > 0xFFFF)
-			return -EINVAL;
-		if (diff > skb_tailroom(e->skb)) {
-			struct sk_buff *newskb;
-			
-			newskb = skb_copy_expand(e->skb,
-			                         skb_headroom(e->skb),
-			                         diff,
-			                         GFP_ATOMIC);
-			if (newskb == NULL) {
-				printk(KERN_WARNING "ip6_queue: OOM "
-				      "in mangle, dropping packet\n");
-				return -ENOMEM;
-			}
-			if (e->skb->sk)
-				skb_set_owner_w(newskb, e->skb->sk);
-			kfree_skb(e->skb);
-			e->skb = newskb;
-		}
-		skb_put(e->skb, diff);
-	}
-	if (!skb_make_writable(&e->skb, v->data_len))
-		return -ENOMEM;
-	memcpy(e->skb->data, v->payload, v->data_len);
-	e->skb->ip_summed = CHECKSUM_NONE;
-
-	return 0;
-}
-
-static inline int
-id_cmp(struct ipq_queue_entry *e, unsigned long id)
-{
-	return (id == (unsigned long )e);
-}
-
-static int
-ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
-{
-	struct ipq_queue_entry *entry;
-
-	if (vmsg->value > NF_MAX_VERDICT)
-		return -EINVAL;
-
-	entry = ipq_find_dequeue_entry(id_cmp, vmsg->id);
-	if (entry == NULL)
-		return -ENOENT;
-	else {
-		int verdict = vmsg->value;
-		
-		if (vmsg->data_len && vmsg->data_len == len)
-			if (ipq_mangle_ipv6(vmsg, entry) < 0)
-				verdict = NF_DROP;
-		
-		ipq_issue_verdict(entry, verdict);
-		return 0;
-	}
-}
-
-static int
-ipq_set_mode(unsigned char mode, unsigned int range)
-{
-	int status;
-
-	write_lock_bh(&queue_lock);
-	status = __ipq_set_mode(mode, range);
-	write_unlock_bh(&queue_lock);
-	return status;
-}
-
-static int
-ipq_receive_peer(struct ipq_peer_msg *pmsg,
-                 unsigned char type, unsigned int len)
-{
-	int status = 0;
-
-	if (len < sizeof(*pmsg))
-		return -EINVAL;
-
-	switch (type) {
-	case IPQM_MODE:
-		status = ipq_set_mode(pmsg->msg.mode.value,
-		                      pmsg->msg.mode.range);
-		break;
-		
-	case IPQM_VERDICT:
-		if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
-			status = -EINVAL;
-		else
-			status = ipq_set_verdict(&pmsg->msg.verdict,
-			                         len - sizeof(*pmsg));
-			break;
-	default:
-		status = -EINVAL;
-	}
-	return status;
-}
-
-static int
-dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
-{
-	if (entry->info->indev)
-		if (entry->info->indev->ifindex == ifindex)
-			return 1;
-			
-	if (entry->info->outdev)
-		if (entry->info->outdev->ifindex == ifindex)
-			return 1;
-
-	return 0;
-}
-
-static void
-ipq_dev_drop(int ifindex)
-{
-	struct ipq_queue_entry *entry;
-	
-	while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
-		ipq_issue_verdict(entry, NF_DROP);
-}
-
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
-
-static inline void
-ipq_rcv_skb(struct sk_buff *skb)
-{
-	int status, type, pid, flags, nlmsglen, skblen;
-	struct nlmsghdr *nlh;
-
-	skblen = skb->len;
-	if (skblen < sizeof(*nlh))
-		return;
-
-	nlh = (struct nlmsghdr *)skb->data;
-	nlmsglen = nlh->nlmsg_len;
-	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
-		return;
-
-	pid = nlh->nlmsg_pid;
-	flags = nlh->nlmsg_flags;
-	
-	if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
-		RCV_SKB_FAIL(-EINVAL);
-		
-	if (flags & MSG_TRUNC)
-		RCV_SKB_FAIL(-ECOMM);
-		
-	type = nlh->nlmsg_type;
-	if (type < NLMSG_NOOP || type >= IPQM_MAX)
-		RCV_SKB_FAIL(-EINVAL);
-		
-	if (type <= IPQM_BASE)
-		return;
-	
-	if (security_netlink_recv(skb))
-		RCV_SKB_FAIL(-EPERM);	
-
-	write_lock_bh(&queue_lock);
-	
-	if (peer_pid) {
-		if (peer_pid != pid) {
-			write_unlock_bh(&queue_lock);
-			RCV_SKB_FAIL(-EBUSY);
-		}
-	} else {
-		net_enable_timestamp();
-		peer_pid = pid;
-	}
-		
-	write_unlock_bh(&queue_lock);
-	
-	status = ipq_receive_peer(NLMSG_DATA(nlh), type,
-	                          skblen - NLMSG_LENGTH(0));
-	if (status < 0)
-		RCV_SKB_FAIL(status);
-		
-	if (flags & NLM_F_ACK)
-		netlink_ack(skb, nlh, 0);
-        return;
-}
-
-static void
-ipq_rcv_sk(struct sock *sk, int len)
-{
-	struct sk_buff *skb;
-	unsigned int qlen;
-
-	down(&ipqnl_sem);
-			
-	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
-		skb = skb_dequeue(&sk->sk_receive_queue);
-		ipq_rcv_skb(skb);
-		kfree_skb(skb);
-	}
-		
-	up(&ipqnl_sem);
-}
-
-static int
-ipq_rcv_dev_event(struct notifier_block *this,
-                  unsigned long event, void *ptr)
-{
-	struct net_device *dev = ptr;
-
-	/* Drop any packets associated with the downed device */
-	if (event == NETDEV_DOWN)
-		ipq_dev_drop(dev->ifindex);
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block ipq_dev_notifier = {
-	.notifier_call	= ipq_rcv_dev_event,
-};
-
-static int
-ipq_rcv_nl_event(struct notifier_block *this,
-                 unsigned long event, void *ptr)
-{
-	struct netlink_notify *n = ptr;
-
-	if (event == NETLINK_URELEASE &&
-	    n->protocol == NETLINK_IP6_FW && n->pid) {
-		write_lock_bh(&queue_lock);
-		if (n->pid == peer_pid)
-			__ipq_reset();
-		write_unlock_bh(&queue_lock);
-	}
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block ipq_nl_notifier = {
-	.notifier_call	= ipq_rcv_nl_event,
-};
-
-static struct ctl_table_header *ipq_sysctl_header;
-
-static ctl_table ipq_table[] = {
-	{
-		.ctl_name	= NET_IPQ_QMAX,
-		.procname	= NET_IPQ_QMAX_NAME,
-		.data		= &queue_maxlen,
-		.maxlen		= sizeof(queue_maxlen),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
- 	{ .ctl_name = 0 }
-};
-
-static ctl_table ipq_dir_table[] = {
-	{
-		.ctl_name	= NET_IPV6,
-		.procname	= "ipv6",
-		.mode		= 0555,
-		.child		= ipq_table
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ipq_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= ipq_dir_table
-	},
-	{ .ctl_name = 0 }
-};
-
-static int
-ipq_get_info(char *buffer, char **start, off_t offset, int length)
-{
-	int len;
-
-	read_lock_bh(&queue_lock);
-	
-	len = sprintf(buffer,
-	              "Peer PID          : %d\n"
-	              "Copy mode         : %hu\n"
-	              "Copy range        : %u\n"
-	              "Queue length      : %u\n"
-	              "Queue max. length : %u\n"
-		      "Queue dropped     : %u\n"
-		      "Netfilter dropped : %u\n",
-	              peer_pid,
-	              copy_mode,
-	              copy_range,
-	              queue_total,
-	              queue_maxlen,
-		      queue_dropped,
-		      queue_user_dropped);
-
-	read_unlock_bh(&queue_lock);
-	
-	*start = buffer + offset;
-	len -= offset;
-	if (len > length)
-		len = length;
-	else if (len < 0)
-		len = 0;
-	return len;
-}
-
-static struct nf_queue_handler nfqh = {
-	.name	= "ip6_queue",
-	.outfn	= &ipq_enqueue_packet,
-};
-
-static int
-init_or_cleanup(int init)
-{
-	int status = -ENOMEM;
-	struct proc_dir_entry *proc;
-	
-	if (!init)
-		goto cleanup;
-
-	netlink_register_notifier(&ipq_nl_notifier);
-	ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk,
-	                              THIS_MODULE);
-	if (ipqnl == NULL) {
-		printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
-		goto cleanup_netlink_notifier;
-	}
-
-	proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
-	if (proc)
-		proc->owner = THIS_MODULE;
-	else {
-		printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
-		goto cleanup_ipqnl;
-	}
-	
-	register_netdevice_notifier(&ipq_dev_notifier);
-	ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);
-	
-	status = nf_register_queue_handler(PF_INET6, &nfqh);
-	if (status < 0) {
-		printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
-		goto cleanup_sysctl;
-	}
-	return status;
-
-cleanup:
-	nf_unregister_queue_handlers(&nfqh);
-	synchronize_net();
-	ipq_flush(NF_DROP);
-	
-cleanup_sysctl:
-	unregister_sysctl_table(ipq_sysctl_header);
-	unregister_netdevice_notifier(&ipq_dev_notifier);
-	proc_net_remove(IPQ_PROC_FS_NAME);
-	
-cleanup_ipqnl:
-	sock_release(ipqnl->sk_socket);
-	down(&ipqnl_sem);
-	up(&ipqnl_sem);
-	
-cleanup_netlink_notifier:
-	netlink_unregister_notifier(&ipq_nl_notifier);
-	return status;
-}
-
-static int __init init(void)
-{
-	
-	return init_or_cleanup(1);
-}
-
-static void __exit fini(void)
-{
-	init_or_cleanup(0);
-}
-
-MODULE_DESCRIPTION("IPv6 packet queue handler");
-MODULE_LICENSE("GPL");
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7d492226c16..e080fbbbc0e 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -3,75 +3,60 @@
  *
  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
- * 	- increase module usage count as soon as we have rules inside
- * 	  a table
- * 06 Jun 2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- *      - new extension header parser code
  */
-#include <linux/config.h>
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/capability.h>
+#include <linux/in.h>
 #include <linux/skbuff.h>
 #include <linux/kmod.h>
 #include <linux/vmalloc.h>
 #include <linux/netdevice.h>
 #include <linux/module.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
+#include <linux/poison.h>
 #include <linux/icmpv6.h>
 #include <net/ipv6.h>
+#include <net/compat.h>
 #include <asm/uaccess.h>
-#include <asm/semaphore.h>
+#include <linux/mutex.h>
 #include <linux/proc_fs.h>
+#include <linux/err.h>
 #include <linux/cpumask.h>
 
 #include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_log.h>
+#include "../../netfilter/xt_repldata.h"
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("IPv6 packet filter");
 
-#define IPV6_HDR_LEN	(sizeof(struct ipv6hdr))
-#define IPV6_OPTHDR_LEN	(sizeof(struct ipv6_opt_hdr))
-
 /*#define DEBUG_IP_FIREWALL*/
 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
 /*#define DEBUG_IP_FIREWALL_USER*/
 
 #ifdef DEBUG_IP_FIREWALL
-#define dprintf(format, args...)  printk(format , ## args)
+#define dprintf(format, args...) pr_info(format , ## args)
 #else
 #define dprintf(format, args...)
 #endif
 
 #ifdef DEBUG_IP_FIREWALL_USER
-#define duprintf(format, args...) printk(format , ## args)
+#define duprintf(format, args...) pr_info(format , ## args)
 #else
 #define duprintf(format, args...)
 #endif
 
 #ifdef CONFIG_NETFILTER_DEBUG
-#define IP_NF_ASSERT(x)						\
-do {								\
-	if (!(x))						\
-		printk("IP_NF_ASSERT: %s:%s:%u\n",		\
-		       __FUNCTION__, __FILE__, __LINE__);	\
-} while(0)
+#define IP_NF_ASSERT(x)	WARN_ON(!(x))
 #else
 #define IP_NF_ASSERT(x)
 #endif
-#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
-
-static DECLARE_MUTEX(ip6t_mutex);
-
-/* Must have mutex */
-#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
-#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 /* All the better to debug you with... */
@@ -79,6 +64,12 @@ static DECLARE_MUTEX(ip6t_mutex);
 #define inline
 #endif
 
+void *ip6t_alloc_initial_table(const struct xt_table *info)
+{
+	return xt_alloc_initial_table(ip6t, IP6T);
+}
+EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table);
+
 /*
    We keep a set of rules for each CPU, so we can avoid write-locking
    them in the softirq when updating the counters and therefore
@@ -86,92 +77,27 @@ static DECLARE_MUTEX(ip6t_mutex);
    context stops packets coming through and allows user context to read
    the counters or update the rules.
 
-   To be cache friendly on SMP, we arrange them like so:
-   [ n-entries ]
-   ... cache-align padding ...
-   [ n-entries ]
-
    Hence the start of any table is given by get_table() below.  */
 
-/* The table itself */
-struct ip6t_table_info
-{
-	/* Size per table */
-	unsigned int size;
-	/* Number of entries: FIXME. --RR */
-	unsigned int number;
-	/* Initial number of entries. Needed for module usage count */
-	unsigned int initial_entries;
-
-	/* Entry points and underflows */
-	unsigned int hook_entry[NF_IP6_NUMHOOKS];
-	unsigned int underflow[NF_IP6_NUMHOOKS];
-
-	/* ip6t_entry tables: one per CPU */
-	char entries[0] ____cacheline_aligned;
-};
-
-static LIST_HEAD(ip6t_target);
-static LIST_HEAD(ip6t_match);
-static LIST_HEAD(ip6t_tables);
-#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
-
-#ifdef CONFIG_SMP
-#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
-#else
-#define TABLE_OFFSET(t,p) 0
-#endif
-
-#if 0
-#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
-#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
-#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
-#endif
-
-static int ip6_masked_addrcmp(struct in6_addr addr1, struct in6_addr mask,
-			      struct in6_addr addr2)
-{
-	int i;
-	for( i = 0; i < 16; i++){
-		if((addr1.s6_addr[i] & mask.s6_addr[i]) != 
-		   (addr2.s6_addr[i] & mask.s6_addr[i]))
-			return 1;
-	}
-	return 0;
-}
-
-/* Check for an extension */
-int 
-ip6t_ext_hdr(u8 nexthdr)
-{
-        return ( (nexthdr == IPPROTO_HOPOPTS)   ||
-                 (nexthdr == IPPROTO_ROUTING)   ||
-                 (nexthdr == IPPROTO_FRAGMENT)  ||
-                 (nexthdr == IPPROTO_ESP)       ||
-                 (nexthdr == IPPROTO_AH)        ||
-                 (nexthdr == IPPROTO_NONE)      ||
-                 (nexthdr == IPPROTO_DSTOPTS) );
-}
-
 /* Returns whether matches rule or not. */
-static inline int
+/* Performance critical - called for every packet */
+static inline bool
 ip6_packet_match(const struct sk_buff *skb,
 		 const char *indev,
 		 const char *outdev,
 		 const struct ip6t_ip6 *ip6info,
 		 unsigned int *protoff,
-		 int *fragoff)
+		 int *fragoff, bool *hotdrop)
 {
-	size_t i;
 	unsigned long ret;
-	const struct ipv6hdr *ipv6 = skb->nh.ipv6h;
+	const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
 
-#define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
+#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg)))
 
-	if (FWINV(ip6_masked_addrcmp(ipv6->saddr,ip6info->smsk,ip6info->src),
-		  IP6T_INV_SRCIP)
-	    || FWINV(ip6_masked_addrcmp(ipv6->daddr,ip6info->dmsk,ip6info->dst),
-		     IP6T_INV_DSTIP)) {
+	if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
+				       &ip6info->src), IP6T_INV_SRCIP) ||
+	    FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
+				       &ip6info->dst), IP6T_INV_DSTIP)) {
 		dprintf("Source or dest mismatch.\n");
 /*
 		dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
@@ -180,189 +106,228 @@ ip6_packet_match(const struct sk_buff *skb,
 		dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
 			ipinfo->dmsk.s_addr, ipinfo->dst.s_addr,
 			ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/
-		return 0;
+		return false;
 	}
 
-	/* Look for ifname matches; this should unroll nicely. */
-	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
-		ret |= (((const unsigned long *)indev)[i]
-			^ ((const unsigned long *)ip6info->iniface)[i])
-			& ((const unsigned long *)ip6info->iniface_mask)[i];
-	}
+	ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask);
 
 	if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
 		dprintf("VIA in mismatch (%s vs %s).%s\n",
 			indev, ip6info->iniface,
 			ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":"");
-		return 0;
+		return false;
 	}
 
-	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
-		ret |= (((const unsigned long *)outdev)[i]
-			^ ((const unsigned long *)ip6info->outiface)[i])
-			& ((const unsigned long *)ip6info->outiface_mask)[i];
-	}
+	ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask);
 
 	if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
 		dprintf("VIA out mismatch (%s vs %s).%s\n",
 			outdev, ip6info->outiface,
 			ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":"");
-		return 0;
+		return false;
 	}
 
 /* ... might want to do something with class and flowlabel here ... */
 
 	/* look for the desired protocol header */
 	if((ip6info->flags & IP6T_F_PROTO)) {
-		u_int8_t currenthdr = ipv6->nexthdr;
-		struct ipv6_opt_hdr _hdr, *hp;
-		u_int16_t ptr;		/* Header offset in skb */
-		u_int16_t hdrlen;	/* Header */
-		u_int16_t _fragoff = 0, *fp = NULL;
-
-		ptr = IPV6_HDR_LEN;
-
-		while (ip6t_ext_hdr(currenthdr)) {
-	                /* Is there enough space for the next ext header? */
-	                if (skb->len - ptr < IPV6_OPTHDR_LEN)
-	                        return 0;
-
-			/* NONE or ESP: there isn't protocol part */
-			/* If we want to count these packets in '-p all',
-			 * we will change the return 0 to 1*/
-			if ((currenthdr == IPPROTO_NONE) || 
-				(currenthdr == IPPROTO_ESP))
-				break;
-
-			hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-			BUG_ON(hp == NULL);
-
-			/* Size calculation */
-	                if (currenthdr == IPPROTO_FRAGMENT) {
-				fp = skb_header_pointer(skb,
-						   ptr+offsetof(struct frag_hdr,
-								frag_off),
-						   sizeof(_fragoff),
-						   &_fragoff);
-				if (fp == NULL)
-					return 0;
-
-				_fragoff = ntohs(*fp) & ~0x7;
-	                        hdrlen = 8;
-	                } else if (currenthdr == IPPROTO_AH)
-	                        hdrlen = (hp->hdrlen+2)<<2;
-	                else
-	                        hdrlen = ipv6_optlen(hp);
-
-			currenthdr = hp->nexthdr;
-	                ptr += hdrlen;
-			/* ptr is too large */
-	                if ( ptr > skb->len ) 
-				return 0;
-			if (_fragoff) {
-				if (ip6t_ext_hdr(currenthdr))
-					return 0;
-				break;
-			}
+		int protohdr;
+		unsigned short _frag_off;
+
+		protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off, NULL);
+		if (protohdr < 0) {
+			if (_frag_off == 0)
+				*hotdrop = true;
+			return false;
 		}
-
-		*protoff = ptr;
-		*fragoff = _fragoff;
-
-		/* currenthdr contains the protocol header */
+		*fragoff = _frag_off;
 
 		dprintf("Packet protocol %hi ?= %s%hi.\n",
-				currenthdr, 
+				protohdr,
 				ip6info->invflags & IP6T_INV_PROTO ? "!":"",
 				ip6info->proto);
 
-		if (ip6info->proto == currenthdr) {
+		if (ip6info->proto == protohdr) {
 			if(ip6info->invflags & IP6T_INV_PROTO) {
-				return 0;
+				return false;
 			}
-			return 1;
+			return true;
 		}
 
 		/* We need match for the '-p all', too! */
 		if ((ip6info->proto != 0) &&
 			!(ip6info->invflags & IP6T_INV_PROTO))
-			return 0;
+			return false;
 	}
-	return 1;
+	return true;
 }
 
 /* should be ip6 safe */
-static inline int 
+static bool
 ip6_checkentry(const struct ip6t_ip6 *ipv6)
 {
 	if (ipv6->flags & ~IP6T_F_MASK) {
 		duprintf("Unknown flag bits set: %08X\n",
 			 ipv6->flags & ~IP6T_F_MASK);
-		return 0;
+		return false;
 	}
 	if (ipv6->invflags & ~IP6T_INV_MASK) {
 		duprintf("Unknown invflag bits set: %08X\n",
 			 ipv6->invflags & ~IP6T_INV_MASK);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static unsigned int
-ip6t_error(struct sk_buff **pskb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  unsigned int hooknum,
-	  const void *targinfo,
-	  void *userinfo)
+ip6t_error(struct sk_buff *skb, const struct xt_action_param *par)
 {
-	if (net_ratelimit())
-		printk("ip6_tables: error: `%s'\n", (char *)targinfo);
+	net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo);
 
 	return NF_DROP;
 }
 
-static inline
-int do_match(struct ip6t_entry_match *m,
-	     const struct sk_buff *skb,
-	     const struct net_device *in,
-	     const struct net_device *out,
-	     int offset,
-	     unsigned int protoff,
-	     int *hotdrop)
+static inline struct ip6t_entry *
+get_entry(const void *base, unsigned int offset)
 {
-	/* Stop iteration if it doesn't match */
-	if (!m->u.kernel.match->match(skb, in, out, m->data,
-				      offset, protoff, hotdrop))
+	return (struct ip6t_entry *)(base + offset);
+}
+
+/* All zeroes == unconditional rule. */
+/* Mildly perf critical (only if packet tracing is on) */
+static inline bool unconditional(const struct ip6t_ip6 *ipv6)
+{
+	static const struct ip6t_ip6 uncond;
+
+	return memcmp(ipv6, &uncond, sizeof(uncond)) == 0;
+}
+
+static inline const struct xt_entry_target *
+ip6t_get_target_c(const struct ip6t_entry *e)
+{
+	return ip6t_get_target((struct ip6t_entry *)e);
+}
+
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
+/* This cries for unification! */
+static const char *const hooknames[] = {
+	[NF_INET_PRE_ROUTING]		= "PREROUTING",
+	[NF_INET_LOCAL_IN]		= "INPUT",
+	[NF_INET_FORWARD]		= "FORWARD",
+	[NF_INET_LOCAL_OUT]		= "OUTPUT",
+	[NF_INET_POST_ROUTING]		= "POSTROUTING",
+};
+
+enum nf_ip_trace_comments {
+	NF_IP6_TRACE_COMMENT_RULE,
+	NF_IP6_TRACE_COMMENT_RETURN,
+	NF_IP6_TRACE_COMMENT_POLICY,
+};
+
+static const char *const comments[] = {
+	[NF_IP6_TRACE_COMMENT_RULE]	= "rule",
+	[NF_IP6_TRACE_COMMENT_RETURN]	= "return",
+	[NF_IP6_TRACE_COMMENT_POLICY]	= "policy",
+};
+
+static struct nf_loginfo trace_loginfo = {
+	.type = NF_LOG_TYPE_LOG,
+	.u = {
+		.log = {
+			.level = 4,
+			.logflags = NF_LOG_MASK,
+		},
+	},
+};
+
+/* Mildly perf critical (only if packet tracing is on) */
+static inline int
+get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e,
+		      const char *hookname, const char **chainname,
+		      const char **comment, unsigned int *rulenum)
+{
+	const struct xt_standard_target *t = (void *)ip6t_get_target_c(s);
+
+	if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
+		/* Head of user chain: ERROR target with chainname */
+		*chainname = t->target.data;
+		(*rulenum) = 0;
+	} else if (s == e) {
+		(*rulenum)++;
+
+		if (s->target_offset == sizeof(struct ip6t_entry) &&
+		    strcmp(t->target.u.kernel.target->name,
+			   XT_STANDARD_TARGET) == 0 &&
+		    t->verdict < 0 &&
+		    unconditional(&s->ipv6)) {
+			/* Tail of chains: STANDARD target (return/policy) */
+			*comment = *chainname == hookname
+				? comments[NF_IP6_TRACE_COMMENT_POLICY]
+				: comments[NF_IP6_TRACE_COMMENT_RETURN];
+		}
 		return 1;
-	else
-		return 0;
+	} else
+		(*rulenum)++;
+
+	return 0;
 }
 
-static inline struct ip6t_entry *
-get_entry(void *base, unsigned int offset)
+static void trace_packet(const struct sk_buff *skb,
+			 unsigned int hook,
+			 const struct net_device *in,
+			 const struct net_device *out,
+			 const char *tablename,
+			 const struct xt_table_info *private,
+			 const struct ip6t_entry *e)
 {
-	return (struct ip6t_entry *)(base + offset);
+	const void *table_base;
+	const struct ip6t_entry *root;
+	const char *hookname, *chainname, *comment;
+	const struct ip6t_entry *iter;
+	unsigned int rulenum = 0;
+	struct net *net = dev_net(in ? in : out);
+
+	table_base = private->entries[smp_processor_id()];
+	root = get_entry(table_base, private->hook_entry[hook]);
+
+	hookname = chainname = hooknames[hook];
+	comment = comments[NF_IP6_TRACE_COMMENT_RULE];
+
+	xt_entry_foreach(iter, root, private->size - private->hook_entry[hook])
+		if (get_chainname_rulenum(iter, e, hookname,
+		    &chainname, &comment, &rulenum) != 0)
+			break;
+
+	nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo,
+		      "TRACE: %s:%s:%s:%u ",
+		      tablename, chainname, comment, rulenum);
+}
+#endif
+
+static inline __pure struct ip6t_entry *
+ip6t_next_entry(const struct ip6t_entry *entry)
+{
+	return (void *)entry + entry->next_offset;
 }
 
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
-ip6t_do_table(struct sk_buff **pskb,
+ip6t_do_table(struct sk_buff *skb,
 	      unsigned int hook,
 	      const struct net_device *in,
 	      const struct net_device *out,
-	      struct ip6t_table *table,
-	      void *userdata)
+	      struct xt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
-	int offset = 0;
-	unsigned int protoff = 0;
-	int hotdrop = 0;
 	/* Initializing verdict to NF_DROP keeps gcc happy. */
 	unsigned int verdict = NF_DROP;
 	const char *indev, *outdev;
-	void *table_base;
-	struct ip6t_entry *e, *back;
+	const void *table_base;
+	struct ip6t_entry *e, **jumpstack;
+	unsigned int *stackptr, origptr, cpu;
+	const struct xt_table_info *private;
+	struct xt_action_param acpar;
+	unsigned int addend;
 
 	/* Initialization */
 	indev = in ? in->name : nulldevname;
@@ -373,292 +338,130 @@ ip6t_do_table(struct sk_buff **pskb,
 	 * things we don't know, ie. tcp syn flag or ports).  If the
 	 * rule is also a fragment-specific rule, non-fragments won't
 	 * match it. */
+	acpar.hotdrop = false;
+	acpar.in      = in;
+	acpar.out     = out;
+	acpar.family  = NFPROTO_IPV6;
+	acpar.hooknum = hook;
 
-	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-	table_base = (void *)table->private->entries
-		+ TABLE_OFFSET(table->private, smp_processor_id());
-	e = get_entry(table_base, table->private->hook_entry[hook]);
-
-#ifdef CONFIG_NETFILTER_DEBUG
-	/* Check noone else using our table */
-	if (((struct ip6t_entry *)table_base)->comefrom != 0xdead57ac
-	    && ((struct ip6t_entry *)table_base)->comefrom != 0xeeeeeeec) {
-		printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
-		       smp_processor_id(),
-		       table->name,
-		       &((struct ip6t_entry *)table_base)->comefrom,
-		       ((struct ip6t_entry *)table_base)->comefrom);
-	}
-	((struct ip6t_entry *)table_base)->comefrom = 0x57acc001;
-#endif
 
-	/* For return from builtin chain */
-	back = get_entry(table_base, table->private->underflow[hook]);
+	local_bh_disable();
+	addend = xt_write_recseq_begin();
+	private = table->private;
+	/*
+	 * Ensure we load private-> members after we've fetched the base
+	 * pointer.
+	 */
+	smp_read_barrier_depends();
+	cpu        = smp_processor_id();
+	table_base = private->entries[cpu];
+	jumpstack  = (struct ip6t_entry **)private->jumpstack[cpu];
+	stackptr   = per_cpu_ptr(private->stackptr, cpu);
+	origptr    = *stackptr;
+
+	e = get_entry(table_base, private->hook_entry[hook]);
 
 	do {
+		const struct xt_entry_target *t;
+		const struct xt_entry_match *ematch;
+
 		IP_NF_ASSERT(e);
-		IP_NF_ASSERT(back);
-		if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6,
-			&protoff, &offset)) {
-			struct ip6t_entry_target *t;
-
-			if (IP6T_MATCH_ITERATE(e, do_match,
-					       *pskb, in, out,
-					       offset, protoff, &hotdrop) != 0)
+		acpar.thoff = 0;
+		if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
+		    &acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) {
+ no_match:
+			e = ip6t_next_entry(e);
+			continue;
+		}
+
+		xt_ematch_foreach(ematch, e) {
+			acpar.match     = ematch->u.kernel.match;
+			acpar.matchinfo = ematch->data;
+			if (!acpar.match->match(skb, &acpar))
 				goto no_match;
+		}
 
-			ADD_COUNTER(e->counters,
-				    ntohs((*pskb)->nh.ipv6h->payload_len)
-				    + IPV6_HDR_LEN,
-				    1);
-
-			t = ip6t_get_target(e);
-			IP_NF_ASSERT(t->u.kernel.target);
-			/* Standard target? */
-			if (!t->u.kernel.target->target) {
-				int v;
-
-				v = ((struct ip6t_standard_target *)t)->verdict;
-				if (v < 0) {
-					/* Pop from stack? */
-					if (v != IP6T_RETURN) {
-						verdict = (unsigned)(-v) - 1;
-						break;
-					}
-					e = back;
-					back = get_entry(table_base,
-							 back->comefrom);
-					continue;
-				}
-				if (table_base + v != (void *)e + e->next_offset
-				    && !(e->ipv6.flags & IP6T_F_GOTO)) {
-					/* Save old back ptr in next entry */
-					struct ip6t_entry *next
-						= (void *)e + e->next_offset;
-					next->comefrom
-						= (void *)back - table_base;
-					/* set back pointer to next entry */
-					back = next;
-				}
+		ADD_COUNTER(e->counters, skb->len, 1);
 
-				e = get_entry(table_base, v);
-			} else {
-				/* Targets which reenter must return
-                                   abs. verdicts */
-#ifdef CONFIG_NETFILTER_DEBUG
-				((struct ip6t_entry *)table_base)->comefrom
-					= 0xeeeeeeec;
-#endif
-				verdict = t->u.kernel.target->target(pskb,
-								     in, out,
-								     hook,
-								     t->data,
-								     userdata);
+		t = ip6t_get_target_c(e);
+		IP_NF_ASSERT(t->u.kernel.target);
 
-#ifdef CONFIG_NETFILTER_DEBUG
-				if (((struct ip6t_entry *)table_base)->comefrom
-				    != 0xeeeeeeec
-				    && verdict == IP6T_CONTINUE) {
-					printk("Target %s reentered!\n",
-					       t->u.kernel.target->name);
-					verdict = NF_DROP;
-				}
-				((struct ip6t_entry *)table_base)->comefrom
-					= 0x57acc001;
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
+		/* The packet is traced: log it */
+		if (unlikely(skb->nf_trace))
+			trace_packet(skb, hook, in, out,
+				     table->name, private, e);
 #endif
-				if (verdict == IP6T_CONTINUE)
-					e = (void *)e + e->next_offset;
+		/* Standard target? */
+		if (!t->u.kernel.target->target) {
+			int v;
+
+			v = ((struct xt_standard_target *)t)->verdict;
+			if (v < 0) {
+				/* Pop from stack? */
+				if (v != XT_RETURN) {
+					verdict = (unsigned int)(-v) - 1;
+					break;
+				}
+				if (*stackptr <= origptr)
+					e = get_entry(table_base,
+					    private->underflow[hook]);
 				else
-					/* Verdict */
+					e = ip6t_next_entry(jumpstack[--*stackptr]);
+				continue;
+			}
+			if (table_base + v != ip6t_next_entry(e) &&
+			    !(e->ipv6.flags & IP6T_F_GOTO)) {
+				if (*stackptr >= private->stacksize) {
+					verdict = NF_DROP;
 					break;
+				}
+				jumpstack[(*stackptr)++] = e;
 			}
-		} else {
 
-		no_match:
-			e = (void *)e + e->next_offset;
+			e = get_entry(table_base, v);
+			continue;
 		}
-	} while (!hotdrop);
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	((struct ip6t_entry *)table_base)->comefrom = 0xdead57ac;
-#endif
-	read_unlock_bh(&table->lock);
+		acpar.target   = t->u.kernel.target;
+		acpar.targinfo = t->data;
+
+		verdict = t->u.kernel.target->target(skb, &acpar);
+		if (verdict == XT_CONTINUE)
+			e = ip6t_next_entry(e);
+		else
+			/* Verdict */
+			break;
+	} while (!acpar.hotdrop);
+
+	*stackptr = origptr;
+
+ 	xt_write_recseq_end(addend);
+ 	local_bh_enable();
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
 #else
-	if (hotdrop)
+	if (acpar.hotdrop)
 		return NF_DROP;
 	else return verdict;
 #endif
 }
 
-/*
- * These are weird, but module loading must not be done with mutex
- * held (since they will register), and we have to have a single
- * function to use try_then_request_module().
- */
-
-/* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
-static inline struct ip6t_table *find_table_lock(const char *name)
-{
-	struct ip6t_table *t;
-
-	if (down_interruptible(&ip6t_mutex) != 0)
-		return ERR_PTR(-EINTR);
-
-	list_for_each_entry(t, &ip6t_tables, list)
-		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
-			return t;
-	up(&ip6t_mutex);
-	return NULL;
-}
-
-/* Find match, grabs ref.  Returns ERR_PTR() on error. */
-static inline struct ip6t_match *find_match(const char *name, u8 revision)
-{
-	struct ip6t_match *m;
-	int err = 0;
-
-	if (down_interruptible(&ip6t_mutex) != 0)
-		return ERR_PTR(-EINTR);
-
-	list_for_each_entry(m, &ip6t_match, list) {
-		if (strcmp(m->name, name) == 0) {
-			if (m->revision == revision) {
-				if (try_module_get(m->me)) {
-					up(&ip6t_mutex);
-					return m;
-				}
-			} else
-				err = -EPROTOTYPE; /* Found something. */
-		}
-	}
-	up(&ip6t_mutex);
-	return ERR_PTR(err);
-}
-
-/* Find target, grabs ref.  Returns ERR_PTR() on error. */
-static inline struct ip6t_target *find_target(const char *name, u8 revision)
-{
-	struct ip6t_target *t;
-	int err = 0;
-
-	if (down_interruptible(&ip6t_mutex) != 0)
-		return ERR_PTR(-EINTR);
-
-	list_for_each_entry(t, &ip6t_target, list) {
-		if (strcmp(t->name, name) == 0) {
-			if (t->revision == revision) {
-				if (try_module_get(t->me)) {
-					up(&ip6t_mutex);
-					return t;
-				}
-			} else
-				err = -EPROTOTYPE; /* Found something. */
-		}
-	}
-	up(&ip6t_mutex);
-	return ERR_PTR(err);
-}
-
-struct ip6t_target *ip6t_find_target(const char *name, u8 revision)
-{
-	struct ip6t_target *target;
-
-	target = try_then_request_module(find_target(name, revision),
-					 "ip6t_%s", name);
-	if (IS_ERR(target) || !target)
-		return NULL;
-	return target;
-}
-
-static int match_revfn(const char *name, u8 revision, int *bestp)
-{
-	struct ip6t_match *m;
-	int have_rev = 0;
-
-	list_for_each_entry(m, &ip6t_match, list) {
-		if (strcmp(m->name, name) == 0) {
-			if (m->revision > *bestp)
-				*bestp = m->revision;
-			if (m->revision == revision)
-				have_rev = 1;
-		}
-	}
-	return have_rev;
-}
-
-static int target_revfn(const char *name, u8 revision, int *bestp)
-{
-	struct ip6t_target *t;
-	int have_rev = 0;
-
-	list_for_each_entry(t, &ip6t_target, list) {
-		if (strcmp(t->name, name) == 0) {
-			if (t->revision > *bestp)
-				*bestp = t->revision;
-			if (t->revision == revision)
-				have_rev = 1;
-		}
-	}
-	return have_rev;
-}
-
-/* Returns true or fals (if no such extension at all) */
-static inline int find_revision(const char *name, u8 revision,
-				int (*revfn)(const char *, u8, int *),
-				int *err)
-{
-	int have_rev, best = -1;
-
-	if (down_interruptible(&ip6t_mutex) != 0) {
-		*err = -EINTR;
-		return 1;
-	}
-	have_rev = revfn(name, revision, &best);
-	up(&ip6t_mutex);
-
-	/* Nothing at all?  Return 0 to try loading module. */
-	if (best == -1) {
-		*err = -ENOENT;
-		return 0;
-	}
-
-	*err = best;
-	if (!have_rev)
-		*err = -EPROTONOSUPPORT;
-	return 1;
-}
-
-
-/* All zeroes == unconditional rule. */
-static inline int
-unconditional(const struct ip6t_ip6 *ipv6)
-{
-	unsigned int i;
-
-	for (i = 0; i < sizeof(*ipv6); i++)
-		if (((char *)ipv6)[i])
-			break;
-
-	return (i == sizeof(*ipv6));
-}
-
 /* Figures out from what hook each rule can be called: returns 0 if
    there are loops.  Puts hook bitmask in comefrom. */
 static int
-mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
+mark_source_chains(const struct xt_table_info *newinfo,
+		   unsigned int valid_hooks, void *entry0)
 {
 	unsigned int hook;
 
 	/* No recursion; use packet counter to save back ptrs (reset
 	   to 0 as we leave), and comefrom to save source hook bitmask */
-	for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) {
+	for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
-		struct ip6t_entry *e
-			= (struct ip6t_entry *)(newinfo->entries + pos);
+		struct ip6t_entry *e = (struct ip6t_entry *)(entry0 + pos);
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
@@ -667,32 +470,41 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
 		e->counters.pcnt = pos;
 
 		for (;;) {
-			struct ip6t_standard_target *t
-				= (void *)ip6t_get_target(e);
+			const struct xt_standard_target *t
+				= (void *)ip6t_get_target_c(e);
+			int visited = e->comefrom & (1 << hook);
 
-			if (e->comefrom & (1 << NF_IP6_NUMHOOKS)) {
-				printk("iptables: loop hook %u pos %u %08X.\n",
+			if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
+				pr_err("iptables: loop hook %u pos %u %08X.\n",
 				       hook, pos, e->comefrom);
 				return 0;
 			}
-			e->comefrom
-				|= ((1 << hook) | (1 << NF_IP6_NUMHOOKS));
+			e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
 
 			/* Unconditional return/END. */
-			if (e->target_offset == sizeof(struct ip6t_entry)
-			    && (strcmp(t->target.u.user.name,
-				       IP6T_STANDARD_TARGET) == 0)
-			    && t->verdict < 0
-			    && unconditional(&e->ipv6)) {
+			if ((e->target_offset == sizeof(struct ip6t_entry) &&
+			     (strcmp(t->target.u.user.name,
+				     XT_STANDARD_TARGET) == 0) &&
+			     t->verdict < 0 &&
+			     unconditional(&e->ipv6)) || visited) {
 				unsigned int oldpos, size;
 
+				if ((strcmp(t->target.u.user.name,
+					    XT_STANDARD_TARGET) == 0) &&
+				    t->verdict < -NF_MAX_VERDICT - 1) {
+					duprintf("mark_source_chains: bad "
+						"negative verdict (%i)\n",
+								t->verdict);
+					return 0;
+				}
+
 				/* Return: backtrack through the last
 				   big jump. */
 				do {
-					e->comefrom ^= (1<<NF_IP6_NUMHOOKS);
+					e->comefrom ^= (1<<NF_INET_NUMHOOKS);
 #ifdef DEBUG_IP_FIREWALL_USER
 					if (e->comefrom
-					    & (1 << NF_IP6_NUMHOOKS)) {
+					    & (1 << NF_INET_NUMHOOKS)) {
 						duprintf("Back unset "
 							 "on hook %u "
 							 "rule %u\n",
@@ -708,21 +520,28 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
 						goto next;
 
 					e = (struct ip6t_entry *)
-						(newinfo->entries + pos);
+						(entry0 + pos);
 				} while (oldpos == pos + e->next_offset);
 
 				/* Move along one */
 				size = e->next_offset;
 				e = (struct ip6t_entry *)
-					(newinfo->entries + pos + size);
+					(entry0 + pos + size);
 				e->counters.pcnt = pos;
 				pos += size;
 			} else {
 				int newpos = t->verdict;
 
 				if (strcmp(t->target.u.user.name,
-					   IP6T_STANDARD_TARGET) == 0
-				    && newpos >= 0) {
+					   XT_STANDARD_TARGET) == 0 &&
+				    newpos >= 0) {
+					if (newpos > newinfo->size -
+						sizeof(struct ip6t_entry)) {
+						duprintf("mark_source_chains: "
+							"bad verdict (%i)\n",
+								newpos);
+						return 0;
+					}
 					/* This a jump; chase it. */
 					duprintf("Jump rule %u -> %u\n",
 						 pos, newpos);
@@ -731,7 +550,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
 					newpos = pos + e->next_offset;
 				}
 				e = (struct ip6t_entry *)
-					(newinfo->entries + newpos);
+					(entry0 + newpos);
 				e->counters.pcnt = pos;
 				pos = newpos;
 			}
@@ -742,218 +561,257 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
 	return 1;
 }
 
-static inline int
-cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
+static void cleanup_match(struct xt_entry_match *m, struct net *net)
 {
-	if (i && (*i)-- == 0)
-		return 1;
-
-	if (m->u.kernel.match->destroy)
-		m->u.kernel.match->destroy(m->data,
-					   m->u.match_size - sizeof(*m));
-	module_put(m->u.kernel.match->me);
-	return 0;
+	struct xt_mtdtor_param par;
+
+	par.net       = net;
+	par.match     = m->u.kernel.match;
+	par.matchinfo = m->data;
+	par.family    = NFPROTO_IPV6;
+	if (par.match->destroy != NULL)
+		par.match->destroy(&par);
+	module_put(par.match->me);
 }
 
-static inline int
-standard_check(const struct ip6t_entry_target *t,
-	       unsigned int max_offset)
+static int
+check_entry(const struct ip6t_entry *e, const char *name)
 {
-	struct ip6t_standard_target *targ = (void *)t;
+	const struct xt_entry_target *t;
 
-	/* Check standard info. */
-	if (t->u.target_size
-	    != IP6T_ALIGN(sizeof(struct ip6t_standard_target))) {
-		duprintf("standard_check: target size %u != %u\n",
-			 t->u.target_size,
-			 IP6T_ALIGN(sizeof(struct ip6t_standard_target)));
-		return 0;
+	if (!ip6_checkentry(&e->ipv6)) {
+		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+		return -EINVAL;
 	}
 
-	if (targ->verdict >= 0
-	    && targ->verdict > max_offset - sizeof(struct ip6t_entry)) {
-		duprintf("ip6t_standard_check: bad verdict (%i)\n",
-			 targ->verdict);
-		return 0;
-	}
+	if (e->target_offset + sizeof(struct xt_entry_target) >
+	    e->next_offset)
+		return -EINVAL;
+
+	t = ip6t_get_target_c(e);
+	if (e->target_offset + t->u.target_size > e->next_offset)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
+{
+	const struct ip6t_ip6 *ipv6 = par->entryinfo;
+	int ret;
+
+	par->match     = m->u.kernel.match;
+	par->matchinfo = m->data;
 
-	if (targ->verdict < -NF_MAX_VERDICT - 1) {
-		duprintf("ip6t_standard_check: bad negative verdict (%i)\n",
-			 targ->verdict);
-		return 0;
+	ret = xt_check_match(par, m->u.match_size - sizeof(*m),
+			     ipv6->proto, ipv6->invflags & IP6T_INV_PROTO);
+	if (ret < 0) {
+		duprintf("ip_tables: check failed for `%s'.\n",
+			 par.match->name);
+		return ret;
 	}
-	return 1;
+	return 0;
 }
 
-static inline int
-check_match(struct ip6t_entry_match *m,
-	    const char *name,
-	    const struct ip6t_ip6 *ipv6,
-	    unsigned int hookmask,
-	    unsigned int *i)
+static int
+find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
 {
-	struct ip6t_match *match;
+	struct xt_match *match;
+	int ret;
 
-	match = try_then_request_module(find_match(m->u.user.name,
-						   m->u.user.revision),
-					"ip6t_%s", m->u.user.name);
-	if (IS_ERR(match) || !match) {
-		duprintf("check_match: `%s' not found\n", m->u.user.name);
-		return match ? PTR_ERR(match) : -ENOENT;
+	match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
+				      m->u.user.revision);
+	if (IS_ERR(match)) {
+		duprintf("find_check_match: `%s' not found\n", m->u.user.name);
+		return PTR_ERR(match);
 	}
 	m->u.kernel.match = match;
 
-	if (m->u.kernel.match->checkentry
-	    && !m->u.kernel.match->checkentry(name, ipv6, m->data,
-					      m->u.match_size - sizeof(*m),
-					      hookmask)) {
-		module_put(m->u.kernel.match->me);
-		duprintf("ip_tables: check failed for `%s'.\n",
-			 m->u.kernel.match->name);
-		return -EINVAL;
-	}
+	ret = check_match(m, par);
+	if (ret)
+		goto err;
 
-	(*i)++;
 	return 0;
+err:
+	module_put(m->u.kernel.match->me);
+	return ret;
 }
 
-static struct ip6t_target ip6t_standard_target;
+static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
+{
+	struct xt_entry_target *t = ip6t_get_target(e);
+	struct xt_tgchk_param par = {
+		.net       = net,
+		.table     = name,
+		.entryinfo = e,
+		.target    = t->u.kernel.target,
+		.targinfo  = t->data,
+		.hook_mask = e->comefrom,
+		.family    = NFPROTO_IPV6,
+	};
+	int ret;
 
-static inline int
-check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
-	    unsigned int *i)
+	t = ip6t_get_target(e);
+	ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
+	      e->ipv6.proto, e->ipv6.invflags & IP6T_INV_PROTO);
+	if (ret < 0) {
+		duprintf("ip_tables: check failed for `%s'.\n",
+			 t->u.kernel.target->name);
+		return ret;
+	}
+	return 0;
+}
+
+static int
+find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
+		 unsigned int size)
 {
-	struct ip6t_entry_target *t;
-	struct ip6t_target *target;
+	struct xt_entry_target *t;
+	struct xt_target *target;
 	int ret;
 	unsigned int j;
+	struct xt_mtchk_param mtpar;
+	struct xt_entry_match *ematch;
 
-	if (!ip6_checkentry(&e->ipv6)) {
-		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
-		return -EINVAL;
-	}
+	ret = check_entry(e, name);
+	if (ret)
+		return ret;
 
 	j = 0;
-	ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, e->comefrom, &j);
-	if (ret != 0)
-		goto cleanup_matches;
+	mtpar.net	= net;
+	mtpar.table     = name;
+	mtpar.entryinfo = &e->ipv6;
+	mtpar.hook_mask = e->comefrom;
+	mtpar.family    = NFPROTO_IPV6;
+	xt_ematch_foreach(ematch, e) {
+		ret = find_check_match(ematch, &mtpar);
+		if (ret != 0)
+			goto cleanup_matches;
+		++j;
+	}
 
 	t = ip6t_get_target(e);
-	target = try_then_request_module(find_target(t->u.user.name,
-						     t->u.user.revision),
-					 "ip6t_%s", t->u.user.name);
-	if (IS_ERR(target) || !target) {
-		duprintf("check_entry: `%s' not found\n", t->u.user.name);
-		ret = target ? PTR_ERR(target) : -ENOENT;
+	target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
+					t->u.user.revision);
+	if (IS_ERR(target)) {
+		duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
+		ret = PTR_ERR(target);
 		goto cleanup_matches;
 	}
 	t->u.kernel.target = target;
 
-	if (t->u.kernel.target == &ip6t_standard_target) {
-		if (!standard_check(t, size)) {
-			ret = -EINVAL;
-			goto cleanup_matches;
-		}
-	} else if (t->u.kernel.target->checkentry
-		   && !t->u.kernel.target->checkentry(name, e, t->data,
-						      t->u.target_size
-						      - sizeof(*t),
-						      e->comefrom)) {
-		module_put(t->u.kernel.target->me);
-		duprintf("ip_tables: check failed for `%s'.\n",
-			 t->u.kernel.target->name);
-		ret = -EINVAL;
-		goto cleanup_matches;
-	}
-
-	(*i)++;
+	ret = check_target(e, net, name);
+	if (ret)
+		goto err;
 	return 0;
-
+ err:
+	module_put(t->u.kernel.target->me);
  cleanup_matches:
-	IP6T_MATCH_ITERATE(e, cleanup_match, &j);
+	xt_ematch_foreach(ematch, e) {
+		if (j-- == 0)
+			break;
+		cleanup_match(ematch, net);
+	}
 	return ret;
 }
 
-static inline int
+static bool check_underflow(const struct ip6t_entry *e)
+{
+	const struct xt_entry_target *t;
+	unsigned int verdict;
+
+	if (!unconditional(&e->ipv6))
+		return false;
+	t = ip6t_get_target_c(e);
+	if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
+		return false;
+	verdict = ((struct xt_standard_target *)t)->verdict;
+	verdict = -verdict - 1;
+	return verdict == NF_DROP || verdict == NF_ACCEPT;
+}
+
+static int
 check_entry_size_and_hooks(struct ip6t_entry *e,
-			   struct ip6t_table_info *newinfo,
-			   unsigned char *base,
-			   unsigned char *limit,
+			   struct xt_table_info *newinfo,
+			   const unsigned char *base,
+			   const unsigned char *limit,
 			   const unsigned int *hook_entries,
 			   const unsigned int *underflows,
-			   unsigned int *i)
+			   unsigned int valid_hooks)
 {
 	unsigned int h;
 
-	if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0
-	    || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) {
+	if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 ||
+	    (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) {
 		duprintf("Bad offset %p\n", e);
 		return -EINVAL;
 	}
 
 	if (e->next_offset
-	    < sizeof(struct ip6t_entry) + sizeof(struct ip6t_entry_target)) {
+	    < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) {
 		duprintf("checking: element %p size %u\n",
 			 e, e->next_offset);
 		return -EINVAL;
 	}
 
 	/* Check hooks & underflows */
-	for (h = 0; h < NF_IP6_NUMHOOKS; h++) {
+	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+		if (!(valid_hooks & (1 << h)))
+			continue;
 		if ((unsigned char *)e - base == hook_entries[h])
 			newinfo->hook_entry[h] = hook_entries[h];
-		if ((unsigned char *)e - base == underflows[h])
+		if ((unsigned char *)e - base == underflows[h]) {
+			if (!check_underflow(e)) {
+				pr_err("Underflows must be unconditional and "
+				       "use the STANDARD target with "
+				       "ACCEPT/DROP\n");
+				return -EINVAL;
+			}
 			newinfo->underflow[h] = underflows[h];
+		}
 	}
 
-	/* FIXME: underflows must be unconditional, standard verdicts
-           < 0 (not IP6T_RETURN). --RR */
-
 	/* Clear counters and comefrom */
-	e->counters = ((struct ip6t_counters) { 0, 0 });
+	e->counters = ((struct xt_counters) { 0, 0 });
 	e->comefrom = 0;
-
-	(*i)++;
 	return 0;
 }
 
-static inline int
-cleanup_entry(struct ip6t_entry *e, unsigned int *i)
+static void cleanup_entry(struct ip6t_entry *e, struct net *net)
 {
-	struct ip6t_entry_target *t;
-
-	if (i && (*i)-- == 0)
-		return 1;
+	struct xt_tgdtor_param par;
+	struct xt_entry_target *t;
+	struct xt_entry_match *ematch;
 
 	/* Cleanup all matches */
-	IP6T_MATCH_ITERATE(e, cleanup_match, NULL);
+	xt_ematch_foreach(ematch, e)
+		cleanup_match(ematch, net);
 	t = ip6t_get_target(e);
-	if (t->u.kernel.target->destroy)
-		t->u.kernel.target->destroy(t->data,
-					    t->u.target_size - sizeof(*t));
-	module_put(t->u.kernel.target->me);
-	return 0;
+
+	par.net      = net;
+	par.target   = t->u.kernel.target;
+	par.targinfo = t->data;
+	par.family   = NFPROTO_IPV6;
+	if (par.target->destroy != NULL)
+		par.target->destroy(&par);
+	module_put(par.target->me);
 }
 
 /* Checks and translates the user-supplied table segment (held in
    newinfo) */
 static int
-translate_table(const char *name,
-		unsigned int valid_hooks,
-		struct ip6t_table_info *newinfo,
-		unsigned int size,
-		unsigned int number,
-		const unsigned int *hook_entries,
-		const unsigned int *underflows)
+translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
+                const struct ip6t_replace *repl)
 {
+	struct ip6t_entry *iter;
 	unsigned int i;
-	int ret;
+	int ret = 0;
 
-	newinfo->size = size;
-	newinfo->number = number;
+	newinfo->size = repl->size;
+	newinfo->number = repl->num_entries;
 
 	/* Init all hooks to impossible value. */
-	for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 		newinfo->hook_entry[i] = 0xFFFFFFFF;
 		newinfo->underflow[i] = 0xFFFFFFFF;
 	}
@@ -961,161 +819,143 @@ translate_table(const char *name,
 	duprintf("translate_table: size %u\n", newinfo->size);
 	i = 0;
 	/* Walk through entries, checking offsets. */
-	ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
-				check_entry_size_and_hooks,
-				newinfo,
-				newinfo->entries,
-				newinfo->entries + size,
-				hook_entries, underflows, &i);
-	if (ret != 0)
-		return ret;
-
-	if (i != number) {
+	xt_entry_foreach(iter, entry0, newinfo->size) {
+		ret = check_entry_size_and_hooks(iter, newinfo, entry0,
+						 entry0 + repl->size,
+						 repl->hook_entry,
+						 repl->underflow,
+						 repl->valid_hooks);
+		if (ret != 0)
+			return ret;
+		++i;
+		if (strcmp(ip6t_get_target(iter)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
+	}
+
+	if (i != repl->num_entries) {
 		duprintf("translate_table: %u not %u entries\n",
-			 i, number);
+			 i, repl->num_entries);
 		return -EINVAL;
 	}
 
 	/* Check hooks all assigned */
-	for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 		/* Only hooks which are valid */
-		if (!(valid_hooks & (1 << i)))
+		if (!(repl->valid_hooks & (1 << i)))
 			continue;
 		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
 			duprintf("Invalid hook entry %u %u\n",
-				 i, hook_entries[i]);
+				 i, repl->hook_entry[i]);
 			return -EINVAL;
 		}
 		if (newinfo->underflow[i] == 0xFFFFFFFF) {
 			duprintf("Invalid underflow %u %u\n",
-				 i, underflows[i]);
+				 i, repl->underflow[i]);
 			return -EINVAL;
 		}
 	}
 
-	if (!mark_source_chains(newinfo, valid_hooks))
+	if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
 		return -ELOOP;
 
 	/* Finally, each sanity check must pass */
 	i = 0;
-	ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
-				check_entry, name, size, &i);
+	xt_entry_foreach(iter, entry0, newinfo->size) {
+		ret = find_check_entry(iter, net, repl->name, repl->size);
+		if (ret != 0)
+			break;
+		++i;
+	}
 
 	if (ret != 0) {
-		IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
-				  cleanup_entry, &i);
+		xt_entry_foreach(iter, entry0, newinfo->size) {
+			if (i-- == 0)
+				break;
+			cleanup_entry(iter, net);
+		}
 		return ret;
 	}
 
 	/* And one copy for every other CPU */
-	for_each_cpu(i) {
-		if (i == 0)
-			continue;
-		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
-		       newinfo->entries,
-		       SMP_ALIGN(newinfo->size));
+	for_each_possible_cpu(i) {
+		if (newinfo->entries[i] && newinfo->entries[i] != entry0)
+			memcpy(newinfo->entries[i], entry0, newinfo->size);
 	}
 
 	return ret;
 }
 
-static struct ip6t_table_info *
-replace_table(struct ip6t_table *table,
-	      unsigned int num_counters,
-	      struct ip6t_table_info *newinfo,
-	      int *error)
-{
-	struct ip6t_table_info *oldinfo;
-
-#ifdef CONFIG_NETFILTER_DEBUG
-	{
-		struct ip6t_entry *table_base;
-		unsigned int i;
-
-		for_each_cpu(i) {
-			table_base =
-				(void *)newinfo->entries
-				+ TABLE_OFFSET(newinfo, i);
-
-			table_base->comefrom = 0xdead57ac;
-		}
-	}
-#endif
-
-	/* Do the substitution. */
-	write_lock_bh(&table->lock);
-	/* Check inside lock: is the old number correct? */
-	if (num_counters != table->private->number) {
-		duprintf("num_counters != table->private->number (%u/%u)\n",
-			 num_counters, table->private->number);
-		write_unlock_bh(&table->lock);
-		*error = -EAGAIN;
-		return NULL;
-	}
-	oldinfo = table->private;
-	table->private = newinfo;
-	newinfo->initial_entries = oldinfo->initial_entries;
-	write_unlock_bh(&table->lock);
-
-	return oldinfo;
-}
-
-/* Gets counters. */
-static inline int
-add_entry_to_counter(const struct ip6t_entry *e,
-		     struct ip6t_counters total[],
-		     unsigned int *i)
-{
-	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
-
-	(*i)++;
-	return 0;
-}
-
 static void
-get_counters(const struct ip6t_table_info *t,
-	     struct ip6t_counters counters[])
+get_counters(const struct xt_table_info *t,
+	     struct xt_counters counters[])
 {
+	struct ip6t_entry *iter;
 	unsigned int cpu;
 	unsigned int i;
 
-	for_each_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
+		seqcount_t *s = &per_cpu(xt_recseq, cpu);
+
 		i = 0;
-		IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
-				  t->size,
-				  add_entry_to_counter,
-				  counters,
-				  &i);
+		xt_entry_foreach(iter, t->entries[cpu], t->size) {
+			u64 bcnt, pcnt;
+			unsigned int start;
+
+			do {
+				start = read_seqcount_begin(s);
+				bcnt = iter->counters.bcnt;
+				pcnt = iter->counters.pcnt;
+			} while (read_seqcount_retry(s, start));
+
+			ADD_COUNTER(counters[i], bcnt, pcnt);
+			++i;
+		}
 	}
 }
 
-static int
-copy_entries_to_user(unsigned int total_size,
-		     struct ip6t_table *table,
-		     void __user *userptr)
+static struct xt_counters *alloc_counters(const struct xt_table *table)
 {
-	unsigned int off, num, countersize;
-	struct ip6t_entry *e;
-	struct ip6t_counters *counters;
-	int ret = 0;
+	unsigned int countersize;
+	struct xt_counters *counters;
+	const struct xt_table_info *private = table->private;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
 	   about). */
-	countersize = sizeof(struct ip6t_counters) * table->private->number;
-	counters = vmalloc(countersize);
+	countersize = sizeof(struct xt_counters) * private->number;
+	counters = vzalloc(countersize);
 
 	if (counters == NULL)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
-	/* First, sum counters... */
-	memset(counters, 0, countersize);
-	write_lock_bh(&table->lock);
-	get_counters(table->private, counters);
-	write_unlock_bh(&table->lock);
+	get_counters(private, counters);
 
-	/* ... then copy entire thing from CPU 0... */
-	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+	return counters;
+}
+
+static int
+copy_entries_to_user(unsigned int total_size,
+		     const struct xt_table *table,
+		     void __user *userptr)
+{
+	unsigned int off, num;
+	const struct ip6t_entry *e;
+	struct xt_counters *counters;
+	const struct xt_table_info *private = table->private;
+	int ret = 0;
+	const void *loc_cpu_entry;
+
+	counters = alloc_counters(table);
+	if (IS_ERR(counters))
+		return PTR_ERR(counters);
+
+	/* choose the copy that is on our node/cpu, ...
+	 * This choice is lazy (because current thread is
+	 * allowed to migrate to another cpu)
+	 */
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
 		ret = -EFAULT;
 		goto free_counters;
 	}
@@ -1124,10 +964,10 @@ copy_entries_to_user(unsigned int total_size,
 	/* ... then go back and fix counters and names */
 	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
 		unsigned int i;
-		struct ip6t_entry_match *m;
-		struct ip6t_entry_target *t;
+		const struct xt_entry_match *m;
+		const struct xt_entry_target *t;
 
-		e = (struct ip6t_entry *)(table->private->entries + off);
+		e = (struct ip6t_entry *)(loc_cpu_entry + off);
 		if (copy_to_user(userptr + off
 				 + offsetof(struct ip6t_entry, counters),
 				 &counters[num],
@@ -1142,7 +982,7 @@ copy_entries_to_user(unsigned int total_size,
 			m = (void *)e + i;
 
 			if (copy_to_user(userptr + off + i
-					 + offsetof(struct ip6t_entry_match,
+					 + offsetof(struct xt_entry_match,
 						    u.user.name),
 					 m->u.kernel.match->name,
 					 strlen(m->u.kernel.match->name)+1)
@@ -1152,9 +992,9 @@ copy_entries_to_user(unsigned int total_size,
 			}
 		}
 
-		t = ip6t_get_target(e);
+		t = ip6t_get_target_c(e);
 		if (copy_to_user(userptr + off + e->target_offset
-				 + offsetof(struct ip6t_entry_target,
+				 + offsetof(struct xt_entry_target,
 					    u.user.name),
 				 t->u.kernel.target->name,
 				 strlen(t->u.kernel.target->name)+1) != 0) {
@@ -1168,196 +1008,389 @@ copy_entries_to_user(unsigned int total_size,
 	return ret;
 }
 
-static int
-get_entries(const struct ip6t_get_entries *entries,
-	    struct ip6t_get_entries __user *uptr)
+#ifdef CONFIG_COMPAT
+static void compat_standard_from_user(void *dst, const void *src)
+{
+	int v = *(compat_int_t *)src;
+
+	if (v > 0)
+		v += xt_compat_calc_jump(AF_INET6, v);
+	memcpy(dst, &v, sizeof(v));
+}
+
+static int compat_standard_to_user(void __user *dst, const void *src)
+{
+	compat_int_t cv = *(int *)src;
+
+	if (cv > 0)
+		cv -= xt_compat_calc_jump(AF_INET6, cv);
+	return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
+}
+
+static int compat_calc_entry(const struct ip6t_entry *e,
+			     const struct xt_table_info *info,
+			     const void *base, struct xt_table_info *newinfo)
 {
+	const struct xt_entry_match *ematch;
+	const struct xt_entry_target *t;
+	unsigned int entry_offset;
+	int off, i, ret;
+
+	off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+	entry_offset = (void *)e - base;
+	xt_ematch_foreach(ematch, e)
+		off += xt_compat_match_offset(ematch->u.kernel.match);
+	t = ip6t_get_target_c(e);
+	off += xt_compat_target_offset(t->u.kernel.target);
+	newinfo->size -= off;
+	ret = xt_compat_add_offset(AF_INET6, entry_offset, off);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+		if (info->hook_entry[i] &&
+		    (e < (struct ip6t_entry *)(base + info->hook_entry[i])))
+			newinfo->hook_entry[i] -= off;
+		if (info->underflow[i] &&
+		    (e < (struct ip6t_entry *)(base + info->underflow[i])))
+			newinfo->underflow[i] -= off;
+	}
+	return 0;
+}
+
+static int compat_table_info(const struct xt_table_info *info,
+			     struct xt_table_info *newinfo)
+{
+	struct ip6t_entry *iter;
+	void *loc_cpu_entry;
 	int ret;
-	struct ip6t_table *t;
-
-	t = find_table_lock(entries->name);
-	if (t && !IS_ERR(t)) {
-		duprintf("t->private->number = %u\n",
-			 t->private->number);
-		if (entries->size == t->private->size)
-			ret = copy_entries_to_user(t->private->size,
-						   t, uptr->entrytable);
-		else {
-			duprintf("get_entries: I've got %u not %u!\n",
-				 t->private->size,
-				 entries->size);
-			ret = -EINVAL;
+
+	if (!newinfo || !info)
+		return -EINVAL;
+
+	/* we dont care about newinfo->entries[] */
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	newinfo->initial_entries = 0;
+	loc_cpu_entry = info->entries[raw_smp_processor_id()];
+	xt_compat_init_offsets(AF_INET6, info->number);
+	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
+		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
+		if (ret != 0)
+			return ret;
+	}
+	return 0;
+}
+#endif
+
+static int get_info(struct net *net, void __user *user,
+                    const int *len, int compat)
+{
+	char name[XT_TABLE_MAXNAMELEN];
+	struct xt_table *t;
+	int ret;
+
+	if (*len != sizeof(struct ip6t_getinfo)) {
+		duprintf("length %u != %zu\n", *len,
+			 sizeof(struct ip6t_getinfo));
+		return -EINVAL;
+	}
+
+	if (copy_from_user(name, user, sizeof(name)) != 0)
+		return -EFAULT;
+
+	name[XT_TABLE_MAXNAMELEN-1] = '\0';
+#ifdef CONFIG_COMPAT
+	if (compat)
+		xt_compat_lock(AF_INET6);
+#endif
+	t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
+				    "ip6table_%s", name);
+	if (!IS_ERR_OR_NULL(t)) {
+		struct ip6t_getinfo info;
+		const struct xt_table_info *private = t->private;
+#ifdef CONFIG_COMPAT
+		struct xt_table_info tmp;
+
+		if (compat) {
+			ret = compat_table_info(private, &tmp);
+			xt_compat_flush_offsets(AF_INET6);
+			private = &tmp;
 		}
+#endif
+		memset(&info, 0, sizeof(info));
+		info.valid_hooks = t->valid_hooks;
+		memcpy(info.hook_entry, private->hook_entry,
+		       sizeof(info.hook_entry));
+		memcpy(info.underflow, private->underflow,
+		       sizeof(info.underflow));
+		info.num_entries = private->number;
+		info.size = private->size;
+		strcpy(info.name, name);
+
+		if (copy_to_user(user, &info, *len) != 0)
+			ret = -EFAULT;
+		else
+			ret = 0;
+
+		xt_table_unlock(t);
 		module_put(t->me);
-		up(&ip6t_mutex);
 	} else
 		ret = t ? PTR_ERR(t) : -ENOENT;
-
+#ifdef CONFIG_COMPAT
+	if (compat)
+		xt_compat_unlock(AF_INET6);
+#endif
 	return ret;
 }
 
 static int
-do_replace(void __user *user, unsigned int len)
+get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
+            const int *len)
 {
 	int ret;
-	struct ip6t_replace tmp;
-	struct ip6t_table *t;
-	struct ip6t_table_info *newinfo, *oldinfo;
-	struct ip6t_counters *counters;
+	struct ip6t_get_entries get;
+	struct xt_table *t;
 
-	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+	if (*len < sizeof(get)) {
+		duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
+		return -EINVAL;
+	}
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
 		return -EFAULT;
+	if (*len != sizeof(struct ip6t_get_entries) + get.size) {
+		duprintf("get_entries: %u != %zu\n",
+			 *len, sizeof(get) + get.size);
+		return -EINVAL;
+	}
 
-	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
-	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
-		return -ENOMEM;
-
-	newinfo = vmalloc(sizeof(struct ip6t_table_info)
-			  + SMP_ALIGN(tmp.size) *
-			  		(highest_possible_processor_id()+1));
-	if (!newinfo)
-		return -ENOMEM;
+	t = xt_find_table_lock(net, AF_INET6, get.name);
+	if (!IS_ERR_OR_NULL(t)) {
+		struct xt_table_info *private = t->private;
+		duprintf("t->private->number = %u\n", private->number);
+		if (get.size == private->size)
+			ret = copy_entries_to_user(private->size,
+						   t, uptr->entrytable);
+		else {
+			duprintf("get_entries: I've got %u not %u!\n",
+				 private->size, get.size);
+			ret = -EAGAIN;
+		}
+		module_put(t->me);
+		xt_table_unlock(t);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
 
-	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
-			   tmp.size) != 0) {
-		ret = -EFAULT;
-		goto free_newinfo;
-	}
+	return ret;
+}
 
-	counters = vmalloc(tmp.num_counters * sizeof(struct ip6t_counters));
+static int
+__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
+	     struct xt_table_info *newinfo, unsigned int num_counters,
+	     void __user *counters_ptr)
+{
+	int ret;
+	struct xt_table *t;
+	struct xt_table_info *oldinfo;
+	struct xt_counters *counters;
+	const void *loc_cpu_old_entry;
+	struct ip6t_entry *iter;
+
+	ret = 0;
+	counters = vzalloc(num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
-		goto free_newinfo;
+		goto out;
 	}
-	memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters));
 
-	ret = translate_table(tmp.name, tmp.valid_hooks,
-			      newinfo, tmp.size, tmp.num_entries,
-			      tmp.hook_entry, tmp.underflow);
-	if (ret != 0)
-		goto free_newinfo_counters;
-
-	duprintf("ip_tables: Translated table\n");
-
-	t = try_then_request_module(find_table_lock(tmp.name),
-				    "ip6table_%s", tmp.name);
-	if (!t || IS_ERR(t)) {
+	t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
+				    "ip6table_%s", name);
+	if (IS_ERR_OR_NULL(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free_newinfo_counters_untrans;
 	}
 
 	/* You lied! */
-	if (tmp.valid_hooks != t->valid_hooks) {
+	if (valid_hooks != t->valid_hooks) {
 		duprintf("Valid hook crap: %08X vs %08X\n",
-			 tmp.valid_hooks, t->valid_hooks);
+			 valid_hooks, t->valid_hooks);
 		ret = -EINVAL;
 		goto put_module;
 	}
 
-	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
+	oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
 	if (!oldinfo)
 		goto put_module;
 
 	/* Update module usage count based on number of rules */
 	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
 		oldinfo->number, oldinfo->initial_entries, newinfo->number);
-	if ((oldinfo->number > oldinfo->initial_entries) || 
-	    (newinfo->number <= oldinfo->initial_entries)) 
+	if ((oldinfo->number > oldinfo->initial_entries) ||
+	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 	if ((oldinfo->number > oldinfo->initial_entries) &&
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 
-	/* Get the old counters. */
+	/* Get the old counters, and synchronize with replace */
 	get_counters(oldinfo, counters);
+
 	/* Decrease module usage counts and free resource */
-	IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
-	vfree(oldinfo);
-	if (copy_to_user(tmp.counters, counters,
-			 sizeof(struct ip6t_counters) * tmp.num_counters) != 0)
-		ret = -EFAULT;
+	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+	xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+		cleanup_entry(iter, net);
+
+	xt_free_table_info(oldinfo);
+	if (copy_to_user(counters_ptr, counters,
+			 sizeof(struct xt_counters) * num_counters) != 0) {
+		/* Silent error, can't fail, new table is already in place */
+		net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n");
+	}
 	vfree(counters);
-	up(&ip6t_mutex);
+	xt_table_unlock(t);
 	return ret;
 
  put_module:
 	module_put(t->me);
-	up(&ip6t_mutex);
+	xt_table_unlock(t);
  free_newinfo_counters_untrans:
-	IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
- free_newinfo_counters:
 	vfree(counters);
- free_newinfo:
-	vfree(newinfo);
+ out:
 	return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static inline int
-add_counter_to_entry(struct ip6t_entry *e,
-		     const struct ip6t_counters addme[],
-		     unsigned int *i)
+static int
+do_replace(struct net *net, const void __user *user, unsigned int len)
 {
-#if 0
-	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
-		 *i,
-		 (long unsigned int)e->counters.pcnt,
-		 (long unsigned int)e->counters.bcnt,
-		 (long unsigned int)addme[*i].pcnt,
-		 (long unsigned int)addme[*i].bcnt);
-#endif
+	int ret;
+	struct ip6t_replace tmp;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+	struct ip6t_entry *iter;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* overflow check */
+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+		return -ENOMEM;
+	tmp.name[sizeof(tmp.name)-1] = 0;
+
+	newinfo = xt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
 
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	ret = translate_table(net, newinfo, loc_cpu_entry, &tmp);
+	if (ret != 0)
+		goto free_newinfo;
 
-	(*i)++;
+	duprintf("ip_tables: Translated table\n");
+
+	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
+			   tmp.num_counters, tmp.counters);
+	if (ret)
+		goto free_newinfo_untrans;
 	return 0;
+
+ free_newinfo_untrans:
+	xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+		cleanup_entry(iter, net);
+ free_newinfo:
+	xt_free_table_info(newinfo);
+	return ret;
 }
 
 static int
-do_add_counters(void __user *user, unsigned int len)
+do_add_counters(struct net *net, const void __user *user, unsigned int len,
+		int compat)
 {
-	unsigned int i;
-	struct ip6t_counters_info tmp, *paddc;
-	struct ip6t_table *t;
+	unsigned int i, curcpu;
+	struct xt_counters_info tmp;
+	struct xt_counters *paddc;
+	unsigned int num_counters;
+	char *name;
+	int size;
+	void *ptmp;
+	struct xt_table *t;
+	const struct xt_table_info *private;
 	int ret = 0;
+	const void *loc_cpu_entry;
+	struct ip6t_entry *iter;
+	unsigned int addend;
+#ifdef CONFIG_COMPAT
+	struct compat_xt_counters_info compat_tmp;
+
+	if (compat) {
+		ptmp = &compat_tmp;
+		size = sizeof(struct compat_xt_counters_info);
+	} else
+#endif
+	{
+		ptmp = &tmp;
+		size = sizeof(struct xt_counters_info);
+	}
 
-	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+	if (copy_from_user(ptmp, user, size) != 0)
 		return -EFAULT;
 
-	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ip6t_counters))
+#ifdef CONFIG_COMPAT
+	if (compat) {
+		num_counters = compat_tmp.num_counters;
+		name = compat_tmp.name;
+	} else
+#endif
+	{
+		num_counters = tmp.num_counters;
+		name = tmp.name;
+	}
+
+	if (len != size + num_counters * sizeof(struct xt_counters))
 		return -EINVAL;
 
-	paddc = vmalloc(len);
+	paddc = vmalloc(len - size);
 	if (!paddc)
 		return -ENOMEM;
 
-	if (copy_from_user(paddc, user, len) != 0) {
+	if (copy_from_user(paddc, user + size, len - size) != 0) {
 		ret = -EFAULT;
 		goto free;
 	}
 
-	t = find_table_lock(tmp.name);
-	if (!t || IS_ERR(t)) {
+	t = xt_find_table_lock(net, AF_INET6, name);
+	if (IS_ERR_OR_NULL(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free;
 	}
 
-	write_lock_bh(&t->lock);
-	if (t->private->number != paddc->num_counters) {
+
+	local_bh_disable();
+	private = t->private;
+	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
 	i = 0;
-	IP6T_ENTRY_ITERATE(t->private->entries,
-			  t->private->size,
-			  add_counter_to_entry,
-			  paddc->counters,
-			  &i);
+	/* Choose the copy that is on our node */
+	curcpu = smp_processor_id();
+	addend = xt_write_recseq_begin();
+	loc_cpu_entry = private->entries[curcpu];
+	xt_entry_foreach(iter, loc_cpu_entry, private->size) {
+		ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+		++i;
+	}
+	xt_write_recseq_end(addend);
+
  unlock_up_free:
-	write_unlock_bh(&t->lock);
-	up(&ip6t_mutex);
+	local_bh_enable();
+	xt_table_unlock(t);
 	module_put(t->me);
  free:
 	vfree(paddc);
@@ -1365,469 +1398,764 @@ do_add_counters(void __user *user, unsigned int len)
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_ip6t_replace {
+	char			name[XT_TABLE_MAXNAMELEN];
+	u32			valid_hooks;
+	u32			num_entries;
+	u32			size;
+	u32			hook_entry[NF_INET_NUMHOOKS];
+	u32			underflow[NF_INET_NUMHOOKS];
+	u32			num_counters;
+	compat_uptr_t		counters;	/* struct xt_counters * */
+	struct compat_ip6t_entry entries[0];
+};
+
 static int
-do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
+			  unsigned int *size, struct xt_counters *counters,
+			  unsigned int i)
 {
-	int ret;
+	struct xt_entry_target *t;
+	struct compat_ip6t_entry __user *ce;
+	u_int16_t target_offset, next_offset;
+	compat_uint_t origsize;
+	const struct xt_entry_match *ematch;
+	int ret = 0;
 
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
+	origsize = *size;
+	ce = (struct compat_ip6t_entry __user *)*dstptr;
+	if (copy_to_user(ce, e, sizeof(struct ip6t_entry)) != 0 ||
+	    copy_to_user(&ce->counters, &counters[i],
+	    sizeof(counters[i])) != 0)
+		return -EFAULT;
 
-	switch (cmd) {
-	case IP6T_SO_SET_REPLACE:
-		ret = do_replace(user, len);
-		break;
+	*dstptr += sizeof(struct compat_ip6t_entry);
+	*size -= sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
 
-	case IP6T_SO_SET_ADD_COUNTERS:
-		ret = do_add_counters(user, len);
-		break;
+	xt_ematch_foreach(ematch, e) {
+		ret = xt_compat_match_to_user(ematch, dstptr, size);
+		if (ret != 0)
+			return ret;
+	}
+	target_offset = e->target_offset - (origsize - *size);
+	t = ip6t_get_target(e);
+	ret = xt_compat_target_to_user(t, dstptr, size);
+	if (ret)
+		return ret;
+	next_offset = e->next_offset - (origsize - *size);
+	if (put_user(target_offset, &ce->target_offset) != 0 ||
+	    put_user(next_offset, &ce->next_offset) != 0)
+		return -EFAULT;
+	return 0;
+}
 
-	default:
-		duprintf("do_ip6t_set_ctl:  unknown request %i\n", cmd);
-		ret = -EINVAL;
+static int
+compat_find_calc_match(struct xt_entry_match *m,
+		       const char *name,
+		       const struct ip6t_ip6 *ipv6,
+		       unsigned int hookmask,
+		       int *size)
+{
+	struct xt_match *match;
+
+	match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
+				      m->u.user.revision);
+	if (IS_ERR(match)) {
+		duprintf("compat_check_calc_match: `%s' not found\n",
+			 m->u.user.name);
+		return PTR_ERR(match);
 	}
+	m->u.kernel.match = match;
+	*size += xt_compat_match_offset(match);
+	return 0;
+}
 
-	return ret;
+static void compat_release_entry(struct compat_ip6t_entry *e)
+{
+	struct xt_entry_target *t;
+	struct xt_entry_match *ematch;
+
+	/* Cleanup all matches */
+	xt_ematch_foreach(ematch, e)
+		module_put(ematch->u.kernel.match->me);
+	t = compat_ip6t_get_target(e);
+	module_put(t->u.kernel.target->me);
 }
 
 static int
-do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
+				  struct xt_table_info *newinfo,
+				  unsigned int *size,
+				  const unsigned char *base,
+				  const unsigned char *limit,
+				  const unsigned int *hook_entries,
+				  const unsigned int *underflows,
+				  const char *name)
 {
-	int ret;
+	struct xt_entry_match *ematch;
+	struct xt_entry_target *t;
+	struct xt_target *target;
+	unsigned int entry_offset;
+	unsigned int j;
+	int ret, off, h;
 
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
+	duprintf("check_compat_entry_size_and_hooks %p\n", e);
+	if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 ||
+	    (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) {
+		duprintf("Bad offset %p, limit = %p\n", e, limit);
+		return -EINVAL;
+	}
 
-	switch (cmd) {
-	case IP6T_SO_GET_INFO: {
-		char name[IP6T_TABLE_MAXNAMELEN];
-		struct ip6t_table *t;
+	if (e->next_offset < sizeof(struct compat_ip6t_entry) +
+			     sizeof(struct compat_xt_entry_target)) {
+		duprintf("checking: element %p size %u\n",
+			 e, e->next_offset);
+		return -EINVAL;
+	}
 
-		if (*len != sizeof(struct ip6t_getinfo)) {
-			duprintf("length %u != %u\n", *len,
-				 sizeof(struct ip6t_getinfo));
-			ret = -EINVAL;
-			break;
-		}
+	/* For purposes of check_entry casting the compat entry is fine */
+	ret = check_entry((struct ip6t_entry *)e, name);
+	if (ret)
+		return ret;
 
-		if (copy_from_user(name, user, sizeof(name)) != 0) {
-			ret = -EFAULT;
-			break;
-		}
-		name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
-
-		t = try_then_request_module(find_table_lock(name),
-					    "ip6table_%s", name);
-		if (t && !IS_ERR(t)) {
-			struct ip6t_getinfo info;
-
-			info.valid_hooks = t->valid_hooks;
-			memcpy(info.hook_entry, t->private->hook_entry,
-			       sizeof(info.hook_entry));
-			memcpy(info.underflow, t->private->underflow,
-			       sizeof(info.underflow));
-			info.num_entries = t->private->number;
-			info.size = t->private->size;
-			memcpy(info.name, name, sizeof(info.name));
-
-			if (copy_to_user(user, &info, *len) != 0)
-				ret = -EFAULT;
-			else
-				ret = 0;
-			up(&ip6t_mutex);
-			module_put(t->me);
-		} else
-			ret = t ? PTR_ERR(t) : -ENOENT;
+	off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+	entry_offset = (void *)e - (void *)base;
+	j = 0;
+	xt_ematch_foreach(ematch, e) {
+		ret = compat_find_calc_match(ematch, name,
+					     &e->ipv6, e->comefrom, &off);
+		if (ret != 0)
+			goto release_matches;
+		++j;
+	}
+
+	t = compat_ip6t_get_target(e);
+	target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
+					t->u.user.revision);
+	if (IS_ERR(target)) {
+		duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
+			 t->u.user.name);
+		ret = PTR_ERR(target);
+		goto release_matches;
 	}
-	break;
+	t->u.kernel.target = target;
 
-	case IP6T_SO_GET_ENTRIES: {
-		struct ip6t_get_entries get;
+	off += xt_compat_target_offset(target);
+	*size += off;
+	ret = xt_compat_add_offset(AF_INET6, entry_offset, off);
+	if (ret)
+		goto out;
 
-		if (*len < sizeof(get)) {
-			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
-			ret = -EINVAL;
-		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
-			ret = -EFAULT;
-		} else if (*len != sizeof(struct ip6t_get_entries) + get.size) {
-			duprintf("get_entries: %u != %u\n", *len,
-				 sizeof(struct ip6t_get_entries) + get.size);
-			ret = -EINVAL;
-		} else
-			ret = get_entries(&get, user);
-		break;
+	/* Check hooks & underflows */
+	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+		if ((unsigned char *)e - base == hook_entries[h])
+			newinfo->hook_entry[h] = hook_entries[h];
+		if ((unsigned char *)e - base == underflows[h])
+			newinfo->underflow[h] = underflows[h];
 	}
 
-	case IP6T_SO_GET_REVISION_MATCH:
-	case IP6T_SO_GET_REVISION_TARGET: {
-		struct ip6t_get_revision rev;
-		int (*revfn)(const char *, u8, int *);
+	/* Clear counters and comefrom */
+	memset(&e->counters, 0, sizeof(e->counters));
+	e->comefrom = 0;
+	return 0;
 
-		if (*len != sizeof(rev)) {
-			ret = -EINVAL;
-			break;
-		}
-		if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
-			ret = -EFAULT;
+out:
+	module_put(t->u.kernel.target->me);
+release_matches:
+	xt_ematch_foreach(ematch, e) {
+		if (j-- == 0)
 			break;
-		}
-
-		if (cmd == IP6T_SO_GET_REVISION_TARGET)
-			revfn = target_revfn;
-		else
-			revfn = match_revfn;
-
-		try_then_request_module(find_revision(rev.name, rev.revision,
-						      revfn, &ret),
-					"ip6t_%s", rev.name);
-		break;
+		module_put(ematch->u.kernel.match->me);
 	}
+	return ret;
+}
 
-	default:
-		duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd);
-		ret = -EINVAL;
+static int
+compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
+			    unsigned int *size, const char *name,
+			    struct xt_table_info *newinfo, unsigned char *base)
+{
+	struct xt_entry_target *t;
+	struct ip6t_entry *de;
+	unsigned int origsize;
+	int ret, h;
+	struct xt_entry_match *ematch;
+
+	ret = 0;
+	origsize = *size;
+	de = (struct ip6t_entry *)*dstptr;
+	memcpy(de, e, sizeof(struct ip6t_entry));
+	memcpy(&de->counters, &e->counters, sizeof(e->counters));
+
+	*dstptr += sizeof(struct ip6t_entry);
+	*size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+
+	xt_ematch_foreach(ematch, e) {
+		ret = xt_compat_match_from_user(ematch, dstptr, size);
+		if (ret != 0)
+			return ret;
+	}
+	de->target_offset = e->target_offset - (origsize - *size);
+	t = compat_ip6t_get_target(e);
+	xt_compat_target_from_user(t, dstptr, size);
+
+	de->next_offset = e->next_offset - (origsize - *size);
+	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+		if ((unsigned char *)de - base < newinfo->hook_entry[h])
+			newinfo->hook_entry[h] -= origsize - *size;
+		if ((unsigned char *)de - base < newinfo->underflow[h])
+			newinfo->underflow[h] -= origsize - *size;
 	}
-
 	return ret;
 }
 
-/* Registration hooks for targets. */
-int
-ip6t_register_target(struct ip6t_target *target)
+static int compat_check_entry(struct ip6t_entry *e, struct net *net,
+			      const char *name)
 {
-	int ret;
+	unsigned int j;
+	int ret = 0;
+	struct xt_mtchk_param mtpar;
+	struct xt_entry_match *ematch;
 
-	ret = down_interruptible(&ip6t_mutex);
-	if (ret != 0)
-		return ret;
-	list_add(&target->list, &ip6t_target);
-	up(&ip6t_mutex);
+	j = 0;
+	mtpar.net	= net;
+	mtpar.table     = name;
+	mtpar.entryinfo = &e->ipv6;
+	mtpar.hook_mask = e->comefrom;
+	mtpar.family    = NFPROTO_IPV6;
+	xt_ematch_foreach(ematch, e) {
+		ret = check_match(ematch, &mtpar);
+		if (ret != 0)
+			goto cleanup_matches;
+		++j;
+	}
+
+	ret = check_target(e, net, name);
+	if (ret)
+		goto cleanup_matches;
+	return 0;
+
+ cleanup_matches:
+	xt_ematch_foreach(ematch, e) {
+		if (j-- == 0)
+			break;
+		cleanup_match(ematch, net);
+	}
 	return ret;
 }
 
-void
-ip6t_unregister_target(struct ip6t_target *target)
+static int
+translate_compat_table(struct net *net,
+		       const char *name,
+		       unsigned int valid_hooks,
+		       struct xt_table_info **pinfo,
+		       void **pentry0,
+		       unsigned int total_size,
+		       unsigned int number,
+		       unsigned int *hook_entries,
+		       unsigned int *underflows)
 {
-	down(&ip6t_mutex);
-	LIST_DELETE(&ip6t_target, target);
-	up(&ip6t_mutex);
-}
+	unsigned int i, j;
+	struct xt_table_info *newinfo, *info;
+	void *pos, *entry0, *entry1;
+	struct compat_ip6t_entry *iter0;
+	struct ip6t_entry *iter1;
+	unsigned int size;
+	int ret = 0;
 
-int
-ip6t_register_match(struct ip6t_match *match)
-{
-	int ret;
+	info = *pinfo;
+	entry0 = *pentry0;
+	size = total_size;
+	info->number = number;
 
-	ret = down_interruptible(&ip6t_mutex);
-	if (ret != 0)
+	/* Init all hooks to impossible value. */
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+		info->hook_entry[i] = 0xFFFFFFFF;
+		info->underflow[i] = 0xFFFFFFFF;
+	}
+
+	duprintf("translate_compat_table: size %u\n", info->size);
+	j = 0;
+	xt_compat_lock(AF_INET6);
+	xt_compat_init_offsets(AF_INET6, number);
+	/* Walk through entries, checking offsets. */
+	xt_entry_foreach(iter0, entry0, total_size) {
+		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+							entry0,
+							entry0 + total_size,
+							hook_entries,
+							underflows,
+							name);
+		if (ret != 0)
+			goto out_unlock;
+		++j;
+	}
+
+	ret = -EINVAL;
+	if (j != number) {
+		duprintf("translate_compat_table: %u not %u entries\n",
+			 j, number);
+		goto out_unlock;
+	}
+
+	/* Check hooks all assigned */
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+		/* Only hooks which are valid */
+		if (!(valid_hooks & (1 << i)))
+			continue;
+		if (info->hook_entry[i] == 0xFFFFFFFF) {
+			duprintf("Invalid hook entry %u %u\n",
+				 i, hook_entries[i]);
+			goto out_unlock;
+		}
+		if (info->underflow[i] == 0xFFFFFFFF) {
+			duprintf("Invalid underflow %u %u\n",
+				 i, underflows[i]);
+			goto out_unlock;
+		}
+	}
+
+	ret = -ENOMEM;
+	newinfo = xt_alloc_table_info(size);
+	if (!newinfo)
+		goto out_unlock;
+
+	newinfo->number = number;
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+		newinfo->hook_entry[i] = info->hook_entry[i];
+		newinfo->underflow[i] = info->underflow[i];
+	}
+	entry1 = newinfo->entries[raw_smp_processor_id()];
+	pos = entry1;
+	size = total_size;
+	xt_entry_foreach(iter0, entry0, total_size) {
+		ret = compat_copy_entry_from_user(iter0, &pos, &size,
+						  name, newinfo, entry1);
+		if (ret != 0)
+			break;
+	}
+	xt_compat_flush_offsets(AF_INET6);
+	xt_compat_unlock(AF_INET6);
+	if (ret)
+		goto free_newinfo;
+
+	ret = -ELOOP;
+	if (!mark_source_chains(newinfo, valid_hooks, entry1))
+		goto free_newinfo;
+
+	i = 0;
+	xt_entry_foreach(iter1, entry1, newinfo->size) {
+		ret = compat_check_entry(iter1, net, name);
+		if (ret != 0)
+			break;
+		++i;
+		if (strcmp(ip6t_get_target(iter1)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
+	}
+	if (ret) {
+		/*
+		 * The first i matches need cleanup_entry (calls ->destroy)
+		 * because they had called ->check already. The other j-i
+		 * entries need only release.
+		 */
+		int skip = i;
+		j -= i;
+		xt_entry_foreach(iter0, entry0, newinfo->size) {
+			if (skip-- > 0)
+				continue;
+			if (j-- == 0)
+				break;
+			compat_release_entry(iter0);
+		}
+		xt_entry_foreach(iter1, entry1, newinfo->size) {
+			if (i-- == 0)
+				break;
+			cleanup_entry(iter1, net);
+		}
+		xt_free_table_info(newinfo);
 		return ret;
+	}
 
-	list_add(&match->list, &ip6t_match);
-	up(&ip6t_mutex);
+	/* And one copy for every other CPU */
+	for_each_possible_cpu(i)
+		if (newinfo->entries[i] && newinfo->entries[i] != entry1)
+			memcpy(newinfo->entries[i], entry1, newinfo->size);
 
-	return ret;
-}
+	*pinfo = newinfo;
+	*pentry0 = entry1;
+	xt_free_table_info(info);
+	return 0;
 
-void
-ip6t_unregister_match(struct ip6t_match *match)
-{
-	down(&ip6t_mutex);
-	LIST_DELETE(&ip6t_match, match);
-	up(&ip6t_mutex);
+free_newinfo:
+	xt_free_table_info(newinfo);
+out:
+	xt_entry_foreach(iter0, entry0, total_size) {
+		if (j-- == 0)
+			break;
+		compat_release_entry(iter0);
+	}
+	return ret;
+out_unlock:
+	xt_compat_flush_offsets(AF_INET6);
+	xt_compat_unlock(AF_INET6);
+	goto out;
 }
 
-int ip6t_register_table(struct ip6t_table *table,
-			const struct ip6t_replace *repl)
+static int
+compat_do_replace(struct net *net, void __user *user, unsigned int len)
 {
 	int ret;
-	struct ip6t_table_info *newinfo;
-	static struct ip6t_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	struct compat_ip6t_replace tmp;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+	struct ip6t_entry *iter;
 
-	newinfo = vmalloc(sizeof(struct ip6t_table_info)
-			  + SMP_ALIGN(repl->size) *
-			  		(highest_possible_processor_id()+1));
-	if (!newinfo)
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* overflow check */
+	if (tmp.size >= INT_MAX / num_possible_cpus())
+		return -ENOMEM;
+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	tmp.name[sizeof(tmp.name)-1] = 0;
 
-	memcpy(newinfo->entries, repl->entries, repl->size);
+	newinfo = xt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
 
-	ret = translate_table(table->name, table->valid_hooks,
-			      newinfo, repl->size,
-			      repl->num_entries,
-			      repl->hook_entry,
-			      repl->underflow);
-	if (ret != 0) {
-		vfree(newinfo);
-		return ret;
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
 	}
 
-	ret = down_interruptible(&ip6t_mutex);
-	if (ret != 0) {
-		vfree(newinfo);
-		return ret;
-	}
+	ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
+				     &newinfo, &loc_cpu_entry, tmp.size,
+				     tmp.num_entries, tmp.hook_entry,
+				     tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
 
-	/* Don't autoload: we'd eat our tail... */
-	if (list_named_find(&ip6t_tables, table->name)) {
-		ret = -EEXIST;
-		goto free_unlock;
-	}
+	duprintf("compat_do_replace: Translated table\n");
 
-	/* Simplifies replace_table code. */
-	table->private = &bootstrap;
-	if (!replace_table(table, 0, newinfo, &ret))
-		goto free_unlock;
+	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
+			   tmp.num_counters, compat_ptr(tmp.counters));
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
 
-	duprintf("table->private->number = %u\n",
-		 table->private->number);
+ free_newinfo_untrans:
+	xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+		cleanup_entry(iter, net);
+ free_newinfo:
+	xt_free_table_info(newinfo);
+	return ret;
+}
 
-	/* save number of initial entries */
-	table->private->initial_entries = table->private->number;
+static int
+compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
+		       unsigned int len)
+{
+	int ret;
 
-	rwlock_init(&table->lock);
-	list_prepend(&ip6t_tables, table);
+	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
 
- unlock:
-	up(&ip6t_mutex);
-	return ret;
+	switch (cmd) {
+	case IP6T_SO_SET_REPLACE:
+		ret = compat_do_replace(sock_net(sk), user, len);
+		break;
 
- free_unlock:
-	vfree(newinfo);
-	goto unlock;
-}
+	case IP6T_SO_SET_ADD_COUNTERS:
+		ret = do_add_counters(sock_net(sk), user, len, 1);
+		break;
 
-void ip6t_unregister_table(struct ip6t_table *table)
-{
-	down(&ip6t_mutex);
-	LIST_DELETE(&ip6t_tables, table);
-	up(&ip6t_mutex);
+	default:
+		duprintf("do_ip6t_set_ctl:  unknown request %i\n", cmd);
+		ret = -EINVAL;
+	}
 
-	/* Decrease module usage counts and free resources */
-	IP6T_ENTRY_ITERATE(table->private->entries, table->private->size,
-			  cleanup_entry, NULL);
-	vfree(table->private);
+	return ret;
 }
 
-/* Returns 1 if the port is matched by the range, 0 otherwise */
-static inline int
-port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
+struct compat_ip6t_get_entries {
+	char name[XT_TABLE_MAXNAMELEN];
+	compat_uint_t size;
+	struct compat_ip6t_entry entrytable[0];
+};
+
+static int
+compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
+			    void __user *userptr)
 {
-	int ret;
+	struct xt_counters *counters;
+	const struct xt_table_info *private = table->private;
+	void __user *pos;
+	unsigned int size;
+	int ret = 0;
+	const void *loc_cpu_entry;
+	unsigned int i = 0;
+	struct ip6t_entry *iter;
+
+	counters = alloc_counters(table);
+	if (IS_ERR(counters))
+		return PTR_ERR(counters);
+
+	/* choose the copy that is on our node/cpu, ...
+	 * This choice is lazy (because current thread is
+	 * allowed to migrate to another cpu)
+	 */
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	pos = userptr;
+	size = total_size;
+	xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+		ret = compat_copy_entry_to_user(iter, &pos,
+						&size, counters, i++);
+		if (ret != 0)
+			break;
+	}
 
-	ret = (port >= min && port <= max) ^ invert;
+	vfree(counters);
 	return ret;
 }
 
 static int
-tcp_find_option(u_int8_t option,
-		const struct sk_buff *skb,
-		unsigned int tcpoff,
-		unsigned int optlen,
-		int invert,
-		int *hotdrop)
+compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
+		   int *len)
 {
-	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
-	u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
-	unsigned int i;
+	int ret;
+	struct compat_ip6t_get_entries get;
+	struct xt_table *t;
 
-	duprintf("tcp_match: finding option\n");
-	if (!optlen)
-		return invert;
-	/* If we don't have the whole header, drop packet. */
-	op = skb_header_pointer(skb, tcpoff + sizeof(struct tcphdr), optlen,
-				_opt);
-	if (op == NULL) {
-		*hotdrop = 1;
-		return 0;
+	if (*len < sizeof(get)) {
+		duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+		return -EINVAL;
 	}
 
-	for (i = 0; i < optlen; ) {
-		if (op[i] == option) return !invert;
-		if (op[i] < 2) i++;
-		else i += op[i+1]?:1;
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
+
+	if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) {
+		duprintf("compat_get_entries: %u != %zu\n",
+			 *len, sizeof(get) + get.size);
+		return -EINVAL;
 	}
 
-	return invert;
+	xt_compat_lock(AF_INET6);
+	t = xt_find_table_lock(net, AF_INET6, get.name);
+	if (!IS_ERR_OR_NULL(t)) {
+		const struct xt_table_info *private = t->private;
+		struct xt_table_info info;
+		duprintf("t->private->number = %u\n", private->number);
+		ret = compat_table_info(private, &info);
+		if (!ret && get.size == info.size) {
+			ret = compat_copy_entries_to_user(private->size,
+							  t, uptr->entrytable);
+		} else if (!ret) {
+			duprintf("compat_get_entries: I've got %u not %u!\n",
+				 private->size, get.size);
+			ret = -EAGAIN;
+		}
+		xt_compat_flush_offsets(AF_INET6);
+		module_put(t->me);
+		xt_table_unlock(t);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+
+	xt_compat_unlock(AF_INET6);
+	return ret;
 }
 
+static int do_ip6t_get_ctl(struct sock *, int, void __user *, int *);
+
 static int
-tcp_match(const struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  const void *matchinfo,
-	  int offset,
-	  unsigned int protoff,
-	  int *hotdrop)
+compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 {
-	struct tcphdr _tcph, *th;
-	const struct ip6t_tcp *tcpinfo = matchinfo;
-
-	if (offset) {
-		/* To quote Alan:
-
-		   Don't allow a fragment of TCP 8 bytes in. Nobody normal
-		   causes this. Its a cracker trying to break in by doing a
-		   flag overwrite to pass the direction checks.
-		*/
-		if (offset == 1) {
-			duprintf("Dropping evil TCP offset=1 frag.\n");
-			*hotdrop = 1;
-		}
-		/* Must not be a fragment. */
-		return 0;
-	}
+	int ret;
 
-#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
+	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
 
-	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
-	if (th == NULL) {
-		/* We've been asked to examine this packet, and we
-		   can't.  Hence, no choice but to drop. */
-		duprintf("Dropping evil TCP offset=0 tinygram.\n");
-		*hotdrop = 1;
-		return 0;
-	}
-
-	if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
-			ntohs(th->source),
-			!!(tcpinfo->invflags & IP6T_TCP_INV_SRCPT)))
-		return 0;
-	if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
-			ntohs(th->dest),
-			!!(tcpinfo->invflags & IP6T_TCP_INV_DSTPT)))
-		return 0;
-	if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
-		      == tcpinfo->flg_cmp,
-		      IP6T_TCP_INV_FLAGS))
-		return 0;
-	if (tcpinfo->option) {
-		if (th->doff * 4 < sizeof(_tcph)) {
-			*hotdrop = 1;
-			return 0;
-		}
-		if (!tcp_find_option(tcpinfo->option, skb, protoff,
-				     th->doff*4 - sizeof(*th),
-				     tcpinfo->invflags & IP6T_TCP_INV_OPTION,
-				     hotdrop))
-			return 0;
+	switch (cmd) {
+	case IP6T_SO_GET_INFO:
+		ret = get_info(sock_net(sk), user, len, 1);
+		break;
+	case IP6T_SO_GET_ENTRIES:
+		ret = compat_get_entries(sock_net(sk), user, len);
+		break;
+	default:
+		ret = do_ip6t_get_ctl(sk, cmd, user, len);
 	}
-	return 1;
+	return ret;
 }
+#endif
 
-/* Called when user tries to insert an entry of this type. */
 static int
-tcp_checkentry(const char *tablename,
-	       const struct ip6t_ip6 *ipv6,
-	       void *matchinfo,
-	       unsigned int matchsize,
-	       unsigned int hook_mask)
+do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 {
-	const struct ip6t_tcp *tcpinfo = matchinfo;
+	int ret;
+
+	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case IP6T_SO_SET_REPLACE:
+		ret = do_replace(sock_net(sk), user, len);
+		break;
+
+	case IP6T_SO_SET_ADD_COUNTERS:
+		ret = do_add_counters(sock_net(sk), user, len, 0);
+		break;
 
-	/* Must specify proto == TCP, and no unknown invflags */
-	return ipv6->proto == IPPROTO_TCP
-		&& !(ipv6->invflags & IP6T_INV_PROTO)
-		&& matchsize == IP6T_ALIGN(sizeof(struct ip6t_tcp))
-		&& !(tcpinfo->invflags & ~IP6T_TCP_INV_MASK);
+	default:
+		duprintf("do_ip6t_set_ctl:  unknown request %i\n", cmd);
+		ret = -EINVAL;
+	}
+
+	return ret;
 }
 
 static int
-udp_match(const struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  const void *matchinfo,
-	  int offset,
-	  unsigned int protoff,
-	  int *hotdrop)
+do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 {
-	struct udphdr _udph, *uh;
-	const struct ip6t_udp *udpinfo = matchinfo;
+	int ret;
 
-	/* Must not be a fragment. */
-	if (offset)
-		return 0;
+	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
 
-	uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph);
-	if (uh == NULL) {
-		/* We've been asked to examine this packet, and we
-		   can't.  Hence, no choice but to drop. */
-		duprintf("Dropping evil UDP tinygram.\n");
-		*hotdrop = 1;
-		return 0;
-	}
-
-	return port_match(udpinfo->spts[0], udpinfo->spts[1],
-			  ntohs(uh->source),
-			  !!(udpinfo->invflags & IP6T_UDP_INV_SRCPT))
-		&& port_match(udpinfo->dpts[0], udpinfo->dpts[1],
-			      ntohs(uh->dest),
-			      !!(udpinfo->invflags & IP6T_UDP_INV_DSTPT));
+	switch (cmd) {
+	case IP6T_SO_GET_INFO:
+		ret = get_info(sock_net(sk), user, len, 0);
+		break;
+
+	case IP6T_SO_GET_ENTRIES:
+		ret = get_entries(sock_net(sk), user, len);
+		break;
+
+	case IP6T_SO_GET_REVISION_MATCH:
+	case IP6T_SO_GET_REVISION_TARGET: {
+		struct xt_get_revision rev;
+		int target;
+
+		if (*len != sizeof(rev)) {
+			ret = -EINVAL;
+			break;
+		}
+		if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
+			ret = -EFAULT;
+			break;
+		}
+		rev.name[sizeof(rev.name)-1] = 0;
+
+		if (cmd == IP6T_SO_GET_REVISION_TARGET)
+			target = 1;
+		else
+			target = 0;
+
+		try_then_request_module(xt_find_revision(AF_INET6, rev.name,
+							 rev.revision,
+							 target, &ret),
+					"ip6t_%s", rev.name);
+		break;
+	}
+
+	default:
+		duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd);
+		ret = -EINVAL;
+	}
+
+	return ret;
 }
 
-/* Called when user tries to insert an entry of this type. */
-static int
-udp_checkentry(const char *tablename,
-	       const struct ip6t_ip6 *ipv6,
-	       void *matchinfo,
-	       unsigned int matchinfosize,
-	       unsigned int hook_mask)
+struct xt_table *ip6t_register_table(struct net *net,
+				     const struct xt_table *table,
+				     const struct ip6t_replace *repl)
 {
-	const struct ip6t_udp *udpinfo = matchinfo;
+	int ret;
+	struct xt_table_info *newinfo;
+	struct xt_table_info bootstrap = {0};
+	void *loc_cpu_entry;
+	struct xt_table *new_table;
 
-	/* Must specify proto == UDP, and no unknown invflags */
-	if (ipv6->proto != IPPROTO_UDP || (ipv6->invflags & IP6T_INV_PROTO)) {
-		duprintf("ip6t_udp: Protocol %u != %u\n", ipv6->proto,
-			 IPPROTO_UDP);
-		return 0;
-	}
-	if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_udp))) {
-		duprintf("ip6t_udp: matchsize %u != %u\n",
-			 matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_udp)));
-		return 0;
+	newinfo = xt_alloc_table_info(repl->size);
+	if (!newinfo) {
+		ret = -ENOMEM;
+		goto out;
 	}
-	if (udpinfo->invflags & ~IP6T_UDP_INV_MASK) {
-		duprintf("ip6t_udp: unknown flags %X\n",
-			 udpinfo->invflags);
-		return 0;
+
+	/* choose the copy on our node/cpu, but dont care about preemption */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	memcpy(loc_cpu_entry, repl->entries, repl->size);
+
+	ret = translate_table(net, newinfo, loc_cpu_entry, repl);
+	if (ret != 0)
+		goto out_free;
+
+	new_table = xt_register_table(net, table, &bootstrap, newinfo);
+	if (IS_ERR(new_table)) {
+		ret = PTR_ERR(new_table);
+		goto out_free;
 	}
+	return new_table;
 
-	return 1;
+out_free:
+	xt_free_table_info(newinfo);
+out:
+	return ERR_PTR(ret);
+}
+
+void ip6t_unregister_table(struct net *net, struct xt_table *table)
+{
+	struct xt_table_info *private;
+	void *loc_cpu_entry;
+	struct module *table_owner = table->me;
+	struct ip6t_entry *iter;
+
+	private = xt_unregister_table(table);
+
+	/* Decrease module usage counts and free resources */
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	xt_entry_foreach(iter, loc_cpu_entry, private->size)
+		cleanup_entry(iter, net);
+	if (private->number > private->initial_entries)
+		module_put(table_owner);
+	xt_free_table_info(private);
 }
 
 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
-static inline int
+static inline bool
 icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
 		     u_int8_t type, u_int8_t code,
-		     int invert)
+		     bool invert)
 {
 	return (type == test_type && code >= min_code && code <= max_code)
 		^ invert;
 }
 
-static int
-icmp6_match(const struct sk_buff *skb,
-	   const struct net_device *in,
-	   const struct net_device *out,
-	   const void *matchinfo,
-	   int offset,
-	   unsigned int protoff,
-	   int *hotdrop)
+static bool
+icmp6_match(const struct sk_buff *skb, struct xt_action_param *par)
 {
-	struct icmp6hdr _icmp, *ic;
-	const struct ip6t_icmp *icmpinfo = matchinfo;
+	const struct icmp6hdr *ic;
+	struct icmp6hdr _icmph;
+	const struct ip6t_icmp *icmpinfo = par->matchinfo;
 
 	/* Must not be a fragment. */
-	if (offset)
-		return 0;
+	if (par->fragoff != 0)
+		return false;
 
-	ic = skb_header_pointer(skb, protoff, sizeof(_icmp), &_icmp);
+	ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
 	if (ic == NULL) {
 		/* We've been asked to examine this packet, and we
-		   can't.  Hence, no choice but to drop. */
+		 * can't.  Hence, no choice but to drop.
+		 */
 		duprintf("Dropping evil ICMP tinygram.\n");
-		*hotdrop = 1;
-		return 0;
+		par->hotdrop = true;
+		return false;
 	}
 
 	return icmp6_type_code_match(icmpinfo->type,
@@ -1838,30 +2166,32 @@ icmp6_match(const struct sk_buff *skb,
 }
 
 /* Called when user tries to insert an entry of this type. */
-static int
-icmp6_checkentry(const char *tablename,
-	   const struct ip6t_ip6 *ipv6,
-	   void *matchinfo,
-	   unsigned int matchsize,
-	   unsigned int hook_mask)
+static int icmp6_checkentry(const struct xt_mtchk_param *par)
 {
-	const struct ip6t_icmp *icmpinfo = matchinfo;
+	const struct ip6t_icmp *icmpinfo = par->matchinfo;
 
-	/* Must specify proto == ICMP, and no unknown invflags */
-	return ipv6->proto == IPPROTO_ICMPV6
-		&& !(ipv6->invflags & IP6T_INV_PROTO)
-		&& matchsize == IP6T_ALIGN(sizeof(struct ip6t_icmp))
-		&& !(icmpinfo->invflags & ~IP6T_ICMP_INV);
+	/* Must specify no unknown invflags */
+	return (icmpinfo->invflags & ~IP6T_ICMP_INV) ? -EINVAL : 0;
 }
 
 /* The built-in targets: standard (NULL) and error. */
-static struct ip6t_target ip6t_standard_target = {
-	.name		= IP6T_STANDARD_TARGET,
-};
-
-static struct ip6t_target ip6t_error_target = {
-	.name		= IP6T_ERROR_TARGET,
-	.target		= ip6t_error,
+static struct xt_target ip6t_builtin_tg[] __read_mostly = {
+	{
+		.name             = XT_STANDARD_TARGET,
+		.targetsize       = sizeof(int),
+		.family           = NFPROTO_IPV6,
+#ifdef CONFIG_COMPAT
+		.compatsize       = sizeof(compat_int_t),
+		.compat_from_user = compat_standard_from_user,
+		.compat_to_user   = compat_standard_to_user,
+#endif
+	},
+	{
+		.name             = XT_ERROR_TARGET,
+		.target           = ip6t_error,
+		.targetsize       = XT_FUNCTION_MAXNAMELEN,
+		.family           = NFPROTO_IPV6,
+	},
 };
 
 static struct nf_sockopt_ops ip6t_sockopts = {
@@ -1869,231 +2199,90 @@ static struct nf_sockopt_ops ip6t_sockopts = {
 	.set_optmin	= IP6T_BASE_CTL,
 	.set_optmax	= IP6T_SO_SET_MAX+1,
 	.set		= do_ip6t_set_ctl,
+#ifdef CONFIG_COMPAT
+	.compat_set	= compat_do_ip6t_set_ctl,
+#endif
 	.get_optmin	= IP6T_BASE_CTL,
 	.get_optmax	= IP6T_SO_GET_MAX+1,
 	.get		= do_ip6t_get_ctl,
+#ifdef CONFIG_COMPAT
+	.compat_get	= compat_do_ip6t_get_ctl,
+#endif
+	.owner		= THIS_MODULE,
 };
 
-static struct ip6t_match tcp_matchstruct = {
-	.name		= "tcp",
-	.match		= &tcp_match,
-	.checkentry	= &tcp_checkentry,
-};
-
-static struct ip6t_match udp_matchstruct = {
-	.name		= "udp",
-	.match		= &udp_match,
-	.checkentry	= &udp_checkentry,
-};
-
-static struct ip6t_match icmp6_matchstruct = {
-	.name		= "icmp6",
-	.match		= &icmp6_match,
-	.checkentry	= &icmp6_checkentry,
+static struct xt_match ip6t_builtin_mt[] __read_mostly = {
+	{
+		.name       = "icmp6",
+		.match      = icmp6_match,
+		.matchsize  = sizeof(struct ip6t_icmp),
+		.checkentry = icmp6_checkentry,
+		.proto      = IPPROTO_ICMPV6,
+		.family     = NFPROTO_IPV6,
+	},
 };
 
-#ifdef CONFIG_PROC_FS
-static inline int print_name(const char *i,
-			     off_t start_offset, char *buffer, int length,
-			     off_t *pos, unsigned int *count)
-{
-	if ((*count)++ >= start_offset) {
-		unsigned int namelen;
-
-		namelen = sprintf(buffer + *pos, "%s\n",
-				  i + sizeof(struct list_head));
-		if (*pos + namelen > length) {
-			/* Stop iterating */
-			return 1;
-		}
-		*pos += namelen;
-	}
-	return 0;
-}
-
-static inline int print_target(const struct ip6t_target *t,
-                               off_t start_offset, char *buffer, int length,
-                               off_t *pos, unsigned int *count)
+static int __net_init ip6_tables_net_init(struct net *net)
 {
-	if (t == &ip6t_standard_target || t == &ip6t_error_target)
-		return 0;
-	return print_name((char *)t, start_offset, buffer, length, pos, count);
+	return xt_proto_init(net, NFPROTO_IPV6);
 }
 
-static int ip6t_get_tables(char *buffer, char **start, off_t offset, int length)
+static void __net_exit ip6_tables_net_exit(struct net *net)
 {
-	off_t pos = 0;
-	unsigned int count = 0;
-
-	if (down_interruptible(&ip6t_mutex) != 0)
-		return 0;
-
-	LIST_FIND(&ip6t_tables, print_name, char *,
-		  offset, buffer, length, &pos, &count);
-
-	up(&ip6t_mutex);
-
-	/* `start' hack - see fs/proc/generic.c line ~105 */
-	*start=(char *)((unsigned long)count-offset);
-	return pos;
+	xt_proto_fini(net, NFPROTO_IPV6);
 }
 
-static int ip6t_get_targets(char *buffer, char **start, off_t offset, int length)
-{
-	off_t pos = 0;
-	unsigned int count = 0;
-
-	if (down_interruptible(&ip6t_mutex) != 0)
-		return 0;
-
-	LIST_FIND(&ip6t_target, print_target, struct ip6t_target *,
-		  offset, buffer, length, &pos, &count);
-
-	up(&ip6t_mutex);
-
-	*start = (char *)((unsigned long)count - offset);
-	return pos;
-}
-
-static int ip6t_get_matches(char *buffer, char **start, off_t offset, int length)
-{
-	off_t pos = 0;
-	unsigned int count = 0;
-
-	if (down_interruptible(&ip6t_mutex) != 0)
-		return 0;
-
-	LIST_FIND(&ip6t_match, print_name, char *,
-		  offset, buffer, length, &pos, &count);
-
-	up(&ip6t_mutex);
-
-	*start = (char *)((unsigned long)count - offset);
-	return pos;
-}
-
-static struct { char *name; get_info_t *get_info; } ip6t_proc_entry[] =
-{ { "ip6_tables_names", ip6t_get_tables },
-  { "ip6_tables_targets", ip6t_get_targets },
-  { "ip6_tables_matches", ip6t_get_matches },
-  { NULL, NULL} };
-#endif /*CONFIG_PROC_FS*/
+static struct pernet_operations ip6_tables_net_ops = {
+	.init = ip6_tables_net_init,
+	.exit = ip6_tables_net_exit,
+};
 
-static int __init init(void)
+static int __init ip6_tables_init(void)
 {
 	int ret;
 
-	/* Noone else will be downing sem now, so we won't sleep */
-	down(&ip6t_mutex);
-	list_append(&ip6t_target, &ip6t_standard_target);
-	list_append(&ip6t_target, &ip6t_error_target);
-	list_append(&ip6t_match, &tcp_matchstruct);
-	list_append(&ip6t_match, &udp_matchstruct);
-	list_append(&ip6t_match, &icmp6_matchstruct);
-	up(&ip6t_mutex);
+	ret = register_pernet_subsys(&ip6_tables_net_ops);
+	if (ret < 0)
+		goto err1;
+
+	/* No one else will be downing sem now, so we won't sleep */
+	ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg));
+	if (ret < 0)
+		goto err2;
+	ret = xt_register_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt));
+	if (ret < 0)
+		goto err4;
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&ip6t_sockopts);
-	if (ret < 0) {
-		duprintf("Unable to register sockopts.\n");
-		return ret;
-	}
-
-#ifdef CONFIG_PROC_FS
-	{
-		struct proc_dir_entry *proc;
-		int i;
-
-		for (i = 0; ip6t_proc_entry[i].name; i++) {
-			proc = proc_net_create(ip6t_proc_entry[i].name, 0,
-					       ip6t_proc_entry[i].get_info);
-			if (!proc) {
-				while (--i >= 0)
-				       proc_net_remove(ip6t_proc_entry[i].name);
-				nf_unregister_sockopt(&ip6t_sockopts);
-				return -ENOMEM;
-			}
-			proc->owner = THIS_MODULE;
-		}
-	}
-#endif
+	if (ret < 0)
+		goto err5;
 
-	printk("ip6_tables: (C) 2000-2002 Netfilter core team\n");
+	pr_info("(C) 2000-2006 Netfilter Core Team\n");
 	return 0;
-}
 
-static void __exit fini(void)
-{
-	nf_unregister_sockopt(&ip6t_sockopts);
-#ifdef CONFIG_PROC_FS
-	{
-		int i;
-		for (i = 0; ip6t_proc_entry[i].name; i++)
-			proc_net_remove(ip6t_proc_entry[i].name);
-	}
-#endif
+err5:
+	xt_unregister_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt));
+err4:
+	xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg));
+err2:
+	unregister_pernet_subsys(&ip6_tables_net_ops);
+err1:
+	return ret;
 }
 
-/*
- * find specified header up to transport protocol header.
- * If found target header, the offset to the header is set to *offset
- * and return 0. otherwise, return -1.
- *
- * Notes: - non-1st Fragment Header isn't skipped.
- *	  - ESP header isn't skipped.
- *	  - The target header may be trancated.
- */
-int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
+static void __exit ip6_tables_fini(void)
 {
-	unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data;
-	u8 nexthdr = skb->nh.ipv6h->nexthdr;
-	unsigned int len = skb->len - start;
-
-	while (nexthdr != target) {
-		struct ipv6_opt_hdr _hdr, *hp;
-		unsigned int hdrlen;
-
-		if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE)
-			return -1;
-		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
-		if (hp == NULL)
-			return -1;
-		if (nexthdr == NEXTHDR_FRAGMENT) {
-			unsigned short _frag_off, *fp;
-			fp = skb_header_pointer(skb,
-						start+offsetof(struct frag_hdr,
-							       frag_off),
-						sizeof(_frag_off),
-						&_frag_off);
-			if (fp == NULL)
-				return -1;
-
-			if (ntohs(*fp) & ~0x7)
-				return -1;
-			hdrlen = 8;
-		} else if (nexthdr == NEXTHDR_AUTH)
-			hdrlen = (hp->hdrlen + 2) << 2; 
-		else
-			hdrlen = ipv6_optlen(hp); 
-
-		nexthdr = hp->nexthdr;
-		len -= hdrlen;
-		start += hdrlen;
-	}
+	nf_unregister_sockopt(&ip6t_sockopts);
 
-	*offset = start;
-	return 0;
+	xt_unregister_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt));
+	xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg));
+	unregister_pernet_subsys(&ip6_tables_net_ops);
 }
 
 EXPORT_SYMBOL(ip6t_register_table);
 EXPORT_SYMBOL(ip6t_unregister_table);
 EXPORT_SYMBOL(ip6t_do_table);
-EXPORT_SYMBOL(ip6t_register_match);
-EXPORT_SYMBOL(ip6t_unregister_match);
-EXPORT_SYMBOL(ip6t_register_target);
-EXPORT_SYMBOL(ip6t_unregister_target);
-EXPORT_SYMBOL(ip6t_ext_hdr);
-EXPORT_SYMBOL(ipv6_find_hdr);
-
-module_init(init);
-module_exit(fini);
+
+module_init(ip6_tables_init);
+module_exit(ip6_tables_fini);
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
deleted file mode 100644
index 8f5549b7272..00000000000
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/* 
- * Hop Limit modification target for ip6tables
- * Maciej Soltysiak <solt@dns.toxicfilms.tv>
- * Based on HW's TTL module
- *
- * This software is distributed under the terms of GNU GPL
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter_ipv6/ip6t_HL.h>
-
-MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
-MODULE_DESCRIPTION("IP tables Hop Limit modification module");
-MODULE_LICENSE("GPL");
-
-static unsigned int ip6t_hl_target(struct sk_buff **pskb, 
-				   const struct net_device *in,
-				   const struct net_device *out,
-				   unsigned int hooknum,
-				   const void *targinfo, void *userinfo)
-{
-	struct ipv6hdr *ip6h;
-	const struct ip6t_HL_info *info = targinfo;
-	u_int16_t diffs[2];
-	int new_hl;
-
-	if (!skb_make_writable(pskb, (*pskb)->len))
-		return NF_DROP;
-
-	ip6h = (*pskb)->nh.ipv6h;
-
-	switch (info->mode) {
-		case IP6T_HL_SET:
-			new_hl = info->hop_limit;
-			break;
-		case IP6T_HL_INC:
-			new_hl = ip6h->hop_limit + info->hop_limit;
-			if (new_hl > 255)
-				new_hl = 255;
-			break;
-		case IP6T_HL_DEC:
-			new_hl = ip6h->hop_limit - info->hop_limit;
-			if (new_hl < 0)
-				new_hl = 0;
-			break;
-		default:
-			new_hl = ip6h->hop_limit;
-			break;
-	}
-
-	if (new_hl != ip6h->hop_limit) {
-		diffs[0] = htons(((unsigned)ip6h->hop_limit) << 8) ^ 0xFFFF;
-		ip6h->hop_limit = new_hl;
-		diffs[1] = htons(((unsigned)ip6h->hop_limit) << 8);
-	}
-
-	return IP6T_CONTINUE;
-}
-
-static int ip6t_hl_checkentry(const char *tablename,
-		const struct ip6t_entry *e,
-		void *targinfo,
-		unsigned int targinfosize,
-		unsigned int hook_mask)
-{
-	struct ip6t_HL_info *info = targinfo;
-
-	if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_HL_info))) {
-		printk(KERN_WARNING "ip6t_HL: targinfosize %u != %Zu\n",
-				targinfosize,
-				IP6T_ALIGN(sizeof(struct ip6t_HL_info)));
-		return 0;	
-	}	
-
-	if (strcmp(tablename, "mangle")) {
-		printk(KERN_WARNING "ip6t_HL: can only be called from "
-			"\"mangle\" table, not \"%s\"\n", tablename);
-		return 0;
-	}
-
-	if (info->mode > IP6T_HL_MAXMODE) {
-		printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", 
-			info->mode);
-		return 0;
-	}
-
-	if ((info->mode != IP6T_HL_SET) && (info->hop_limit == 0)) {
-		printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't "
-			"make sense with value 0\n");
-		return 0;
-	}
-	
-	return 1;
-}
-
-static struct ip6t_target ip6t_HL = { 
-	.name 		= "HL", 
-	.target		= ip6t_hl_target, 
-	.checkentry	= ip6t_hl_checkentry, 
-	.me		= THIS_MODULE
-};
-
-static int __init init(void)
-{
-	return ip6t_register_target(&ip6t_HL);
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_target(&ip6t_HL);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
deleted file mode 100644
index 0cd1d1bd903..00000000000
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ /dev/null
@@ -1,507 +0,0 @@
-/*
- * This is a module which is used for logging packets.
- */
-
-/* (C) 2001 Jan Rekorajski <baggins@pld.org.pl>
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/spinlock.h>
-#include <linux/icmpv6.h>
-#include <net/udp.h>
-#include <net/tcp.h>
-#include <net/ipv6.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-
-MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
-MODULE_DESCRIPTION("IP6 tables LOG target module");
-MODULE_LICENSE("GPL");
-
-struct in_device;
-#include <net/route.h>
-#include <linux/netfilter_ipv6/ip6t_LOG.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Use lock to serialize, so printks don't overlap */
-static DEFINE_SPINLOCK(log_lock);
-
-/* One level of recursion won't kill us */
-static void dump_packet(const struct nf_loginfo *info,
-			const struct sk_buff *skb, unsigned int ip6hoff,
-			int recurse)
-{
-	u_int8_t currenthdr;
-	int fragment;
-	struct ipv6hdr _ip6h, *ih;
-	unsigned int ptr;
-	unsigned int hdrlen = 0;
-	unsigned int logflags;
-
-	if (info->type == NF_LOG_TYPE_LOG)
-		logflags = info->u.log.logflags;
-	else
-		logflags = NF_LOG_MASK;
-
-	ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
-	if (ih == NULL) {
-		printk("TRUNCATED");
-		return;
-	}
-
-	/* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000" */
-	printk("SRC=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(ih->saddr));
-	printk("DST=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(ih->daddr));
-
-	/* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
-	printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
-	       ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
-	       (ntohl(*(u_int32_t *)ih) & 0x0ff00000) >> 20,
-	       ih->hop_limit,
-	       (ntohl(*(u_int32_t *)ih) & 0x000fffff));
-
-	fragment = 0;
-	ptr = ip6hoff + sizeof(struct ipv6hdr);
-	currenthdr = ih->nexthdr;
-	while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) {
-		struct ipv6_opt_hdr _hdr, *hp;
-
-		hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-		if (hp == NULL) {
-			printk("TRUNCATED");
-			return;
-		}
-
-		/* Max length: 48 "OPT (...) " */
-		if (logflags & IP6T_LOG_IPOPT)
-			printk("OPT ( ");
-
-		switch (currenthdr) {
-		case IPPROTO_FRAGMENT: {
-			struct frag_hdr _fhdr, *fh;
-
-			printk("FRAG:");
-			fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
-						&_fhdr);
-			if (fh == NULL) {
-				printk("TRUNCATED ");
-				return;
-			}
-
-			/* Max length: 6 "65535 " */
-			printk("%u ", ntohs(fh->frag_off) & 0xFFF8);
-
-			/* Max length: 11 "INCOMPLETE " */
-			if (fh->frag_off & htons(0x0001))
-				printk("INCOMPLETE ");
-
-			printk("ID:%08x ", ntohl(fh->identification));
-
-			if (ntohs(fh->frag_off) & 0xFFF8)
-				fragment = 1;
-
-			hdrlen = 8;
-
-			break;
-		}
-		case IPPROTO_DSTOPTS:
-		case IPPROTO_ROUTING:
-		case IPPROTO_HOPOPTS:
-			if (fragment) {
-				if (logflags & IP6T_LOG_IPOPT)
-					printk(")");
-				return;
-			}
-			hdrlen = ipv6_optlen(hp);
-			break;
-		/* Max Length */
-		case IPPROTO_AH:
-			if (logflags & IP6T_LOG_IPOPT) {
-				struct ip_auth_hdr _ahdr, *ah;
-
-				/* Max length: 3 "AH " */
-				printk("AH ");
-
-				if (fragment) {
-					printk(")");
-					return;
-				}
-
-				ah = skb_header_pointer(skb, ptr, sizeof(_ahdr),
-							&_ahdr);
-				if (ah == NULL) {
-					/*
-					 * Max length: 26 "INCOMPLETE [65535 	
-					 *  bytes] )"
-					 */
-					printk("INCOMPLETE [%u bytes] )",
-					       skb->len - ptr);
-					return;
-				}
-
-				/* Length: 15 "SPI=0xF1234567 */
-				printk("SPI=0x%x ", ntohl(ah->spi));
-
-			}
-
-			hdrlen = (hp->hdrlen+2)<<2;
-			break;
-		case IPPROTO_ESP:
-			if (logflags & IP6T_LOG_IPOPT) {
-				struct ip_esp_hdr _esph, *eh;
-
-				/* Max length: 4 "ESP " */
-				printk("ESP ");
-
-				if (fragment) {
-					printk(")");
-					return;
-				}
-
-				/*
-				 * Max length: 26 "INCOMPLETE [65535 bytes] )"
-				 */
-				eh = skb_header_pointer(skb, ptr, sizeof(_esph),
-							&_esph);
-				if (eh == NULL) {
-					printk("INCOMPLETE [%u bytes] )",
-					       skb->len - ptr);
-					return;
-				}
-
-				/* Length: 16 "SPI=0xF1234567 )" */
-				printk("SPI=0x%x )", ntohl(eh->spi) );
-
-			}
-			return;
-		default:
-			/* Max length: 20 "Unknown Ext Hdr 255" */
-			printk("Unknown Ext Hdr %u", currenthdr);
-			return;
-		}
-		if (logflags & IP6T_LOG_IPOPT)
-			printk(") ");
-
-		currenthdr = hp->nexthdr;
-		ptr += hdrlen;
-	}
-
-	switch (currenthdr) {
-	case IPPROTO_TCP: {
-		struct tcphdr _tcph, *th;
-
-		/* Max length: 10 "PROTO=TCP " */
-		printk("PROTO=TCP ");
-
-		if (fragment)
-			break;
-
-		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
-		th = skb_header_pointer(skb, ptr, sizeof(_tcph), &_tcph);
-		if (th == NULL) {
-			printk("INCOMPLETE [%u bytes] ", skb->len - ptr);
-			return;
-		}
-
-		/* Max length: 20 "SPT=65535 DPT=65535 " */
-		printk("SPT=%u DPT=%u ",
-		       ntohs(th->source), ntohs(th->dest));
-		/* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
-		if (logflags & IP6T_LOG_TCPSEQ)
-			printk("SEQ=%u ACK=%u ",
-			       ntohl(th->seq), ntohl(th->ack_seq));
-		/* Max length: 13 "WINDOW=65535 " */
-		printk("WINDOW=%u ", ntohs(th->window));
-		/* Max length: 9 "RES=0x3C " */
-		printk("RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
-		/* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
-		if (th->cwr)
-			printk("CWR ");
-		if (th->ece)
-			printk("ECE ");
-		if (th->urg)
-			printk("URG ");
-		if (th->ack)
-			printk("ACK ");
-		if (th->psh)
-			printk("PSH ");
-		if (th->rst)
-			printk("RST ");
-		if (th->syn)
-			printk("SYN ");
-		if (th->fin)
-			printk("FIN ");
-		/* Max length: 11 "URGP=65535 " */
-		printk("URGP=%u ", ntohs(th->urg_ptr));
-
-		if ((logflags & IP6T_LOG_TCPOPT)
-		    && th->doff * 4 > sizeof(struct tcphdr)) {
-			u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
-			unsigned int i;
-			unsigned int optsize = th->doff * 4
-					       - sizeof(struct tcphdr);
-
-			op = skb_header_pointer(skb,
-						ptr + sizeof(struct tcphdr),
-						optsize, _opt);
-			if (op == NULL) {
-				printk("OPT (TRUNCATED)");
-				return;
-			}
-
-			/* Max length: 127 "OPT (" 15*4*2chars ") " */
-			printk("OPT (");
-			for (i =0; i < optsize; i++)
-				printk("%02X", op[i]);
-			printk(") ");
-		}
-		break;
-	}
-	case IPPROTO_UDP: {
-		struct udphdr _udph, *uh;
-
-		/* Max length: 10 "PROTO=UDP " */
-		printk("PROTO=UDP ");
-
-		if (fragment)
-			break;
-
-		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
-		uh = skb_header_pointer(skb, ptr, sizeof(_udph), &_udph);
-		if (uh == NULL) {
-			printk("INCOMPLETE [%u bytes] ", skb->len - ptr);
-			return;
-		}
-
-		/* Max length: 20 "SPT=65535 DPT=65535 " */
-		printk("SPT=%u DPT=%u LEN=%u ",
-		       ntohs(uh->source), ntohs(uh->dest),
-		       ntohs(uh->len));
-		break;
-	}
-	case IPPROTO_ICMPV6: {
-		struct icmp6hdr _icmp6h, *ic;
-
-		/* Max length: 13 "PROTO=ICMPv6 " */
-		printk("PROTO=ICMPv6 ");
-
-		if (fragment)
-			break;
-
-		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
-		ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
-		if (ic == NULL) {
-			printk("INCOMPLETE [%u bytes] ", skb->len - ptr);
-			return;
-		}
-
-		/* Max length: 18 "TYPE=255 CODE=255 " */
-		printk("TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code);
-
-		switch (ic->icmp6_type) {
-		case ICMPV6_ECHO_REQUEST:
-		case ICMPV6_ECHO_REPLY:
-			/* Max length: 19 "ID=65535 SEQ=65535 " */
-			printk("ID=%u SEQ=%u ",
-				ntohs(ic->icmp6_identifier),
-				ntohs(ic->icmp6_sequence));
-			break;
-		case ICMPV6_MGM_QUERY:
-		case ICMPV6_MGM_REPORT:
-		case ICMPV6_MGM_REDUCTION:
-			break;
-
-		case ICMPV6_PARAMPROB:
-			/* Max length: 17 "POINTER=ffffffff " */
-			printk("POINTER=%08x ", ntohl(ic->icmp6_pointer));
-			/* Fall through */
-		case ICMPV6_DEST_UNREACH:
-		case ICMPV6_PKT_TOOBIG:
-		case ICMPV6_TIME_EXCEED:
-			/* Max length: 3+maxlen */
-			if (recurse) {
-				printk("[");
-				dump_packet(info, skb, ptr + sizeof(_icmp6h),
-					    0);
-				printk("] ");
-			}
-
-			/* Max length: 10 "MTU=65535 " */
-			if (ic->icmp6_type == ICMPV6_PKT_TOOBIG)
-				printk("MTU=%u ", ntohl(ic->icmp6_mtu));
-		}
-		break;
-	}
-	/* Max length: 10 "PROTO=255 " */
-	default:
-		printk("PROTO=%u ", currenthdr);
-	}
-
-	/* Max length: 15 "UID=4294967295 " */
-	if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) {
-		read_lock_bh(&skb->sk->sk_callback_lock);
-		if (skb->sk->sk_socket && skb->sk->sk_socket->file)
-			printk("UID=%u ", skb->sk->sk_socket->file->f_uid);
-		read_unlock_bh(&skb->sk->sk_callback_lock);
-	}
-}
-
-static struct nf_loginfo default_loginfo = {
-	.type	= NF_LOG_TYPE_LOG,
-	.u = {
-		.log = {
-			.level	  = 0,
-			.logflags = NF_LOG_MASK,
-		},
-	},
-};
-
-static void
-ip6t_log_packet(unsigned int pf,
-		unsigned int hooknum,
-		const struct sk_buff *skb,
-		const struct net_device *in,
-		const struct net_device *out,
-		const struct nf_loginfo *loginfo,
-		const char *prefix)
-{
-	if (!loginfo)
-		loginfo = &default_loginfo;
-
-	spin_lock_bh(&log_lock);
-	printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, 
-		prefix,
-		in ? in->name : "",
-		out ? out->name : "");
-	if (in && !out) {
-		unsigned int len;
-		/* MAC logging for input chain only. */
-		printk("MAC=");
-		if (skb->dev && (len = skb->dev->hard_header_len) &&
-		    skb->mac.raw != skb->nh.raw) {
-			unsigned char *p = skb->mac.raw;
-			int i;
-
-			if (skb->dev->type == ARPHRD_SIT &&
-			    (p -= ETH_HLEN) < skb->head)
-				p = NULL;
-
-			if (p != NULL) {
-				for (i = 0; i < len; i++)
-					printk("%02x%s", p[i],
-					       i == len - 1 ? "" : ":");
-			}
-			printk(" ");
-
-			if (skb->dev->type == ARPHRD_SIT) {
-				struct iphdr *iph = (struct iphdr *)skb->mac.raw;
-				printk("TUNNEL=%u.%u.%u.%u->%u.%u.%u.%u ",
-				       NIPQUAD(iph->saddr),
-				       NIPQUAD(iph->daddr));
-			}
-		} else
-			printk(" ");
-	}
-
-	dump_packet(loginfo, skb, (u8*)skb->nh.ipv6h - skb->data, 1);
-	printk("\n");
-	spin_unlock_bh(&log_lock);
-}
-
-static unsigned int
-ip6t_log_target(struct sk_buff **pskb,
-		const struct net_device *in,
-		const struct net_device *out,
-		unsigned int hooknum,
-		const void *targinfo,
-		void *userinfo)
-{
-	const struct ip6t_log_info *loginfo = targinfo;
-	struct nf_loginfo li;
-
-	li.type = NF_LOG_TYPE_LOG;
-	li.u.log.level = loginfo->level;
-	li.u.log.logflags = loginfo->logflags;
-
-	nf_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, loginfo->prefix);
-
-	return IP6T_CONTINUE;
-}
-
-
-static int ip6t_log_checkentry(const char *tablename,
-			       const struct ip6t_entry *e,
-			       void *targinfo,
-			       unsigned int targinfosize,
-			       unsigned int hook_mask)
-{
-	const struct ip6t_log_info *loginfo = targinfo;
-
-	if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_log_info))) {
-		DEBUGP("LOG: targinfosize %u != %u\n",
-		       targinfosize, IP6T_ALIGN(sizeof(struct ip6t_log_info)));
-		return 0;
-	}
-
-	if (loginfo->level >= 8) {
-		DEBUGP("LOG: level %u >= 8\n", loginfo->level);
-		return 0;
-	}
-
-	if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
-		DEBUGP("LOG: prefix term %i\n",
-		       loginfo->prefix[sizeof(loginfo->prefix)-1]);
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct ip6t_target ip6t_log_reg = {
-	.name 		= "LOG",
-	.target 	= ip6t_log_target, 
-	.checkentry	= ip6t_log_checkentry, 
-	.me 		= THIS_MODULE,
-};
-
-static struct nf_logger ip6t_logger = {
-	.name		= "ip6t_LOG",
-	.logfn		= &ip6t_log_packet,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	if (ip6t_register_target(&ip6t_log_reg))
-		return -EINVAL;
-	if (nf_log_register(PF_INET6, &ip6t_logger) < 0) {
-		printk(KERN_WARNING "ip6t_LOG: not logging via system console "
-		       "since somebody else already registered for PF_INET6\n");
-		/* we cannot make module load fail here, since otherwise
-		 * ip6tables userspace would abort */
-	}
-
-	return 0;
-}
-
-static void __exit fini(void)
-{
-	nf_log_unregister_logger(&ip6t_logger);
-	ip6t_unregister_target(&ip6t_log_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c
deleted file mode 100644
index 0c7584f9217..00000000000
--- a/net/ipv6/netfilter/ip6t_MARK.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/* This is a module which is used for setting the NFMARK field of an skb. */
-
-/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter_ipv6/ip6t_MARK.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-
-static unsigned int
-target(struct sk_buff **pskb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const void *targinfo,
-       void *userinfo)
-{
-	const struct ip6t_mark_target_info *markinfo = targinfo;
-
-	if((*pskb)->nfmark != markinfo->mark)
-		(*pskb)->nfmark = markinfo->mark;
-
-	return IP6T_CONTINUE;
-}
-
-static int
-checkentry(const char *tablename,
-	   const struct ip6t_entry *e,
-           void *targinfo,
-           unsigned int targinfosize,
-           unsigned int hook_mask)
-{
-	if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_mark_target_info))) {
-		printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
-		       targinfosize,
-		       IP6T_ALIGN(sizeof(struct ip6t_mark_target_info)));
-		return 0;
-	}
-
-	if (strcmp(tablename, "mangle") != 0) {
-		printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct ip6t_target ip6t_mark_reg = {
-	.name 		= "MARK",
-	.target 	= target,
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE
-};
-
-static int __init init(void)
-{
-	printk(KERN_DEBUG "registering ipv6 mark target\n");
-	if (ip6t_register_target(&ip6t_mark_reg))
-		return -EINVAL;
-
-	return 0;
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_target(&ip6t_mark_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
new file mode 100644
index 00000000000..3e4e92d5e15
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6
+ * NAT funded by Astaro.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+
+static unsigned int
+masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct nf_nat_range *range = par->targinfo;
+	enum ip_conntrack_info ctinfo;
+	struct in6_addr src;
+	struct nf_conn *ct;
+	struct nf_nat_range newrange;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+			    ctinfo == IP_CT_RELATED_REPLY));
+
+	if (ipv6_dev_get_saddr(dev_net(par->out), par->out,
+			       &ipv6_hdr(skb)->daddr, 0, &src) < 0)
+		return NF_DROP;
+
+	nfct_nat(ct)->masq_index = par->out->ifindex;
+
+	newrange.flags		= range->flags | NF_NAT_RANGE_MAP_IPS;
+	newrange.min_addr.in6	= src;
+	newrange.max_addr.in6	= src;
+	newrange.min_proto	= range->min_proto;
+	newrange.max_proto	= range->max_proto;
+
+	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+
+static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_range *range = par->targinfo;
+
+	if (range->flags & NF_NAT_RANGE_MAP_IPS)
+		return -EINVAL;
+	return 0;
+}
+
+static int device_cmp(struct nf_conn *ct, void *ifindex)
+{
+	const struct nf_conn_nat *nat = nfct_nat(ct);
+
+	if (!nat)
+		return 0;
+	if (nf_ct_l3num(ct) != NFPROTO_IPV6)
+		return 0;
+	return nat->masq_index == (int)(long)ifindex;
+}
+
+static int masq_device_event(struct notifier_block *this,
+			     unsigned long event, void *ptr)
+{
+	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net *net = dev_net(dev);
+
+	if (event == NETDEV_DOWN)
+		nf_ct_iterate_cleanup(net, device_cmp,
+				      (void *)(long)dev->ifindex, 0, 0);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_dev_notifier = {
+	.notifier_call	= masq_device_event,
+};
+
+static int masq_inet_event(struct notifier_block *this,
+			   unsigned long event, void *ptr)
+{
+	struct inet6_ifaddr *ifa = ptr;
+	struct netdev_notifier_info info;
+
+	netdev_notifier_info_init(&info, ifa->idev->dev);
+	return masq_device_event(this, event, &info);
+}
+
+static struct notifier_block masq_inet_notifier = {
+	.notifier_call	= masq_inet_event,
+};
+
+static struct xt_target masquerade_tg6_reg __read_mostly = {
+	.name		= "MASQUERADE",
+	.family		= NFPROTO_IPV6,
+	.checkentry	= masquerade_tg6_checkentry,
+	.target		= masquerade_tg6,
+	.targetsize	= sizeof(struct nf_nat_range),
+	.table		= "nat",
+	.hooks		= 1 << NF_INET_POST_ROUTING,
+	.me		= THIS_MODULE,
+};
+
+static int __init masquerade_tg6_init(void)
+{
+	int err;
+
+	err = xt_register_target(&masquerade_tg6_reg);
+	if (err == 0) {
+		register_netdevice_notifier(&masq_dev_notifier);
+		register_inet6addr_notifier(&masq_inet_notifier);
+	}
+
+	return err;
+}
+static void __exit masquerade_tg6_exit(void)
+{
+	unregister_inet6addr_notifier(&masq_inet_notifier);
+	unregister_netdevice_notifier(&masq_dev_notifier);
+	xt_unregister_target(&masquerade_tg6_reg);
+}
+
+module_init(masquerade_tg6_init);
+module_exit(masquerade_tg6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: automatic address SNAT");
diff --git a/net/ipv6/netfilter/ip6t_NFQUEUE.c b/net/ipv6/netfilter/ip6t_NFQUEUE.c
deleted file mode 100644
index c6e3730e740..00000000000
--- a/net/ipv6/netfilter/ip6t_NFQUEUE.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* ip6tables module for using new netfilter netlink queue
- *
- * (C) 2005 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as 
- * published by the Free Software Foundation.
- * 
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter_ipv4/ipt_NFQUEUE.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("ip6tables NFQUEUE target");
-MODULE_LICENSE("GPL");
-
-static unsigned int
-target(struct sk_buff **pskb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const void *targinfo,
-       void *userinfo)
-{
-	const struct ipt_NFQ_info *tinfo = targinfo;
-
-	return NF_QUEUE_NR(tinfo->queuenum);
-}
-
-static int
-checkentry(const char *tablename,
-	   const struct ip6t_entry *e,
-           void *targinfo,
-           unsigned int targinfosize,
-           unsigned int hook_mask)
-{
-	if (targinfosize != IP6T_ALIGN(sizeof(struct ipt_NFQ_info))) {
-		printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n",
-		       targinfosize,
-		       IP6T_ALIGN(sizeof(struct ipt_NFQ_info)));
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct ip6t_target ipt_NFQ_reg = {
-	.name		= "NFQUEUE",
-	.target		= target,
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ip6t_register_target(&ipt_NFQ_reg);
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_target(&ipt_NFQ_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
new file mode 100644
index 00000000000..590f767db5d
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2011, 2012 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6t_NPT.h>
+#include <linux/netfilter/x_tables.h>
+
+static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
+{
+	struct ip6t_npt_tginfo *npt = par->targinfo;
+	struct in6_addr pfx;
+	__wsum src_sum, dst_sum;
+
+	if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
+		return -EINVAL;
+
+	/* Ensure that LSB of prefix is zero */
+	ipv6_addr_prefix(&pfx, &npt->src_pfx.in6, npt->src_pfx_len);
+	if (!ipv6_addr_equal(&pfx, &npt->src_pfx.in6))
+		return -EINVAL;
+	ipv6_addr_prefix(&pfx, &npt->dst_pfx.in6, npt->dst_pfx_len);
+	if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6))
+		return -EINVAL;
+
+	src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0);
+	dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0);
+
+	npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum));
+	return 0;
+}
+
+static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
+			     struct in6_addr *addr)
+{
+	unsigned int pfx_len;
+	unsigned int i, idx;
+	__be32 mask;
+	__sum16 sum;
+
+	pfx_len = max(npt->src_pfx_len, npt->dst_pfx_len);
+	for (i = 0; i < pfx_len; i += 32) {
+		if (pfx_len - i >= 32)
+			mask = 0;
+		else
+			mask = htonl((1 << (i - pfx_len + 32)) - 1);
+
+		idx = i / 32;
+		addr->s6_addr32[idx] &= mask;
+		addr->s6_addr32[idx] |= ~mask & npt->dst_pfx.in6.s6_addr32[idx];
+	}
+
+	if (pfx_len <= 48)
+		idx = 3;
+	else {
+		for (idx = 4; idx < ARRAY_SIZE(addr->s6_addr16); idx++) {
+			if ((__force __sum16)addr->s6_addr16[idx] !=
+			    CSUM_MANGLED_0)
+				break;
+		}
+		if (idx == ARRAY_SIZE(addr->s6_addr16))
+			return false;
+	}
+
+	sum = ~csum_fold(csum_add(csum_unfold((__force __sum16)addr->s6_addr16[idx]),
+				  csum_unfold(npt->adjustment)));
+	if (sum == CSUM_MANGLED_0)
+		sum = 0;
+	*(__force __sum16 *)&addr->s6_addr16[idx] = sum;
+
+	return true;
+}
+
+static unsigned int
+ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct ip6t_npt_tginfo *npt = par->targinfo;
+
+	if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) {
+		icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
+			    offsetof(struct ipv6hdr, saddr));
+		return NF_DROP;
+	}
+	return XT_CONTINUE;
+}
+
+static unsigned int
+ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct ip6t_npt_tginfo *npt = par->targinfo;
+
+	if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) {
+		icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
+			    offsetof(struct ipv6hdr, daddr));
+		return NF_DROP;
+	}
+	return XT_CONTINUE;
+}
+
+static struct xt_target ip6t_npt_target_reg[] __read_mostly = {
+	{
+		.name		= "SNPT",
+		.table		= "mangle",
+		.target		= ip6t_snpt_tg,
+		.targetsize	= sizeof(struct ip6t_npt_tginfo),
+		.checkentry	= ip6t_npt_checkentry,
+		.family		= NFPROTO_IPV6,
+		.hooks		= (1 << NF_INET_LOCAL_IN) |
+				  (1 << NF_INET_POST_ROUTING),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "DNPT",
+		.table		= "mangle",
+		.target		= ip6t_dnpt_tg,
+		.targetsize	= sizeof(struct ip6t_npt_tginfo),
+		.checkentry	= ip6t_npt_checkentry,
+		.family		= NFPROTO_IPV6,
+		.hooks		= (1 << NF_INET_PRE_ROUTING) |
+				  (1 << NF_INET_LOCAL_OUT),
+		.me		= THIS_MODULE,
+	},
+};
+
+static int __init ip6t_npt_init(void)
+{
+	return xt_register_targets(ip6t_npt_target_reg,
+				   ARRAY_SIZE(ip6t_npt_target_reg));
+}
+
+static void __exit ip6t_npt_exit(void)
+{
+	xt_unregister_targets(ip6t_npt_target_reg,
+			      ARRAY_SIZE(ip6t_npt_target_reg));
+}
+
+module_init(ip6t_npt_init);
+module_exit(ip6t_npt_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv6-to-IPv6 Network Prefix Translation (RFC 6296)");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS("ip6t_SNPT");
+MODULE_ALIAS("ip6t_DNPT");
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index b03e87adca9..544b0a9da1b 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -7,6 +7,8 @@
  * Authors:
  *	Yasuyuki Kozakai	<yasuyuki.kozakai@toshiba.co.jp>
  *
+ * Copyright (c) 2005-2007 Patrick McHardy <kaber@trash.net>
+ *
  * Based on net/ipv4/netfilter/ipt_REJECT.c
  *
  * This program is free software; you can redistribute it and/or
@@ -14,268 +16,103 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <linux/config.h>
+#include <linux/gfp.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/icmpv6.h>
 #include <linux/netdevice.h>
-#include <net/ipv6.h>
-#include <net/tcp.h>
 #include <net/icmp.h>
-#include <net/ip6_checksum.h>
-#include <net/ip6_fib.h>
-#include <net/ip6_route.h>
 #include <net/flow.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_REJECT.h>
 
+#include <net/netfilter/ipv6/nf_reject.h>
+
 MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
-MODULE_DESCRIPTION("IP6 tables REJECT target module");
+MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
 MODULE_LICENSE("GPL");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Send RST reply */
-static void send_reset(struct sk_buff *oldskb)
-{
-	struct sk_buff *nskb;
-	struct tcphdr otcph, *tcph;
-	unsigned int otcplen, hh_len;
-	int tcphoff, needs_ack;
-	struct ipv6hdr *oip6h = oldskb->nh.ipv6h, *ip6h;
-	struct dst_entry *dst = NULL;
-	u8 proto;
-	struct flowi fl;
-
-	if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
-	    (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
-		DEBUGP("ip6t_REJECT: addr is not unicast.\n");
-		return;
-	}
-
-	proto = oip6h->nexthdr;
-	tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto);
-
-	if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
-		DEBUGP("ip6t_REJECT: Can't get TCP header.\n");
-		return;
-	}
-
-	otcplen = oldskb->len - tcphoff;
-
-	/* IP header checks: fragment, too short. */
-	if ((proto != IPPROTO_TCP) || (otcplen < sizeof(struct tcphdr))) {
-		DEBUGP("ip6t_REJECT: proto(%d) != IPPROTO_TCP, or too short. otcplen = %d\n",
-			proto, otcplen);
-		return;
-	}
-
-	if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr)))
-		BUG();
-
-	/* No RST for RST. */
-	if (otcph.rst) {
-		DEBUGP("ip6t_REJECT: RST is set\n");
-		return;
-	}
-
-	/* Check checksum. */
-	if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP,
-			    skb_checksum(oldskb, tcphoff, otcplen, 0))) {
-		DEBUGP("ip6t_REJECT: TCP checksum is invalid\n");
-		return;
-	}
-
-	memset(&fl, 0, sizeof(fl));
-	fl.proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr);
-	ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr);
-	fl.fl_ip_sport = otcph.dest;
-	fl.fl_ip_dport = otcph.source;
-	dst = ip6_route_output(NULL, &fl);
-	if (dst == NULL)
-		return;
-	if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0))
-		return;
-
-	hh_len = (dst->dev->hard_header_len + 15)&~15;
-	nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
-			 + sizeof(struct tcphdr) + dst->trailer_len,
-			 GFP_ATOMIC);
-
-	if (!nskb) {
-		if (net_ratelimit())
-			printk("ip6t_REJECT: Can't alloc skb\n");
-		dst_release(dst);
-		return;
-	}
-
-	nskb->dst = dst;
-
-	skb_reserve(nskb, hh_len + dst->header_len);
-
-	ip6h = nskb->nh.ipv6h = (struct ipv6hdr *)
-					skb_put(nskb, sizeof(struct ipv6hdr));
-	ip6h->version = 6;
-	ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT);
-	ip6h->nexthdr = IPPROTO_TCP;
-	ip6h->payload_len = htons(sizeof(struct tcphdr));
-	ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr);
-	ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr);
-
-	tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
-	/* Truncate to length (no data) */
-	tcph->doff = sizeof(struct tcphdr)/4;
-	tcph->source = otcph.dest;
-	tcph->dest = otcph.source;
 
-	if (otcph.ack) {
-		needs_ack = 0;
-		tcph->seq = otcph.ack_seq;
-		tcph->ack_seq = 0;
-	} else {
-		needs_ack = 1;
-		tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
-				      + otcplen - (otcph.doff<<2));
-		tcph->seq = 0;
-	}
-
-	/* Reset flags */
-	((u_int8_t *)tcph)[13] = 0;
-	tcph->rst = 1;
-	tcph->ack = needs_ack;
-	tcph->window = 0;
-	tcph->urg_ptr = 0;
-	tcph->check = 0;
-
-	/* Adjust TCP checksum */
-	tcph->check = csum_ipv6_magic(&nskb->nh.ipv6h->saddr,
-				      &nskb->nh.ipv6h->daddr,
-				      sizeof(struct tcphdr), IPPROTO_TCP,
-				      csum_partial((char *)tcph,
-						   sizeof(struct tcphdr), 0));
-
-	NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
-		dst_output);
-}
-
-static inline void
-send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum)
+static unsigned int
+reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
-	if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL)
-		skb_in->dev = &loopback_dev;
+	const struct ip6t_reject_info *reject = par->targinfo;
+	struct net *net = dev_net((par->in != NULL) ? par->in : par->out);
 
-	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL);
-}
-
-static unsigned int reject6_target(struct sk_buff **pskb,
-			   const struct net_device *in,
-			   const struct net_device *out,
-			   unsigned int hooknum,
-			   const void *targinfo,
-			   void *userinfo)
-{
-	const struct ip6t_reject_info *reject = targinfo;
-
-	DEBUGP(KERN_DEBUG "%s: medium point\n", __FUNCTION__);
-	/* WARNING: This code causes reentry within ip6tables.
-	   This means that the ip6tables jump stack is now crap.  We
-	   must return an absolute verdict. --RR */
-    	switch (reject->with) {
-    	case IP6T_ICMP6_NO_ROUTE:
-    		send_unreach(*pskb, ICMPV6_NOROUTE, hooknum);
-    		break;
-    	case IP6T_ICMP6_ADM_PROHIBITED:
-    		send_unreach(*pskb, ICMPV6_ADM_PROHIBITED, hooknum);
-    		break;
-    	case IP6T_ICMP6_NOT_NEIGHBOUR:
-    		send_unreach(*pskb, ICMPV6_NOT_NEIGHBOUR, hooknum);
-    		break;
-    	case IP6T_ICMP6_ADDR_UNREACH:
-    		send_unreach(*pskb, ICMPV6_ADDR_UNREACH, hooknum);
-    		break;
-    	case IP6T_ICMP6_PORT_UNREACH:
-    		send_unreach(*pskb, ICMPV6_PORT_UNREACH, hooknum);
-    		break;
-    	case IP6T_ICMP6_ECHOREPLY:
+	pr_debug("%s: medium point\n", __func__);
+	switch (reject->with) {
+	case IP6T_ICMP6_NO_ROUTE:
+		nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum);
+		break;
+	case IP6T_ICMP6_ADM_PROHIBITED:
+		nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);
+		break;
+	case IP6T_ICMP6_NOT_NEIGHBOUR:
+		nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);
+		break;
+	case IP6T_ICMP6_ADDR_UNREACH:
+		nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);
+		break;
+	case IP6T_ICMP6_PORT_UNREACH:
+		nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);
+		break;
+	case IP6T_ICMP6_ECHOREPLY:
 		/* Do nothing */
 		break;
 	case IP6T_TCP_RESET:
-		send_reset(*pskb);
+		nf_send_reset6(net, skb, par->hooknum);
 		break;
 	default:
-		if (net_ratelimit())
-			printk(KERN_WARNING "ip6t_REJECT: case %u not handled yet\n", reject->with);
+		net_info_ratelimited("case %u not handled yet\n", reject->with);
 		break;
 	}
 
 	return NF_DROP;
 }
 
-static int check(const char *tablename,
-		 const struct ip6t_entry *e,
-		 void *targinfo,
-		 unsigned int targinfosize,
-		 unsigned int hook_mask)
+static int reject_tg6_check(const struct xt_tgchk_param *par)
 {
- 	const struct ip6t_reject_info *rejinfo = targinfo;
-
- 	if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_reject_info))) {
-  		DEBUGP("ip6t_REJECT: targinfosize %u != 0\n", targinfosize);
-  		return 0;
-  	}
-
-	/* Only allow these for packet filtering. */
-	if (strcmp(tablename, "filter") != 0) {
-		DEBUGP("ip6t_REJECT: bad table `%s'.\n", tablename);
-		return 0;
-	}
-
-	if ((hook_mask & ~((1 << NF_IP6_LOCAL_IN)
-			   | (1 << NF_IP6_FORWARD)
-			   | (1 << NF_IP6_LOCAL_OUT))) != 0) {
-		DEBUGP("ip6t_REJECT: bad hook mask %X\n", hook_mask);
-		return 0;
-	}
+	const struct ip6t_reject_info *rejinfo = par->targinfo;
+	const struct ip6t_entry *e = par->entryinfo;
 
 	if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) {
-		printk("ip6t_REJECT: ECHOREPLY is not supported.\n");
-		return 0;
+		pr_info("ECHOREPLY is not supported.\n");
+		return -EINVAL;
 	} else if (rejinfo->with == IP6T_TCP_RESET) {
 		/* Must specify that it's a TCP packet */
-		if (e->ipv6.proto != IPPROTO_TCP
-		    || (e->ipv6.invflags & IP6T_INV_PROTO)) {
-			DEBUGP("ip6t_REJECT: TCP_RESET illegal for non-tcp\n");
-			return 0;
+		if (e->ipv6.proto != IPPROTO_TCP ||
+		    (e->ipv6.invflags & XT_INV_PROTO)) {
+			pr_info("TCP_RESET illegal for non-tcp\n");
+			return -EINVAL;
 		}
 	}
-
-	return 1;
+	return 0;
 }
 
-static struct ip6t_target ip6t_reject_reg = {
+static struct xt_target reject_tg6_reg __read_mostly = {
 	.name		= "REJECT",
-	.target		= reject6_target,
-	.checkentry	= check,
+	.family		= NFPROTO_IPV6,
+	.target		= reject_tg6,
+	.targetsize	= sizeof(struct ip6t_reject_info),
+	.table		= "filter",
+	.hooks		= (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
+			  (1 << NF_INET_LOCAL_OUT),
+	.checkentry	= reject_tg6_check,
 	.me		= THIS_MODULE
 };
 
-static int __init init(void)
+static int __init reject_tg6_init(void)
 {
-	if (ip6t_register_target(&ip6t_reject_reg))
-		return -EINVAL;
-	return 0;
+	return xt_register_target(&reject_tg6_reg);
 }
 
-static void __exit fini(void)
+static void __exit reject_tg6_exit(void)
 {
-	ip6t_unregister_target(&ip6t_reject_reg);
+	xt_unregister_target(&reject_tg6_reg);
 }
 
-module_init(init);
-module_exit(fini);
+module_init(reject_tg6_init);
+module_exit(reject_tg6_exit);
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
new file mode 100644
index 00000000000..a0d17270117
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/ip6_checksum.h>
+#include <net/ip6_route.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_SYNPROXY.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+
+static struct ipv6hdr *
+synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr,
+				       const struct in6_addr *daddr)
+{
+	struct ipv6hdr *iph;
+
+	skb_reset_network_header(skb);
+	iph = (struct ipv6hdr *)skb_put(skb, sizeof(*iph));
+	ip6_flow_hdr(iph, 0, 0);
+	iph->hop_limit	= 64;	//XXX
+	iph->nexthdr	= IPPROTO_TCP;
+	iph->saddr	= *saddr;
+	iph->daddr	= *daddr;
+
+	return iph;
+}
+
+static void
+synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
+		  struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+		  struct ipv6hdr *niph, struct tcphdr *nth,
+		  unsigned int tcp_hdr_size)
+{
+	struct net *net = nf_ct_net((struct nf_conn *)nfct);
+	struct dst_entry *dst;
+	struct flowi6 fl6;
+
+	nth->check = ~tcp_v6_check(tcp_hdr_size, &niph->saddr, &niph->daddr, 0);
+	nskb->ip_summed   = CHECKSUM_PARTIAL;
+	nskb->csum_start  = (unsigned char *)nth - nskb->head;
+	nskb->csum_offset = offsetof(struct tcphdr, check);
+
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_proto = IPPROTO_TCP;
+	fl6.saddr = niph->saddr;
+	fl6.daddr = niph->daddr;
+	fl6.fl6_sport = nth->source;
+	fl6.fl6_dport = nth->dest;
+	security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6));
+	dst = ip6_route_output(net, NULL, &fl6);
+	if (dst == NULL || dst->error) {
+		dst_release(dst);
+		goto free_nskb;
+	}
+	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+	if (IS_ERR(dst))
+		goto free_nskb;
+
+	skb_dst_set(nskb, dst);
+
+	if (nfct) {
+		nskb->nfct = nfct;
+		nskb->nfctinfo = ctinfo;
+		nf_conntrack_get(nfct);
+	}
+
+	ip6_local_out(nskb);
+	return;
+
+free_nskb:
+	kfree_skb(nskb);
+}
+
+static void
+synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
+			    const struct synproxy_options *opts)
+{
+	struct sk_buff *nskb;
+	struct ipv6hdr *iph, *niph;
+	struct tcphdr *nth;
+	unsigned int tcp_hdr_size;
+	u16 mss = opts->mss;
+
+	iph = ipv6_hdr(skb);
+
+	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+			 GFP_ATOMIC);
+	if (nskb == NULL)
+		return;
+	skb_reserve(nskb, MAX_TCP_HEADER);
+
+	niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr);
+
+	skb_reset_transport_header(nskb);
+	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+	nth->source	= th->dest;
+	nth->dest	= th->source;
+	nth->seq	= htonl(__cookie_v6_init_sequence(iph, th, &mss));
+	nth->ack_seq	= htonl(ntohl(th->seq) + 1);
+	tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+	if (opts->options & XT_SYNPROXY_OPT_ECN)
+		tcp_flag_word(nth) |= TCP_FLAG_ECE;
+	nth->doff	= tcp_hdr_size / 4;
+	nth->window	= 0;
+	nth->check	= 0;
+	nth->urg_ptr	= 0;
+
+	synproxy_build_options(nth, opts);
+
+	synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+			  niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_syn(const struct synproxy_net *snet,
+			 const struct sk_buff *skb, const struct tcphdr *th,
+			 const struct synproxy_options *opts, u32 recv_seq)
+{
+	struct sk_buff *nskb;
+	struct ipv6hdr *iph, *niph;
+	struct tcphdr *nth;
+	unsigned int tcp_hdr_size;
+
+	iph = ipv6_hdr(skb);
+
+	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+			 GFP_ATOMIC);
+	if (nskb == NULL)
+		return;
+	skb_reserve(nskb, MAX_TCP_HEADER);
+
+	niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
+
+	skb_reset_transport_header(nskb);
+	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+	nth->source	= th->source;
+	nth->dest	= th->dest;
+	nth->seq	= htonl(recv_seq - 1);
+	/* ack_seq is used to relay our ISN to the synproxy hook to initialize
+	 * sequence number translation once a connection tracking entry exists.
+	 */
+	nth->ack_seq	= htonl(ntohl(th->ack_seq) - 1);
+	tcp_flag_word(nth) = TCP_FLAG_SYN;
+	if (opts->options & XT_SYNPROXY_OPT_ECN)
+		tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+	nth->doff	= tcp_hdr_size / 4;
+	nth->window	= th->window;
+	nth->check	= 0;
+	nth->urg_ptr	= 0;
+
+	synproxy_build_options(nth, opts);
+
+	synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+			  niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_ack(const struct synproxy_net *snet,
+			 const struct ip_ct_tcp *state,
+			 const struct sk_buff *skb, const struct tcphdr *th,
+			 const struct synproxy_options *opts)
+{
+	struct sk_buff *nskb;
+	struct ipv6hdr *iph, *niph;
+	struct tcphdr *nth;
+	unsigned int tcp_hdr_size;
+
+	iph = ipv6_hdr(skb);
+
+	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+			 GFP_ATOMIC);
+	if (nskb == NULL)
+		return;
+	skb_reserve(nskb, MAX_TCP_HEADER);
+
+	niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr);
+
+	skb_reset_transport_header(nskb);
+	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+	nth->source	= th->dest;
+	nth->dest	= th->source;
+	nth->seq	= htonl(ntohl(th->ack_seq));
+	nth->ack_seq	= htonl(ntohl(th->seq) + 1);
+	tcp_flag_word(nth) = TCP_FLAG_ACK;
+	nth->doff	= tcp_hdr_size / 4;
+	nth->window	= htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+	nth->check	= 0;
+	nth->urg_ptr	= 0;
+
+	synproxy_build_options(nth, opts);
+
+	synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_client_ack(const struct synproxy_net *snet,
+			 const struct sk_buff *skb, const struct tcphdr *th,
+			 const struct synproxy_options *opts)
+{
+	struct sk_buff *nskb;
+	struct ipv6hdr *iph, *niph;
+	struct tcphdr *nth;
+	unsigned int tcp_hdr_size;
+
+	iph = ipv6_hdr(skb);
+
+	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+			 GFP_ATOMIC);
+	if (nskb == NULL)
+		return;
+	skb_reserve(nskb, MAX_TCP_HEADER);
+
+	niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
+
+	skb_reset_transport_header(nskb);
+	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+	nth->source	= th->source;
+	nth->dest	= th->dest;
+	nth->seq	= htonl(ntohl(th->seq) + 1);
+	nth->ack_seq	= th->ack_seq;
+	tcp_flag_word(nth) = TCP_FLAG_ACK;
+	nth->doff	= tcp_hdr_size / 4;
+	nth->window	= ntohs(htons(th->window) >> opts->wscale);
+	nth->check	= 0;
+	nth->urg_ptr	= 0;
+
+	synproxy_build_options(nth, opts);
+
+	synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static bool
+synproxy_recv_client_ack(const struct synproxy_net *snet,
+			 const struct sk_buff *skb, const struct tcphdr *th,
+			 struct synproxy_options *opts, u32 recv_seq)
+{
+	int mss;
+
+	mss = __cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1);
+	if (mss == 0) {
+		this_cpu_inc(snet->stats->cookie_invalid);
+		return false;
+	}
+
+	this_cpu_inc(snet->stats->cookie_valid);
+	opts->mss = mss;
+	opts->options |= XT_SYNPROXY_OPT_MSS;
+
+	if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
+		synproxy_check_timestamp_cookie(opts);
+
+	synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
+	return true;
+}
+
+static unsigned int
+synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct xt_synproxy_info *info = par->targinfo;
+	struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
+	struct synproxy_options opts = {};
+	struct tcphdr *th, _th;
+
+	if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+		return NF_DROP;
+
+	th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
+	if (th == NULL)
+		return NF_DROP;
+
+	if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+		return NF_DROP;
+
+	if (th->syn && !(th->ack || th->fin || th->rst)) {
+		/* Initial SYN from client */
+		this_cpu_inc(snet->stats->syn_received);
+
+		if (th->ece && th->cwr)
+			opts.options |= XT_SYNPROXY_OPT_ECN;
+
+		opts.options &= info->options;
+		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+			synproxy_init_timestamp_cookie(info, &opts);
+		else
+			opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
+					  XT_SYNPROXY_OPT_SACK_PERM |
+					  XT_SYNPROXY_OPT_ECN);
+
+		synproxy_send_client_synack(skb, th, &opts);
+		return NF_DROP;
+
+	} else if (th->ack && !(th->fin || th->rst || th->syn)) {
+		/* ACK from client */
+		synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
+		return NF_DROP;
+	}
+
+	return XT_CONTINUE;
+}
+
+static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops,
+				       struct sk_buff *skb,
+				       const struct net_device *in,
+				       const struct net_device *out,
+				       int (*okfn)(struct sk_buff *))
+{
+	struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out));
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	struct nf_conn_synproxy *synproxy;
+	struct synproxy_options opts = {};
+	const struct ip_ct_tcp *state;
+	struct tcphdr *th, _th;
+	__be16 frag_off;
+	u8 nexthdr;
+	int thoff;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL)
+		return NF_ACCEPT;
+
+	synproxy = nfct_synproxy(ct);
+	if (synproxy == NULL)
+		return NF_ACCEPT;
+
+	if (nf_is_loopback_packet(skb))
+		return NF_ACCEPT;
+
+	nexthdr = ipv6_hdr(skb)->nexthdr;
+	thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+				 &frag_off);
+	if (thoff < 0)
+		return NF_ACCEPT;
+
+	th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+	if (th == NULL)
+		return NF_DROP;
+
+	state = &ct->proto.tcp;
+	switch (state->state) {
+	case TCP_CONNTRACK_CLOSE:
+		if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+			nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+						      ntohl(th->seq) + 1);
+			break;
+		}
+
+		if (!th->syn || th->ack ||
+		    CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+			break;
+
+		/* Reopened connection - reset the sequence number and timestamp
+		 * adjustments, they will get initialized once the connection is
+		 * reestablished.
+		 */
+		nf_ct_seqadj_init(ct, ctinfo, 0);
+		synproxy->tsoff = 0;
+		this_cpu_inc(snet->stats->conn_reopened);
+
+		/* fall through */
+	case TCP_CONNTRACK_SYN_SENT:
+		if (!synproxy_parse_options(skb, thoff, th, &opts))
+			return NF_DROP;
+
+		if (!th->syn && th->ack &&
+		    CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+			/* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+			 * therefore we need to add 1 to make the SYN sequence
+			 * number match the one of first SYN.
+			 */
+			if (synproxy_recv_client_ack(snet, skb, th, &opts,
+						     ntohl(th->seq) + 1))
+				this_cpu_inc(snet->stats->cookie_retrans);
+
+			return NF_DROP;
+		}
+
+		synproxy->isn = ntohl(th->ack_seq);
+		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+			synproxy->its = opts.tsecr;
+		break;
+	case TCP_CONNTRACK_SYN_RECV:
+		if (!th->syn || !th->ack)
+			break;
+
+		if (!synproxy_parse_options(skb, thoff, th, &opts))
+			return NF_DROP;
+
+		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+			synproxy->tsoff = opts.tsval - synproxy->its;
+
+		opts.options &= ~(XT_SYNPROXY_OPT_MSS |
+				  XT_SYNPROXY_OPT_WSCALE |
+				  XT_SYNPROXY_OPT_SACK_PERM);
+
+		swap(opts.tsval, opts.tsecr);
+		synproxy_send_server_ack(snet, state, skb, th, &opts);
+
+		nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+
+		swap(opts.tsval, opts.tsecr);
+		synproxy_send_client_ack(snet, skb, th, &opts);
+
+		consume_skb(skb);
+		return NF_STOLEN;
+	default:
+		break;
+	}
+
+	synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+	return NF_ACCEPT;
+}
+
+static int synproxy_tg6_check(const struct xt_tgchk_param *par)
+{
+	const struct ip6t_entry *e = par->entryinfo;
+
+	if (!(e->ipv6.flags & IP6T_F_PROTO) ||
+	    e->ipv6.proto != IPPROTO_TCP ||
+	    e->ipv6.invflags & XT_INV_PROTO)
+		return -EINVAL;
+
+	return nf_ct_l3proto_try_module_get(par->family);
+}
+
+static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par)
+{
+	nf_ct_l3proto_module_put(par->family);
+}
+
+static struct xt_target synproxy_tg6_reg __read_mostly = {
+	.name		= "SYNPROXY",
+	.family		= NFPROTO_IPV6,
+	.hooks		= (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD),
+	.target		= synproxy_tg6,
+	.targetsize	= sizeof(struct xt_synproxy_info),
+	.checkentry	= synproxy_tg6_check,
+	.destroy	= synproxy_tg6_destroy,
+	.me		= THIS_MODULE,
+};
+
+static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = {
+	{
+		.hook		= ipv6_synproxy_hook,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+	},
+	{
+		.hook		= ipv6_synproxy_hook,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+	},
+};
+
+static int __init synproxy_tg6_init(void)
+{
+	int err;
+
+	err = nf_register_hooks(ipv6_synproxy_ops,
+				ARRAY_SIZE(ipv6_synproxy_ops));
+	if (err < 0)
+		goto err1;
+
+	err = xt_register_target(&synproxy_tg6_reg);
+	if (err < 0)
+		goto err2;
+
+	return 0;
+
+err2:
+	nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
+err1:
+	return err;
+}
+
+static void __exit synproxy_tg6_exit(void)
+{
+	xt_unregister_target(&synproxy_tg6_reg);
+	nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
+}
+
+module_init(synproxy_tg6_init);
+module_exit(synproxy_tg6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index dde37793d20..04099ab7d2e 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -6,132 +6,116 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/types.h>
 #include <net/checksum.h>
 #include <net/ipv6.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_ah.h>
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("IPv6 AH match");
+MODULE_DESCRIPTION("Xtables: IPv6 IPsec-AH match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* Returns 1 if the spi is matched by the range, 0 otherwise */
-static inline int
-spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
+static inline bool
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 {
-	int r=0;
-	DEBUGP("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
-	       min,spi,max);
+	bool r;
+
+	pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
+		 invert ? '!' : ' ', min, spi, max);
 	r = (spi >= min && spi <= max) ^ invert;
-	DEBUGP(" result %s\n",r? "PASS\n" : "FAILED\n");
+	pr_debug(" result %s\n", r ? "PASS" : "FAILED");
 	return r;
 }
 
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
+static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
-	struct ip_auth_hdr *ah, _ah;
-	const struct ip6t_ah *ahinfo = matchinfo;
-	unsigned int ptr;
+	struct ip_auth_hdr _ah;
+	const struct ip_auth_hdr *ah;
+	const struct ip6t_ah *ahinfo = par->matchinfo;
+	unsigned int ptr = 0;
 	unsigned int hdrlen = 0;
+	int err;
 
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0)
-		return 0;
+	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL, NULL);
+	if (err < 0) {
+		if (err != -ENOENT)
+			par->hotdrop = true;
+		return false;
+	}
 
 	ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
 	if (ah == NULL) {
-		*hotdrop = 1;
-		return 0;
+		par->hotdrop = true;
+		return false;
 	}
 
 	hdrlen = (ah->hdrlen + 2) << 2;
 
-	DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen);
-	DEBUGP("RES %04X ", ah->reserved);
-	DEBUGP("SPI %u %08X\n", ntohl(ah->spi), ntohl(ah->spi));
-
-	DEBUGP("IPv6 AH spi %02X ",
-	       (spi_match(ahinfo->spis[0], ahinfo->spis[1],
-	                  ntohl(ah->spi),
-	                  !!(ahinfo->invflags & IP6T_AH_INV_SPI))));
-	DEBUGP("len %02X %04X %02X ",
-	       ahinfo->hdrlen, hdrlen,
-	       (!ahinfo->hdrlen ||
-	        (ahinfo->hdrlen == hdrlen) ^
-	        !!(ahinfo->invflags & IP6T_AH_INV_LEN)));
-	DEBUGP("res %02X %04X %02X\n",
-	       ahinfo->hdrres, ah->reserved,
-	       !(ahinfo->hdrres && ah->reserved));
-
-	return (ah != NULL)
-	       &&
-	       (spi_match(ahinfo->spis[0], ahinfo->spis[1],
-	                  ntohl(ah->spi),
-	                  !!(ahinfo->invflags & IP6T_AH_INV_SPI)))
-	       &&
-	       (!ahinfo->hdrlen ||
-	        (ahinfo->hdrlen == hdrlen) ^
-	        !!(ahinfo->invflags & IP6T_AH_INV_LEN))
-	       &&
-	       !(ahinfo->hdrres && ah->reserved);
+	pr_debug("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen);
+	pr_debug("RES %04X ", ah->reserved);
+	pr_debug("SPI %u %08X\n", ntohl(ah->spi), ntohl(ah->spi));
+
+	pr_debug("IPv6 AH spi %02X ",
+		 spi_match(ahinfo->spis[0], ahinfo->spis[1],
+			   ntohl(ah->spi),
+			   !!(ahinfo->invflags & IP6T_AH_INV_SPI)));
+	pr_debug("len %02X %04X %02X ",
+		 ahinfo->hdrlen, hdrlen,
+		 (!ahinfo->hdrlen ||
+		  (ahinfo->hdrlen == hdrlen) ^
+		  !!(ahinfo->invflags & IP6T_AH_INV_LEN)));
+	pr_debug("res %02X %04X %02X\n",
+		 ahinfo->hdrres, ah->reserved,
+		 !(ahinfo->hdrres && ah->reserved));
+
+	return (ah != NULL) &&
+		spi_match(ahinfo->spis[0], ahinfo->spis[1],
+			  ntohl(ah->spi),
+			  !!(ahinfo->invflags & IP6T_AH_INV_SPI)) &&
+		(!ahinfo->hdrlen ||
+		 (ahinfo->hdrlen == hdrlen) ^
+		 !!(ahinfo->invflags & IP6T_AH_INV_LEN)) &&
+		!(ahinfo->hdrres && ah->reserved);
 }
 
-/* Called when user tries to insert an entry of this type. */
-static int
-checkentry(const char *tablename,
-          const struct ip6t_ip6 *ip,
-          void *matchinfo,
-          unsigned int matchinfosize,
-          unsigned int hook_mask)
+static int ah_mt6_check(const struct xt_mtchk_param *par)
 {
-	const struct ip6t_ah *ahinfo = matchinfo;
+	const struct ip6t_ah *ahinfo = par->matchinfo;
 
-	if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_ah))) {
-		DEBUGP("ip6t_ah: matchsize %u != %u\n",
-		       matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_ah)));
-		return 0;
-	}
 	if (ahinfo->invflags & ~IP6T_AH_INV_MASK) {
-		DEBUGP("ip6t_ah: unknown flags %X\n", ahinfo->invflags);
-		return 0;
+		pr_debug("unknown flags %X\n", ahinfo->invflags);
+		return -EINVAL;
 	}
-	return 1;
+	return 0;
 }
 
-static struct ip6t_match ah_match = {
+static struct xt_match ah_mt6_reg __read_mostly = {
 	.name		= "ah",
-	.match		= &match,
-	.checkentry	= &checkentry,
+	.family		= NFPROTO_IPV6,
+	.match		= ah_mt6,
+	.matchsize	= sizeof(struct ip6t_ah),
+	.checkentry	= ah_mt6_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+static int __init ah_mt6_init(void)
 {
-	return ip6t_register_match(&ah_match);
+	return xt_register_match(&ah_mt6_reg);
 }
 
-static void __exit cleanup(void)
+static void __exit ah_mt6_exit(void)
 {
-	ip6t_unregister_match(&ah_match);
+	xt_unregister_match(&ah_mt6_reg);
 }
 
-module_init(init);
-module_exit(cleanup);
+module_init(ah_mt6_init);
+module_exit(ah_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c
deleted file mode 100644
index c450a635e54..00000000000
--- a/net/ipv6/netfilter/ip6t_dst.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/* Kernel module to match Hop-by-Hop and Destination parameters. */
-
-/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ipv6.h>
-#include <linux/types.h>
-#include <net/checksum.h>
-#include <net/ipv6.h>
-
-#include <asm/byteorder.h>
-
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter_ipv6/ip6t_opts.h>
-
-#define HOPBYHOP	0
-
-MODULE_LICENSE("GPL");
-#if HOPBYHOP
-MODULE_DESCRIPTION("IPv6 HbH match");
-#else
-MODULE_DESCRIPTION("IPv6 DST match");
-#endif
-MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/*
- * (Type & 0xC0) >> 6
- * 	0	-> ignorable
- * 	1	-> must drop the packet
- * 	2	-> send ICMP PARM PROB regardless and drop packet
- * 	3	-> Send ICMP if not a multicast address and drop packet
- *  (Type & 0x20) >> 5
- *  	0	-> invariant
- *  	1	-> can change the routing
- *  (Type & 0x1F) Type
- *      0	-> Pad1 (only 1 byte!)
- *      1	-> PadN LENGTH info (total length = length + 2)
- *      C0 | 2	-> JUMBO 4 x x x x ( xxxx > 64k )
- *      5	-> RTALERT 2 x x
- */
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-       struct ipv6_opt_hdr _optsh, *oh;
-       const struct ip6t_opts *optinfo = matchinfo;
-       unsigned int temp;
-       unsigned int ptr;
-       unsigned int hdrlen = 0;
-       unsigned int ret = 0;
-       u8 _opttype, *tp = NULL;
-       u8 _optlen, *lp = NULL;
-       unsigned int optlen;
-       
-#if HOPBYHOP
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0)
-#else
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0)
-#endif
-		return 0;
-
-       oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
-       if (oh == NULL){
-	       *hotdrop = 1;
-       		return 0;
-       }
-
-       hdrlen = ipv6_optlen(oh);
-       if (skb->len - ptr < hdrlen){
-	       /* Packet smaller than it's length field */
-       		return 0;
-       }
-
-       DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
-
-       DEBUGP("len %02X %04X %02X ",
-       		optinfo->hdrlen, hdrlen,
-       		(!(optinfo->flags & IP6T_OPTS_LEN) ||
-                           ((optinfo->hdrlen == hdrlen) ^
-                           !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
-
-       ret = (oh != NULL)
-       		&&
-	      	(!(optinfo->flags & IP6T_OPTS_LEN) ||
-                           ((optinfo->hdrlen == hdrlen) ^
-                           !!(optinfo->invflags & IP6T_OPTS_INV_LEN)));
-
-       ptr += 2;
-       hdrlen -= 2;
-       if ( !(optinfo->flags & IP6T_OPTS_OPTS) ){
-	       return ret;
-	} else if (optinfo->flags & IP6T_OPTS_NSTRICT) {
-		DEBUGP("Not strict - not implemented");
-	} else {
-		DEBUGP("Strict ");
-		DEBUGP("#%d ",optinfo->optsnr);
-		for(temp=0; temp<optinfo->optsnr; temp++){
-			/* type field exists ? */
-			if (hdrlen < 1)
-				break;
-			tp = skb_header_pointer(skb, ptr, sizeof(_opttype),
-						&_opttype);
-			if (tp == NULL)
-				break;
-
-			/* Type check */
-			if (*tp != (optinfo->opts[temp] & 0xFF00)>>8){
-				DEBUGP("Tbad %02X %02X\n",
-				       *tp,
-				       (optinfo->opts[temp] & 0xFF00)>>8);
-				return 0;
-			} else {
-				DEBUGP("Tok ");
-			}
-			/* Length check */
-			if (*tp) {
-				u16 spec_len;
-
-				/* length field exists ? */
-				if (hdrlen < 2)
-					break;
-				lp = skb_header_pointer(skb, ptr + 1,
-							sizeof(_optlen),
-							&_optlen);
-				if (lp == NULL)
-					break;
-				spec_len = optinfo->opts[temp] & 0x00FF;
-
-				if (spec_len != 0x00FF && spec_len != *lp) {
-					DEBUGP("Lbad %02X %04X\n", *lp,
-					       spec_len);
-					return 0;
-				}
-				DEBUGP("Lok ");
-				optlen = *lp + 2;
-			} else {
-				DEBUGP("Pad1\n");
-				optlen = 1;
-			}
-
-			/* Step to the next */
-			DEBUGP("len%04X \n", optlen);
-
-			if ((ptr > skb->len - optlen || hdrlen < optlen) &&
-			    (temp < optinfo->optsnr - 1)) {
-				DEBUGP("new pointer is too large! \n");
-				break;
-			}
-			ptr += optlen;
-			hdrlen -= optlen;
-		}
-		if (temp == optinfo->optsnr)
-			return ret;
-		else return 0;
-	}
-
-	return 0;
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-checkentry(const char *tablename,
-          const struct ip6t_ip6 *ip,
-          void *matchinfo,
-          unsigned int matchinfosize,
-          unsigned int hook_mask)
-{
-       const struct ip6t_opts *optsinfo = matchinfo;
-
-       if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_opts))) {
-              DEBUGP("ip6t_opts: matchsize %u != %u\n",
-                      matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_opts)));
-              return 0;
-       }
-       if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
-              DEBUGP("ip6t_opts: unknown flags %X\n",
-                      optsinfo->invflags);
-              return 0;
-       }
-
-       return 1;
-}
-
-static struct ip6t_match opts_match = {
-#if HOPBYHOP
-	.name		= "hbh",
-#else
-	.name		= "dst",
-#endif
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-       return ip6t_register_match(&opts_match);
-}
-
-static void __exit cleanup(void)
-{
-       ip6t_unregister_match(&opts_match);
-}
-
-module_init(init);
-module_exit(cleanup);
diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c
deleted file mode 100644
index 24bc0cde43a..00000000000
--- a/net/ipv6/netfilter/ip6t_esp.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/* Kernel module to match ESP parameters. */
-/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ipv6.h>
-#include <linux/types.h>
-#include <net/checksum.h>
-#include <net/ipv6.h>
-
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter_ipv6/ip6t_esp.h>
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("IPv6 ESP match");
-MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Returns 1 if the spi is matched by the range, 0 otherwise */
-static inline int
-spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
-{
-	int r=0;
-	DEBUGP("esp spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
-	       min,spi,max);
-	r=(spi >= min && spi <= max) ^ invert;
-	DEBUGP(" result %s\n",r? "PASS\n" : "FAILED\n");
-	return r;
-}
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-	struct ip_esp_hdr _esp, *eh;
-	const struct ip6t_esp *espinfo = matchinfo;
-	unsigned int ptr;
-
-	/* Make sure this isn't an evil packet */
-	/*DEBUGP("ipv6_esp entered \n");*/
-
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0)
-		return 0;
-
-	eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp);
-	if (eh == NULL) {
-		*hotdrop = 1;
-		return 0;
-	}
-
-	DEBUGP("IPv6 ESP SPI %u %08X\n", ntohl(eh->spi), ntohl(eh->spi));
-
-	return (eh != NULL)
-		&& spi_match(espinfo->spis[0], espinfo->spis[1],
-			      ntohl(eh->spi),
-			      !!(espinfo->invflags & IP6T_ESP_INV_SPI));
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-checkentry(const char *tablename,
-	   const struct ip6t_ip6 *ip,
-	   void *matchinfo,
-	   unsigned int matchinfosize,
-	   unsigned int hook_mask)
-{
-	const struct ip6t_esp *espinfo = matchinfo;
-
-	if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_esp))) {
-		DEBUGP("ip6t_esp: matchsize %u != %u\n",
-			 matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_esp)));
-		return 0;
-	}
-	if (espinfo->invflags & ~IP6T_ESP_INV_MASK) {
-		DEBUGP("ip6t_esp: unknown flags %X\n",
-			 espinfo->invflags);
-		return 0;
-	}
-	return 1;
-}
-
-static struct ip6t_match esp_match = {
-	.name		= "esp",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ip6t_register_match(&esp_match);
-}
-
-static void __exit cleanup(void)
-{
-	ip6t_unregister_match(&esp_match);
-}
-
-module_init(init);
-module_exit(cleanup);
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 616c2cbcd54..aab0706908c 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -12,90 +12,63 @@
 #include <linux/ipv6.h>
 #include <linux/if_ether.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 
-MODULE_DESCRIPTION("IPv6 EUI64 address checking match");
+MODULE_DESCRIPTION("Xtables: IPv6 EUI64 address match");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
+static bool
+eui64_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
+	unsigned char eui64[8];
 
-    unsigned char eui64[8];
-    int i=0;
-
-     if ( !(skb->mac.raw >= skb->head
-                && (skb->mac.raw + ETH_HLEN) <= skb->data)
-                && offset != 0) {
-                        *hotdrop = 1;
-                        return 0;
-                }
-    
-    memset(eui64, 0, sizeof(eui64));
-
-    if (eth_hdr(skb)->h_proto == ntohs(ETH_P_IPV6)) {
-      if (skb->nh.ipv6h->version == 0x6) { 
-         memcpy(eui64, eth_hdr(skb)->h_source, 3);
-         memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3);
-	 eui64[3]=0xff;
-	 eui64[4]=0xfe;
-	 eui64[0] |= 0x02;
+	if (!(skb_mac_header(skb) >= skb->head &&
+	      skb_mac_header(skb) + ETH_HLEN <= skb->data) &&
+	    par->fragoff != 0) {
+		par->hotdrop = true;
+		return false;
+	}
 
-	 i=0;
-	 while ((skb->nh.ipv6h->saddr.s6_addr[8+i] ==
-			 eui64[i]) && (i<8)) i++;
+	memset(eui64, 0, sizeof(eui64));
 
-	 if ( i == 8 )
-	 	return 1;
-      }
-    }
+	if (eth_hdr(skb)->h_proto == htons(ETH_P_IPV6)) {
+		if (ipv6_hdr(skb)->version == 0x6) {
+			memcpy(eui64, eth_hdr(skb)->h_source, 3);
+			memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3);
+			eui64[3] = 0xff;
+			eui64[4] = 0xfe;
+			eui64[0] ^= 0x02;
 
-    return 0;
-}
-
-static int
-ip6t_eui64_checkentry(const char *tablename,
-		   const struct ip6t_ip6 *ip,
-		   void *matchinfo,
-		   unsigned int matchsize,
-		   unsigned int hook_mask)
-{
-	if (hook_mask
-	    & ~((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN) |
-		(1 << NF_IP6_FORWARD))) {
-		printk("ip6t_eui64: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
-		return 0;
+			if (!memcmp(ipv6_hdr(skb)->saddr.s6_addr + 8, eui64,
+				    sizeof(eui64)))
+				return true;
+		}
 	}
 
-	if (matchsize != IP6T_ALIGN(sizeof(int)))
-		return 0;
-
-	return 1;
+	return false;
 }
 
-static struct ip6t_match eui64_match = {
+static struct xt_match eui64_mt6_reg __read_mostly = {
 	.name		= "eui64",
-	.match		= &match,
-	.checkentry	= &ip6t_eui64_checkentry,
+	.family		= NFPROTO_IPV6,
+	.match		= eui64_mt6,
+	.matchsize	= sizeof(int),
+	.hooks		= (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) |
+			  (1 << NF_INET_FORWARD),
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+static int __init eui64_mt6_init(void)
 {
-	return ip6t_register_match(&eui64_match);
+	return xt_register_match(&eui64_mt6_reg);
 }
 
-static void __exit fini(void)
+static void __exit eui64_mt6_exit(void)
 {
-	ip6t_unregister_match(&eui64_match);
+	xt_unregister_match(&eui64_mt6_reg);
 }
 
-module_init(init);
-module_exit(fini);
+module_init(eui64_mt6_init);
+module_exit(eui64_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 085d5f8eea2..3b5735e56bf 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ipv6.h>
@@ -14,144 +14,123 @@
 #include <net/checksum.h>
 #include <net/ipv6.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_frag.h>
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("IPv6 FRAG match");
+MODULE_DESCRIPTION("Xtables: IPv6 fragment match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* Returns 1 if the id is matched by the range, 0 otherwise */
-static inline int
-id_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert)
+static inline bool
+id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 {
-       int r=0;
-       DEBUGP("frag id_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
-              min,id,max);
-       r=(id >= min && id <= max) ^ invert;
-       DEBUGP(" result %s\n",r? "PASS" : "FAILED");
-       return r;
+	bool r;
+	pr_debug("id_match:%c 0x%x <= 0x%x <= 0x%x\n", invert ? '!' : ' ',
+		 min, id, max);
+	r = (id >= min && id <= max) ^ invert;
+	pr_debug(" result %s\n", r ? "PASS" : "FAILED");
+	return r;
 }
 
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
+static bool
+frag_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
-       struct frag_hdr _frag, *fh;
-       const struct ip6t_frag *fraginfo = matchinfo;
-       unsigned int ptr;
-
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0)
-		return 0;
+	struct frag_hdr _frag;
+	const struct frag_hdr *fh;
+	const struct ip6t_frag *fraginfo = par->matchinfo;
+	unsigned int ptr = 0;
+	int err;
+
+	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL, NULL);
+	if (err < 0) {
+		if (err != -ENOENT)
+			par->hotdrop = true;
+		return false;
+	}
 
 	fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
-	if (fh == NULL){
-		*hotdrop = 1;
-		return 0;
+	if (fh == NULL) {
+		par->hotdrop = true;
+		return false;
 	}
 
-       DEBUGP("INFO %04X ", fh->frag_off);
-       DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7);
-       DEBUGP("RES %02X %04X", fh->reserved, ntohs(fh->frag_off) & 0x6);
-       DEBUGP("MF %04X ", fh->frag_off & htons(IP6_MF));
-       DEBUGP("ID %u %08X\n", ntohl(fh->identification),
-	      ntohl(fh->identification));
-
-       DEBUGP("IPv6 FRAG id %02X ",
-       		(id_match(fraginfo->ids[0], fraginfo->ids[1],
-                           ntohl(fh->identification),
-                           !!(fraginfo->invflags & IP6T_FRAG_INV_IDS))));
-       DEBUGP("res %02X %02X%04X %02X ", 
-       		(fraginfo->flags & IP6T_FRAG_RES), fh->reserved,
-		ntohs(fh->frag_off) & 0x6,
-       		!((fraginfo->flags & IP6T_FRAG_RES)
-			&& (fh->reserved || (ntohs(fh->frag_off) & 0x06))));
-       DEBUGP("first %02X %02X %02X ", 
-       		(fraginfo->flags & IP6T_FRAG_FST),
-		ntohs(fh->frag_off) & ~0x7,
-       		!((fraginfo->flags & IP6T_FRAG_FST)
-			&& (ntohs(fh->frag_off) & ~0x7)));
-       DEBUGP("mf %02X %02X %02X ", 
-       		(fraginfo->flags & IP6T_FRAG_MF),
-		ntohs(fh->frag_off) & IP6_MF,
-       		!((fraginfo->flags & IP6T_FRAG_MF)
-			&& !((ntohs(fh->frag_off) & IP6_MF))));
-       DEBUGP("last %02X %02X %02X\n", 
-       		(fraginfo->flags & IP6T_FRAG_NMF),
-		ntohs(fh->frag_off) & IP6_MF,
-       		!((fraginfo->flags & IP6T_FRAG_NMF)
-			&& (ntohs(fh->frag_off) & IP6_MF)));
-
-       return (fh != NULL)
-       		&&
-       		(id_match(fraginfo->ids[0], fraginfo->ids[1],
+	pr_debug("INFO %04X ", fh->frag_off);
+	pr_debug("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7);
+	pr_debug("RES %02X %04X", fh->reserved, ntohs(fh->frag_off) & 0x6);
+	pr_debug("MF %04X ", fh->frag_off & htons(IP6_MF));
+	pr_debug("ID %u %08X\n", ntohl(fh->identification),
+		 ntohl(fh->identification));
+
+	pr_debug("IPv6 FRAG id %02X ",
+		 id_match(fraginfo->ids[0], fraginfo->ids[1],
 			  ntohl(fh->identification),
-                           !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)))
-		&&
-		!((fraginfo->flags & IP6T_FRAG_RES)
-			&& (fh->reserved || (ntohs(fh->frag_off) & 0x6)))
-		&&
-		!((fraginfo->flags & IP6T_FRAG_FST)
-			&& (ntohs(fh->frag_off) & ~0x7))
-		&&
-		!((fraginfo->flags & IP6T_FRAG_MF)
-			&& !(ntohs(fh->frag_off) & IP6_MF))
-		&&
-		!((fraginfo->flags & IP6T_FRAG_NMF)
-			&& (ntohs(fh->frag_off) & IP6_MF));
+			  !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)));
+	pr_debug("res %02X %02X%04X %02X ",
+		 fraginfo->flags & IP6T_FRAG_RES, fh->reserved,
+		 ntohs(fh->frag_off) & 0x6,
+		 !((fraginfo->flags & IP6T_FRAG_RES) &&
+		   (fh->reserved || (ntohs(fh->frag_off) & 0x06))));
+	pr_debug("first %02X %02X %02X ",
+		 fraginfo->flags & IP6T_FRAG_FST,
+		 ntohs(fh->frag_off) & ~0x7,
+		 !((fraginfo->flags & IP6T_FRAG_FST) &&
+		   (ntohs(fh->frag_off) & ~0x7)));
+	pr_debug("mf %02X %02X %02X ",
+		 fraginfo->flags & IP6T_FRAG_MF,
+		 ntohs(fh->frag_off) & IP6_MF,
+		 !((fraginfo->flags & IP6T_FRAG_MF) &&
+		   !((ntohs(fh->frag_off) & IP6_MF))));
+	pr_debug("last %02X %02X %02X\n",
+		 fraginfo->flags & IP6T_FRAG_NMF,
+		 ntohs(fh->frag_off) & IP6_MF,
+		 !((fraginfo->flags & IP6T_FRAG_NMF) &&
+		   (ntohs(fh->frag_off) & IP6_MF)));
+
+	return (fh != NULL) &&
+		id_match(fraginfo->ids[0], fraginfo->ids[1],
+			 ntohl(fh->identification),
+			 !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)) &&
+		!((fraginfo->flags & IP6T_FRAG_RES) &&
+		  (fh->reserved || (ntohs(fh->frag_off) & 0x6))) &&
+		!((fraginfo->flags & IP6T_FRAG_FST) &&
+		  (ntohs(fh->frag_off) & ~0x7)) &&
+		!((fraginfo->flags & IP6T_FRAG_MF) &&
+		  !(ntohs(fh->frag_off) & IP6_MF)) &&
+		!((fraginfo->flags & IP6T_FRAG_NMF) &&
+		  (ntohs(fh->frag_off) & IP6_MF));
 }
 
-/* Called when user tries to insert an entry of this type. */
-static int
-checkentry(const char *tablename,
-          const struct ip6t_ip6 *ip,
-          void *matchinfo,
-          unsigned int matchinfosize,
-          unsigned int hook_mask)
+static int frag_mt6_check(const struct xt_mtchk_param *par)
 {
-       const struct ip6t_frag *fraginfo = matchinfo;
-
-       if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_frag))) {
-              DEBUGP("ip6t_frag: matchsize %u != %u\n",
-                      matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_frag)));
-              return 0;
-       }
-       if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) {
-              DEBUGP("ip6t_frag: unknown flags %X\n",
-                      fraginfo->invflags);
-              return 0;
-       }
-
-       return 1;
+	const struct ip6t_frag *fraginfo = par->matchinfo;
+
+	if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) {
+		pr_debug("unknown flags %X\n", fraginfo->invflags);
+		return -EINVAL;
+	}
+	return 0;
 }
 
-static struct ip6t_match frag_match = {
+static struct xt_match frag_mt6_reg __read_mostly = {
 	.name		= "frag",
-	.match		= &match,
-	.checkentry	= &checkentry,
+	.family		= NFPROTO_IPV6,
+	.match		= frag_mt6,
+	.matchsize	= sizeof(struct ip6t_frag),
+	.checkentry	= frag_mt6_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+static int __init frag_mt6_init(void)
 {
-       return ip6t_register_match(&frag_match);
+	return xt_register_match(&frag_mt6_reg);
 }
 
-static void __exit cleanup(void)
+static void __exit frag_mt6_exit(void)
 {
-       ip6t_unregister_match(&frag_match);
+	xt_unregister_match(&frag_mt6_reg);
 }
 
-module_init(init);
-module_exit(cleanup);
+module_init(frag_mt6_init);
+module_exit(frag_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 1d09485111d..01df142bb02 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ipv6.h>
@@ -16,103 +16,92 @@
 
 #include <asm/byteorder.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_opts.h>
 
-#define HOPBYHOP	1
-
 MODULE_LICENSE("GPL");
-#if HOPBYHOP
-MODULE_DESCRIPTION("IPv6 HbH match");
-#else
-MODULE_DESCRIPTION("IPv6 DST match");
-#endif
+MODULE_DESCRIPTION("Xtables: IPv6 Hop-By-Hop and Destination Header match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
+MODULE_ALIAS("ip6t_dst");
 
 /*
- * (Type & 0xC0) >> 6
- * 	0	-> ignorable
- * 	1	-> must drop the packet
- * 	2	-> send ICMP PARM PROB regardless and drop packet
- * 	3	-> Send ICMP if not a multicast address and drop packet
+ *  (Type & 0xC0) >> 6
+ *	0	-> ignorable
+ *	1	-> must drop the packet
+ *	2	-> send ICMP PARM PROB regardless and drop packet
+ *	3	-> Send ICMP if not a multicast address and drop packet
  *  (Type & 0x20) >> 5
- *  	0	-> invariant
- *  	1	-> can change the routing
+ *	0	-> invariant
+ *	1	-> can change the routing
  *  (Type & 0x1F) Type
- *      0	-> Pad1 (only 1 byte!)
- *      1	-> PadN LENGTH info (total length = length + 2)
- *      C0 | 2	-> JUMBO 4 x x x x ( xxxx > 64k )
- *      5	-> RTALERT 2 x x
+ *	0	-> Pad1 (only 1 byte!)
+ *	1	-> PadN LENGTH info (total length = length + 2)
+ *	C0 | 2	-> JUMBO 4 x x x x ( xxxx > 64k )
+ *	5	-> RTALERT 2 x x
  */
 
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
+static struct xt_match hbh_mt6_reg[] __read_mostly;
+
+static bool
+hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
-       struct ipv6_opt_hdr _optsh, *oh;
-       const struct ip6t_opts *optinfo = matchinfo;
-       unsigned int temp;
-       unsigned int ptr;
-       unsigned int hdrlen = 0;
-       unsigned int ret = 0;
-       u8 _opttype, *tp = NULL;
-       u8 _optlen, *lp = NULL;
-       unsigned int optlen;
-       
-#if HOPBYHOP
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0)
-#else
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0)
-#endif
-		return 0;
-
-       oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
-       if (oh == NULL){
-	       *hotdrop = 1;
-       		return 0;
-       }
-
-       hdrlen = ipv6_optlen(oh);
-       if (skb->len - ptr < hdrlen){
-	       /* Packet smaller than it's length field */
-       		return 0;
-       }
-
-       DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
-
-       DEBUGP("len %02X %04X %02X ",
-       		optinfo->hdrlen, hdrlen,
-       		(!(optinfo->flags & IP6T_OPTS_LEN) ||
-                           ((optinfo->hdrlen == hdrlen) ^
-                           !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
-
-       ret = (oh != NULL)
-       		&&
-	      	(!(optinfo->flags & IP6T_OPTS_LEN) ||
-                           ((optinfo->hdrlen == hdrlen) ^
-                           !!(optinfo->invflags & IP6T_OPTS_INV_LEN)));
-
-       ptr += 2;
-       hdrlen -= 2;
-       if ( !(optinfo->flags & IP6T_OPTS_OPTS) ){
-	       return ret;
-	} else if (optinfo->flags & IP6T_OPTS_NSTRICT) {
-		DEBUGP("Not strict - not implemented");
+	struct ipv6_opt_hdr _optsh;
+	const struct ipv6_opt_hdr *oh;
+	const struct ip6t_opts *optinfo = par->matchinfo;
+	unsigned int temp;
+	unsigned int ptr = 0;
+	unsigned int hdrlen = 0;
+	bool ret = false;
+	u8 _opttype;
+	u8 _optlen;
+	const u_int8_t *tp = NULL;
+	const u_int8_t *lp = NULL;
+	unsigned int optlen;
+	int err;
+
+	err = ipv6_find_hdr(skb, &ptr,
+			    (par->match == &hbh_mt6_reg[0]) ?
+			    NEXTHDR_HOP : NEXTHDR_DEST, NULL, NULL);
+	if (err < 0) {
+		if (err != -ENOENT)
+			par->hotdrop = true;
+		return false;
+	}
+
+	oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
+	if (oh == NULL) {
+		par->hotdrop = true;
+		return false;
+	}
+
+	hdrlen = ipv6_optlen(oh);
+	if (skb->len - ptr < hdrlen) {
+		/* Packet smaller than it's length field */
+		return false;
+	}
+
+	pr_debug("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
+
+	pr_debug("len %02X %04X %02X ",
+		 optinfo->hdrlen, hdrlen,
+		 (!(optinfo->flags & IP6T_OPTS_LEN) ||
+		  ((optinfo->hdrlen == hdrlen) ^
+		   !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
+
+	ret = (oh != NULL) &&
+	      (!(optinfo->flags & IP6T_OPTS_LEN) ||
+	       ((optinfo->hdrlen == hdrlen) ^
+		!!(optinfo->invflags & IP6T_OPTS_INV_LEN)));
+
+	ptr += 2;
+	hdrlen -= 2;
+	if (!(optinfo->flags & IP6T_OPTS_OPTS)) {
+		return ret;
 	} else {
-		DEBUGP("Strict ");
-		DEBUGP("#%d ",optinfo->optsnr);
-		for(temp=0; temp<optinfo->optsnr; temp++){
+		pr_debug("Strict ");
+		pr_debug("#%d ", optinfo->optsnr);
+		for (temp = 0; temp < optinfo->optsnr; temp++) {
 			/* type field exists ? */
 			if (hdrlen < 1)
 				break;
@@ -122,13 +111,12 @@ match(const struct sk_buff *skb,
 				break;
 
 			/* Type check */
-			if (*tp != (optinfo->opts[temp] & 0xFF00)>>8){
-				DEBUGP("Tbad %02X %02X\n",
-				       *tp,
-				       (optinfo->opts[temp] & 0xFF00)>>8);
-				return 0;
+			if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) {
+				pr_debug("Tbad %02X %02X\n", *tp,
+					 (optinfo->opts[temp] & 0xFF00) >> 8);
+				return false;
 			} else {
-				DEBUGP("Tok ");
+				pr_debug("Tok ");
 			}
 			/* Length check */
 			if (*tp) {
@@ -145,23 +133,23 @@ match(const struct sk_buff *skb,
 				spec_len = optinfo->opts[temp] & 0x00FF;
 
 				if (spec_len != 0x00FF && spec_len != *lp) {
-					DEBUGP("Lbad %02X %04X\n", *lp,
-					       spec_len);
-					return 0;
+					pr_debug("Lbad %02X %04X\n", *lp,
+						 spec_len);
+					return false;
 				}
-				DEBUGP("Lok ");
+				pr_debug("Lok ");
 				optlen = *lp + 2;
 			} else {
-				DEBUGP("Pad1\n");
+				pr_debug("Pad1\n");
 				optlen = 1;
 			}
 
 			/* Step to the next */
-			DEBUGP("len%04X \n", optlen);
+			pr_debug("len%04X\n", optlen);
 
 			if ((ptr > skb->len - optlen || hdrlen < optlen) &&
-			    (temp < optinfo->optsnr - 1)) {
-				DEBUGP("new pointer is too large! \n");
+			    temp < optinfo->optsnr - 1) {
+				pr_debug("new pointer is too large!\n");
 				break;
 			}
 			ptr += optlen;
@@ -169,56 +157,59 @@ match(const struct sk_buff *skb,
 		}
 		if (temp == optinfo->optsnr)
 			return ret;
-		else return 0;
+		else
+			return false;
 	}
 
-	return 0;
+	return false;
 }
 
-/* Called when user tries to insert an entry of this type. */
-static int
-checkentry(const char *tablename,
-          const struct ip6t_ip6 *ip,
-          void *matchinfo,
-          unsigned int matchinfosize,
-          unsigned int hook_mask)
+static int hbh_mt6_check(const struct xt_mtchk_param *par)
 {
-       const struct ip6t_opts *optsinfo = matchinfo;
-
-       if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_opts))) {
-              DEBUGP("ip6t_opts: matchsize %u != %u\n",
-                      matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_opts)));
-              return 0;
-       }
-       if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
-              DEBUGP("ip6t_opts: unknown flags %X\n",
-                      optsinfo->invflags);
-              return 0;
-       }
-
-       return 1;
+	const struct ip6t_opts *optsinfo = par->matchinfo;
+
+	if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
+		pr_debug("unknown flags %X\n", optsinfo->invflags);
+		return -EINVAL;
+	}
+
+	if (optsinfo->flags & IP6T_OPTS_NSTRICT) {
+		pr_debug("Not strict - not implemented");
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
-static struct ip6t_match opts_match = {
-#if HOPBYHOP
-	.name		= "hbh",
-#else
-	.name		= "dst",
-#endif
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
+static struct xt_match hbh_mt6_reg[] __read_mostly = {
+	{
+		/* Note, hbh_mt6 relies on the order of hbh_mt6_reg */
+		.name		= "hbh",
+		.family		= NFPROTO_IPV6,
+		.match		= hbh_mt6,
+		.matchsize	= sizeof(struct ip6t_opts),
+		.checkentry	= hbh_mt6_check,
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "dst",
+		.family		= NFPROTO_IPV6,
+		.match		= hbh_mt6,
+		.matchsize	= sizeof(struct ip6t_opts),
+		.checkentry	= hbh_mt6_check,
+		.me		= THIS_MODULE,
+	},
 };
 
-static int __init init(void)
+static int __init hbh_mt6_init(void)
 {
-       return ip6t_register_match(&opts_match);
+	return xt_register_matches(hbh_mt6_reg, ARRAY_SIZE(hbh_mt6_reg));
 }
 
-static void __exit cleanup(void)
+static void __exit hbh_mt6_exit(void)
 {
-       ip6t_unregister_match(&opts_match);
+	xt_unregister_matches(hbh_mt6_reg, ARRAY_SIZE(hbh_mt6_reg));
 }
 
-module_init(init);
-module_exit(cleanup);
+module_init(hbh_mt6_init);
+module_exit(hbh_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
deleted file mode 100644
index 0beaff5471d..00000000000
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/* Hop Limit matching module */
-
-/* (C) 2001-2002 Maciej Soltysiak <solt@dns.toxicfilms.tv>
- * Based on HW's ttl module
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv6/ip6t_hl.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-
-MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
-MODULE_DESCRIPTION("IP tables Hop Limit matching module");
-MODULE_LICENSE("GPL");
-
-static int match(const struct sk_buff *skb, const struct net_device *in,
-		 const struct net_device *out, const void *matchinfo,
-		 int offset, unsigned int protoff,
-		 int *hotdrop)
-{
-	const struct ip6t_hl_info *info = matchinfo;
-	const struct ipv6hdr *ip6h = skb->nh.ipv6h;
-
-	switch (info->mode) {
-		case IP6T_HL_EQ:
-			return (ip6h->hop_limit == info->hop_limit);
-			break;
-		case IP6T_HL_NE:
-			return (!(ip6h->hop_limit == info->hop_limit));
-			break;
-		case IP6T_HL_LT:
-			return (ip6h->hop_limit < info->hop_limit);
-			break;
-		case IP6T_HL_GT:
-			return (ip6h->hop_limit > info->hop_limit);
-			break;
-		default:
-			printk(KERN_WARNING "ip6t_hl: unknown mode %d\n", 
-				info->mode);
-			return 0;
-	}
-
-	return 0;
-}
-
-static int checkentry(const char *tablename, const struct ip6t_ip6 *ip,
-		      void *matchinfo, unsigned int matchsize,
-		      unsigned int hook_mask)
-{
-	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_hl_info)))
-		return 0;
-
-	return 1;
-}
-
-static struct ip6t_match hl_match = {
-	.name		= "hl",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ip6t_register_match(&hl_match);
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_match(&hl_match);
-
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 32e67f05845..54bd9790603 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -1,7 +1,7 @@
 /* ipv6header match - matches IPv6 packets based
    on whether they contain certain headers */
 
-/* Original idea: Brad Chapman 
+/* Original idea: Brad Chapman
  * Rewritten by: Andras Kis-Szabo <kisza@sch.bme.hu> */
 
 /* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
@@ -18,23 +18,18 @@
 #include <net/checksum.h>
 #include <net/ipv6.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_ipv6header.h>
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("IPv6 headers match");
+MODULE_DESCRIPTION("Xtables: IPv6 header types match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
-static int
-ipv6header_match(const struct sk_buff *skb,
-		 const struct net_device *in,
-		 const struct net_device *out,
-		 const void *matchinfo,
-		 int offset,
-		 unsigned int protoff,
-		 int *hotdrop)
+static bool
+ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
-	const struct ip6t_ipv6header_info *info = matchinfo;
+	const struct ip6t_ipv6header_info *info = par->matchinfo;
 	unsigned int temp;
 	int len;
 	u8 nexthdr;
@@ -43,27 +38,28 @@ ipv6header_match(const struct sk_buff *skb,
 	/* Make sure this isn't an evil packet */
 
 	/* type of the 1st exthdr */
-	nexthdr = skb->nh.ipv6h->nexthdr;
+	nexthdr = ipv6_hdr(skb)->nexthdr;
 	/* pointer to the 1st exthdr */
 	ptr = sizeof(struct ipv6hdr);
 	/* available length */
 	len = skb->len - ptr;
 	temp = 0;
 
-        while (ip6t_ext_hdr(nexthdr)) {
-		struct ipv6_opt_hdr _hdr, *hp;
-        	int hdrlen;
+	while (ip6t_ext_hdr(nexthdr)) {
+		const struct ipv6_opt_hdr *hp;
+		struct ipv6_opt_hdr _hdr;
+		int hdrlen;
 
-		/* Is there enough space for the next ext header? */
-                if (len < (int)sizeof(struct ipv6_opt_hdr))
-                        return 0;
 		/* No more exthdr -> evaluate */
-                if (nexthdr == NEXTHDR_NONE) {
+		if (nexthdr == NEXTHDR_NONE) {
 			temp |= MASK_NONE;
 			break;
 		}
+		/* Is there enough space for the next ext header? */
+		if (len < (int)sizeof(struct ipv6_opt_hdr))
+			return false;
 		/* ESP -> evaluate */
-                if (nexthdr == NEXTHDR_ESP) {
+		if (nexthdr == NEXTHDR_ESP) {
 			temp |= MASK_ESP;
 			break;
 		}
@@ -72,43 +68,43 @@ ipv6header_match(const struct sk_buff *skb,
 		BUG_ON(hp == NULL);
 
 		/* Calculate the header length */
-                if (nexthdr == NEXTHDR_FRAGMENT) {
-                        hdrlen = 8;
-                } else if (nexthdr == NEXTHDR_AUTH)
-                        hdrlen = (hp->hdrlen+2)<<2;
-                else
-                        hdrlen = ipv6_optlen(hp);
+		if (nexthdr == NEXTHDR_FRAGMENT)
+			hdrlen = 8;
+		else if (nexthdr == NEXTHDR_AUTH)
+			hdrlen = (hp->hdrlen + 2) << 2;
+		else
+			hdrlen = ipv6_optlen(hp);
 
 		/* set the flag */
-		switch (nexthdr){
-			case NEXTHDR_HOP:
-				temp |= MASK_HOPOPTS;
-				break;
-			case NEXTHDR_ROUTING:
-				temp |= MASK_ROUTING;
-				break;
-			case NEXTHDR_FRAGMENT:
-				temp |= MASK_FRAGMENT;
-				break;
-			case NEXTHDR_AUTH:
-				temp |= MASK_AH;
-				break;
-			case NEXTHDR_DEST:
-				temp |= MASK_DSTOPTS;
-				break;
-			default:
-				return 0;
-				break;
+		switch (nexthdr) {
+		case NEXTHDR_HOP:
+			temp |= MASK_HOPOPTS;
+			break;
+		case NEXTHDR_ROUTING:
+			temp |= MASK_ROUTING;
+			break;
+		case NEXTHDR_FRAGMENT:
+			temp |= MASK_FRAGMENT;
+			break;
+		case NEXTHDR_AUTH:
+			temp |= MASK_AH;
+			break;
+		case NEXTHDR_DEST:
+			temp |= MASK_DSTOPTS;
+			break;
+		default:
+			return false;
+			break;
 		}
 
-                nexthdr = hp->nexthdr;
-                len -= hdrlen;
-                ptr += hdrlen;
+		nexthdr = hp->nexthdr;
+		len -= hdrlen;
+		ptr += hdrlen;
 		if (ptr > skb->len)
 			break;
-        }
+	}
 
-	if ( (nexthdr != NEXTHDR_NONE ) && (nexthdr != NEXTHDR_ESP) )
+	if (nexthdr != NEXTHDR_NONE && nexthdr != NEXTHDR_ESP)
 		temp |= MASK_PROTO;
 
 	if (info->modeflag)
@@ -122,46 +118,37 @@ ipv6header_match(const struct sk_buff *skb,
 	}
 }
 
-static int
-ipv6header_checkentry(const char *tablename,
-		      const struct ip6t_ip6 *ip,
-		      void *matchinfo,
-		      unsigned int matchsize,
-		      unsigned int hook_mask)
+static int ipv6header_mt6_check(const struct xt_mtchk_param *par)
 {
-	const struct ip6t_ipv6header_info *info = matchinfo;
-
-	/* Check for obvious errors */
-	/* This match is valid in all hooks! */
-	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_ipv6header_info)))
-		return 0;
+	const struct ip6t_ipv6header_info *info = par->matchinfo;
 
 	/* invflags is 0 or 0xff in hard mode */
-	if ((!info->modeflag) && info->invflags != 0x00
-			      && info->invflags != 0xFF)
-		return 0;
+	if ((!info->modeflag) && info->invflags != 0x00 &&
+	    info->invflags != 0xFF)
+		return -EINVAL;
 
-	return 1;
+	return 0;
 }
 
-static struct ip6t_match ip6t_ipv6header_match = {
+static struct xt_match ipv6header_mt6_reg __read_mostly = {
 	.name		= "ipv6header",
-	.match		= &ipv6header_match,
-	.checkentry	= &ipv6header_checkentry,
+	.family		= NFPROTO_IPV6,
+	.match		= ipv6header_mt6,
+	.matchsize	= sizeof(struct ip6t_ipv6header_info),
+	.checkentry	= ipv6header_mt6_check,
 	.destroy	= NULL,
 	.me		= THIS_MODULE,
 };
 
-static int  __init ipv6header_init(void)
+static int __init ipv6header_mt6_init(void)
 {
-	return ip6t_register_match(&ip6t_ipv6header_match);
+	return xt_register_match(&ipv6header_mt6_reg);
 }
 
-static void __exit ipv6header_exit(void)
+static void __exit ipv6header_mt6_exit(void)
 {
-	ip6t_unregister_match(&ip6t_ipv6header_match);
+	xt_unregister_match(&ipv6header_mt6_reg);
 }
 
-module_init(ipv6header_init);
-module_exit(ipv6header_exit);
-
+module_init(ipv6header_mt6_init);
+module_exit(ipv6header_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_length.c b/net/ipv6/netfilter/ip6t_length.c
deleted file mode 100644
index e0537d3811d..00000000000
--- a/net/ipv6/netfilter/ip6t_length.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/* Length Match - IPv6 Port */
-
-/* (C) 1999-2001 James Morris <jmorros@intercode.com.au>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv6/ip6t_length.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-MODULE_DESCRIPTION("IPv6 packet length match");
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-	const struct ip6t_length_info *info = matchinfo;
-	u_int16_t pktlen = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
-	
-	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
-}
-
-static int
-checkentry(const char *tablename,
-           const struct ip6t_ip6 *ip,
-           void *matchinfo,
-           unsigned int matchsize,
-           unsigned int hook_mask)
-{
-	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_length_info)))
-		return 0;
-
-	return 1;
-}
-
-static struct ip6t_match length_match = {
-	.name		= "length",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ip6t_register_match(&length_match);
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_match(&length_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_limit.c b/net/ipv6/netfilter/ip6t_limit.c
deleted file mode 100644
index fb782f610be..00000000000
--- a/net/ipv6/netfilter/ip6t_limit.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/* Kernel module to control the rate
- *
- * 2 September 1999: Changed from the target RATE to the match
- *                   `limit', removed logging.  Did I mention that
- *                   Alexey is a fucking genius?
- *                   Rusty Russell (rusty@rustcorp.com.au).  */
-
-/* (C) 1999 J�r�me de Vivie <devivie@info.enserb.u-bordeaux.fr>
- * (C) 1999 Herv� Eychenne <eychenne@info.enserb.u-bordeaux.fr>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter_ipv6/ip6t_limit.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
-MODULE_DESCRIPTION("rate limiting within ip6tables");
-
-/* The algorithm used is the Simple Token Bucket Filter (TBF)
- * see net/sched/sch_tbf.c in the linux source tree
- */
-
-static DEFINE_SPINLOCK(limit_lock);
-
-/* Rusty: This is my (non-mathematically-inclined) understanding of
-   this algorithm.  The `average rate' in jiffies becomes your initial
-   amount of credit `credit' and the most credit you can ever have
-   `credit_cap'.  The `peak rate' becomes the cost of passing the
-   test, `cost'.
-
-   `prev' tracks the last packet hit: you gain one credit per jiffy.
-   If you get credit balance more than this, the extra credit is
-   discarded.  Every time the match passes, you lose `cost' credits;
-   if you don't have that many, the test fails.
-
-   See Alexey's formal explanation in net/sched/sch_tbf.c.
-
-   To avoid underflow, we multiply by 128 (ie. you get 128 credits per
-   jiffy).  Hence a cost of 2^32-1, means one pass per 32768 seconds
-   at 1024HZ (or one every 9 hours).  A cost of 1 means 12800 passes
-   per second at 100HZ.  */
-
-#define CREDITS_PER_JIFFY 128
-
-static int
-ip6t_limit_match(const struct sk_buff *skb,
-		const struct net_device *in,
-		const struct net_device *out,
-		const void *matchinfo,
-		int offset,
-		unsigned int protoff,
-		int *hotdrop)
-{
-	struct ip6t_rateinfo *r = ((struct ip6t_rateinfo *)matchinfo)->master;
-	unsigned long now = jiffies;
-
-	spin_lock_bh(&limit_lock);
-	r->credit += (now - xchg(&r->prev, now)) * CREDITS_PER_JIFFY;
-	if (r->credit > r->credit_cap)
-		r->credit = r->credit_cap;
-
-	if (r->credit >= r->cost) {
-		/* We're not limited. */
-		r->credit -= r->cost;
-		spin_unlock_bh(&limit_lock);
-		return 1;
-	}
-
-       	spin_unlock_bh(&limit_lock);
-	return 0;
-}
-
-/* Precision saver. */
-static u_int32_t
-user2credits(u_int32_t user)
-{
-	/* If multiplying would overflow... */
-	if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
-		/* Divide first. */
-		return (user / IP6T_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
-
-	return (user * HZ * CREDITS_PER_JIFFY) / IP6T_LIMIT_SCALE;
-}
-
-static int
-ip6t_limit_checkentry(const char *tablename,
-		     const struct ip6t_ip6 *ip,
-		     void *matchinfo,
-		     unsigned int matchsize,
-		     unsigned int hook_mask)
-{
-	struct ip6t_rateinfo *r = matchinfo;
-
-	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_rateinfo)))
-		return 0;
-
-	/* Check for overflow. */
-	if (r->burst == 0
-	    || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
-		printk("Call rusty: overflow in ip6t_limit: %u/%u\n",
-		       r->avg, r->burst);
-		return 0;
-	}
-
-	/* User avg in seconds * IP6T_LIMIT_SCALE: convert to jiffies *
-	   128. */
-	r->prev = jiffies;
-	r->credit = user2credits(r->avg * r->burst);	 /* Credits full. */
-	r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
-	r->cost = user2credits(r->avg);
-
-	/* For SMP, we only want to use one set of counters. */
-	r->master = r;
-
-	return 1;
-}
-
-static struct ip6t_match ip6t_limit_reg = {
-	.name		= "limit",
-	.match		= ip6t_limit_match,
-	.checkentry	= ip6t_limit_checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	if (ip6t_register_match(&ip6t_limit_reg))
-		return -EINVAL;
-	return 0;
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_match(&ip6t_limit_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_mac.c b/net/ipv6/netfilter/ip6t_mac.c
deleted file mode 100644
index 526d43e3723..00000000000
--- a/net/ipv6/netfilter/ip6t_mac.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/* Kernel module to match MAC address parameters. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/if_ether.h>
-
-#include <linux/netfilter_ipv6/ip6t_mac.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("MAC address matching module for IPv6");
-MODULE_AUTHOR("Netfilter Core Teaam <coreteam@netfilter.org>");
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-    const struct ip6t_mac_info *info = matchinfo;
-
-    /* Is mac pointer valid? */
-    return (skb->mac.raw >= skb->head
-	    && (skb->mac.raw + ETH_HLEN) <= skb->data
-	    /* If so, compare... */
-	    && ((memcmp(eth_hdr(skb)->h_source, info->srcaddr, ETH_ALEN)
-		== 0) ^ info->invert));
-}
-
-static int
-ip6t_mac_checkentry(const char *tablename,
-		   const struct ip6t_ip6 *ip,
-		   void *matchinfo,
-		   unsigned int matchsize,
-		   unsigned int hook_mask)
-{
-	if (hook_mask
-	    & ~((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN)
-		| (1 << NF_IP6_FORWARD))) {
-		printk("ip6t_mac: only valid for PRE_ROUTING, LOCAL_IN or"
-		       " FORWARD\n");
-		return 0;
-	}
-
-	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_mac_info)))
-		return 0;
-
-	return 1;
-}
-
-static struct ip6t_match mac_match = {
-	.name		= "mac",
-	.match		= &match,
-	.checkentry	= &ip6t_mac_checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ip6t_register_match(&mac_match);
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_match(&mac_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_mark.c b/net/ipv6/netfilter/ip6t_mark.c
deleted file mode 100644
index affc3de364f..00000000000
--- a/net/ipv6/netfilter/ip6t_mark.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/* Kernel module to match NFMARK values. */
-
-/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv6/ip6t_mark.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("ip6tables mark match");
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-	const struct ip6t_mark_info *info = matchinfo;
-
-	return ((skb->nfmark & info->mask) == info->mark) ^ info->invert;
-}
-
-static int
-checkentry(const char *tablename,
-           const struct ip6t_ip6 *ip,
-           void *matchinfo,
-           unsigned int matchsize,
-           unsigned int hook_mask)
-{
-	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_mark_info)))
-		return 0;
-
-	return 1;
-}
-
-static struct ip6t_match mark_match = {
-	.name		= "mark",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ip6t_register_match(&mark_match);
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_match(&mark_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
new file mode 100644
index 00000000000..0c90c66b199
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C)2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ *	Masahide NAKAMURA @USAGI <masahide.nakamura.cz@hitachi.com>
+ *
+ * Based on net/netfilter/xt_tcpudp.c
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/types.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/mip6.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv6/ip6t_mh.h>
+
+MODULE_DESCRIPTION("Xtables: IPv6 Mobility Header match");
+MODULE_LICENSE("GPL");
+
+/* Returns 1 if the type is matched by the range, 0 otherwise */
+static inline bool
+type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert)
+{
+	return (type >= min && type <= max) ^ invert;
+}
+
+static bool mh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	struct ip6_mh _mh;
+	const struct ip6_mh *mh;
+	const struct ip6t_mh *mhinfo = par->matchinfo;
+
+	/* Must not be a fragment. */
+	if (par->fragoff != 0)
+		return false;
+
+	mh = skb_header_pointer(skb, par->thoff, sizeof(_mh), &_mh);
+	if (mh == NULL) {
+		/* We've been asked to examine this packet, and we
+		   can't.  Hence, no choice but to drop. */
+		pr_debug("Dropping evil MH tinygram.\n");
+		par->hotdrop = true;
+		return false;
+	}
+
+	if (mh->ip6mh_proto != IPPROTO_NONE) {
+		pr_debug("Dropping invalid MH Payload Proto: %u\n",
+			 mh->ip6mh_proto);
+		par->hotdrop = true;
+		return false;
+	}
+
+	return type_match(mhinfo->types[0], mhinfo->types[1], mh->ip6mh_type,
+			  !!(mhinfo->invflags & IP6T_MH_INV_TYPE));
+}
+
+static int mh_mt6_check(const struct xt_mtchk_param *par)
+{
+	const struct ip6t_mh *mhinfo = par->matchinfo;
+
+	/* Must specify no unknown invflags */
+	return (mhinfo->invflags & ~IP6T_MH_INV_MASK) ? -EINVAL : 0;
+}
+
+static struct xt_match mh_mt6_reg __read_mostly = {
+	.name		= "mh",
+	.family		= NFPROTO_IPV6,
+	.checkentry	= mh_mt6_check,
+	.match		= mh_mt6,
+	.matchsize	= sizeof(struct ip6t_mh),
+	.proto		= IPPROTO_MH,
+	.me		= THIS_MODULE,
+};
+
+static int __init mh_mt6_init(void)
+{
+	return xt_register_match(&mh_mt6_reg);
+}
+
+static void __exit mh_mt6_exit(void)
+{
+	xt_unregister_match(&mh_mt6_reg);
+}
+
+module_init(mh_mt6_init);
+module_exit(mh_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_multiport.c b/net/ipv6/netfilter/ip6t_multiport.c
deleted file mode 100644
index 6e3246153fa..00000000000
--- a/net/ipv6/netfilter/ip6t_multiport.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/* Kernel module to match one of a list of TCP/UDP ports: ports are in
-   the same place so we can treat them as equal. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/udp.h>
-#include <linux/skbuff.h>
-#include <linux/in.h>
-
-#include <linux/netfilter_ipv6/ip6t_multiport.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("ip6tables match for multiple ports");
-
-#if 0
-#define duprintf(format, args...) printk(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
-/* Returns 1 if the port is matched by the test, 0 otherwise. */
-static inline int
-ports_match(const u_int16_t *portlist, enum ip6t_multiport_flags flags,
-	    u_int8_t count, u_int16_t src, u_int16_t dst)
-{
-	unsigned int i;
-	for (i=0; i<count; i++) {
-		if (flags != IP6T_MULTIPORT_DESTINATION
-		    && portlist[i] == src)
-			return 1;
-
-		if (flags != IP6T_MULTIPORT_SOURCE
-		    && portlist[i] == dst)
-			return 1;
-	}
-
-	return 0;
-}
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-	u16 _ports[2], *pptr;
-	const struct ip6t_multiport *multiinfo = matchinfo;
-
-	/* Must not be a fragment. */
-	if (offset)
-		return 0;
-
-	/* Must be big enough to read ports (both UDP and TCP have
-	   them at the start). */
-	pptr = skb_header_pointer(skb, protoff, sizeof(_ports), &_ports[0]);
-	if (pptr == NULL) {
-		/* We've been asked to examine this packet, and we
-		 * can't.  Hence, no choice but to drop.
-		 */
-		duprintf("ip6t_multiport:"
-			 " Dropping evil offset=0 tinygram.\n");
-		*hotdrop = 1;
-		return 0;
-	}
-
-	return ports_match(multiinfo->ports,
-			   multiinfo->flags, multiinfo->count,
-			   ntohs(pptr[0]), ntohs(pptr[1]));
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-checkentry(const char *tablename,
-	   const struct ip6t_ip6 *ip,
-	   void *matchinfo,
-	   unsigned int matchsize,
-	   unsigned int hook_mask)
-{
-	const struct ip6t_multiport *multiinfo = matchinfo;
-
-	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_multiport)))
-		return 0;
-
-	/* Must specify proto == TCP/UDP, no unknown flags or bad count */
-	return (ip->proto == IPPROTO_TCP || ip->proto == IPPROTO_UDP)
-		&& !(ip->invflags & IP6T_INV_PROTO)
-		&& matchsize == IP6T_ALIGN(sizeof(struct ip6t_multiport))
-		&& (multiinfo->flags == IP6T_MULTIPORT_SOURCE
-		    || multiinfo->flags == IP6T_MULTIPORT_DESTINATION
-		    || multiinfo->flags == IP6T_MULTIPORT_EITHER)
-		&& multiinfo->count <= IP6T_MULTI_PORTS;
-}
-
-static struct ip6t_match multiport_match = {
-	.name		= "multiport",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ip6t_register_match(&multiport_match);
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_match(&multiport_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
deleted file mode 100644
index 4de4cdad4b7..00000000000
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/* Kernel module to match various things tied to sockets associated with
-   locally generated outgoing packets. */
-
-/* (C) 2000-2001 Marc Boucher <marc@mbsi.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/file.h>
-#include <linux/rcupdate.h>
-#include <net/sock.h>
-
-#include <linux/netfilter_ipv6/ip6t_owner.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-
-MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("IP6 tables owner matching module");
-MODULE_LICENSE("GPL");
-
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-	const struct ip6t_owner_info *info = matchinfo;
-
-	if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file)
-		return 0;
-
-	if(info->match & IP6T_OWNER_UID) {
-		if((skb->sk->sk_socket->file->f_uid != info->uid) ^
-		    !!(info->invert & IP6T_OWNER_UID))
-			return 0;
-	}
-
-	if(info->match & IP6T_OWNER_GID) {
-		if((skb->sk->sk_socket->file->f_gid != info->gid) ^
-		    !!(info->invert & IP6T_OWNER_GID))
-			return 0;
-	}
-
-	return 1;
-}
-
-static int
-checkentry(const char *tablename,
-           const struct ip6t_ip6 *ip,
-           void *matchinfo,
-           unsigned int matchsize,
-           unsigned int hook_mask)
-{
-	const struct ip6t_owner_info *info = matchinfo;
-
-        if (hook_mask
-            & ~((1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING))) {
-                printk("ip6t_owner: only valid for LOCAL_OUT or POST_ROUTING.\n");
-                return 0;
-        }
-
-	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_owner_info)))
-		return 0;
-
-	if (info->match & (IP6T_OWNER_PID|IP6T_OWNER_SID)) {
-		printk("ipt_owner: pid and sid matching "
-		       "not supported anymore\n");
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct ip6t_match owner_match = {
-	.name		= "owner",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ip6t_register_match(&owner_match);
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_match(&owner_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_physdev.c b/net/ipv6/netfilter/ip6t_physdev.c
deleted file mode 100644
index 71515c86ece..00000000000
--- a/net/ipv6/netfilter/ip6t_physdev.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/* Kernel module to match the bridge port in and
- * out device for IP packets coming into contact with a bridge. */
-
-/* (C) 2001-2003 Bart De Schuymer <bdschuym@pandora.be>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv6/ip6t_physdev.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter_bridge.h>
-#define MATCH   1
-#define NOMATCH 0
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
-MODULE_DESCRIPTION("iptables bridge physical device match module");
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-	int i;
-	static const char nulldevname[IFNAMSIZ];
-	const struct ip6t_physdev_info *info = matchinfo;
-	unsigned int ret;
-	const char *indev, *outdev;
-	struct nf_bridge_info *nf_bridge;
-
-	/* Not a bridged IP packet or no info available yet:
-	 * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
-	 * the destination device will be a bridge. */
-	if (!(nf_bridge = skb->nf_bridge)) {
-		/* Return MATCH if the invert flags of the used options are on */
-		if ((info->bitmask & IP6T_PHYSDEV_OP_BRIDGED) &&
-		    !(info->invert & IP6T_PHYSDEV_OP_BRIDGED))
-			return NOMATCH;
-		if ((info->bitmask & IP6T_PHYSDEV_OP_ISIN) &&
-		    !(info->invert & IP6T_PHYSDEV_OP_ISIN))
-			return NOMATCH;
-		if ((info->bitmask & IP6T_PHYSDEV_OP_ISOUT) &&
-		    !(info->invert & IP6T_PHYSDEV_OP_ISOUT))
-			return NOMATCH;
-		if ((info->bitmask & IP6T_PHYSDEV_OP_IN) &&
-		    !(info->invert & IP6T_PHYSDEV_OP_IN))
-			return NOMATCH;
-		if ((info->bitmask & IP6T_PHYSDEV_OP_OUT) &&
-		    !(info->invert & IP6T_PHYSDEV_OP_OUT))
-			return NOMATCH;
-		return MATCH;
-	}
-
-	/* This only makes sense in the FORWARD and POSTROUTING chains */
-	if ((info->bitmask & IP6T_PHYSDEV_OP_BRIDGED) &&
-	    (!!(nf_bridge->mask & BRNF_BRIDGED) ^
-	    !(info->invert & IP6T_PHYSDEV_OP_BRIDGED)))
-		return NOMATCH;
-
-	if ((info->bitmask & IP6T_PHYSDEV_OP_ISIN &&
-	    (!nf_bridge->physindev ^ !!(info->invert & IP6T_PHYSDEV_OP_ISIN))) ||
-	    (info->bitmask & IP6T_PHYSDEV_OP_ISOUT &&
-	    (!nf_bridge->physoutdev ^ !!(info->invert & IP6T_PHYSDEV_OP_ISOUT))))
-		return NOMATCH;
-
-	if (!(info->bitmask & IP6T_PHYSDEV_OP_IN))
-		goto match_outdev;
-	indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
-	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
-		ret |= (((const unsigned int *)indev)[i]
-			^ ((const unsigned int *)info->physindev)[i])
-			& ((const unsigned int *)info->in_mask)[i];
-	}
-
-	if ((ret == 0) ^ !(info->invert & IP6T_PHYSDEV_OP_IN))
-		return NOMATCH;
-
-match_outdev:
-	if (!(info->bitmask & IP6T_PHYSDEV_OP_OUT))
-		return MATCH;
-	outdev = nf_bridge->physoutdev ?
-		 nf_bridge->physoutdev->name : nulldevname;
-	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
-		ret |= (((const unsigned int *)outdev)[i]
-			^ ((const unsigned int *)info->physoutdev)[i])
-			& ((const unsigned int *)info->out_mask)[i];
-	}
-
-	return (ret != 0) ^ !(info->invert & IP6T_PHYSDEV_OP_OUT);
-}
-
-static int
-checkentry(const char *tablename,
-		       const struct ip6t_ip6 *ip,
-		       void *matchinfo,
-		       unsigned int matchsize,
-		       unsigned int hook_mask)
-{
-	const struct ip6t_physdev_info *info = matchinfo;
-
-	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_physdev_info)))
-		return 0;
-	if (!(info->bitmask & IP6T_PHYSDEV_OP_MASK) ||
-	    info->bitmask & ~IP6T_PHYSDEV_OP_MASK)
-		return 0;
-	return 1;
-}
-
-static struct ip6t_match physdev_match = {
-	.name		= "physdev",
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init init(void)
-{
-	return ip6t_register_match(&physdev_match);
-}
-
-static void __exit fini(void)
-{
-	ip6t_unregister_match(&physdev_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
new file mode 100644
index 00000000000..790e0c6b19e
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2011 Florian Westphal <fw@strlen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/route.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+
+#include <linux/netfilter/xt_rpfilter.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("Xtables: IPv6 reverse path filter match");
+
+static bool rpfilter_addr_unicast(const struct in6_addr *addr)
+{
+	int addr_type = ipv6_addr_type(addr);
+	return addr_type & IPV6_ADDR_UNICAST;
+}
+
+static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
+				     const struct net_device *dev, u8 flags)
+{
+	struct rt6_info *rt;
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+	bool ret = false;
+	struct flowi6 fl6 = {
+		.flowi6_iif = LOOPBACK_IFINDEX,
+		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
+		.flowi6_proto = iph->nexthdr,
+		.daddr = iph->saddr,
+	};
+	int lookup_flags;
+
+	if (rpfilter_addr_unicast(&iph->daddr)) {
+		memcpy(&fl6.saddr, &iph->daddr, sizeof(struct in6_addr));
+		lookup_flags = RT6_LOOKUP_F_HAS_SADDR;
+	} else {
+		lookup_flags = 0;
+	}
+
+	fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
+	if ((flags & XT_RPFILTER_LOOSE) == 0) {
+		fl6.flowi6_oif = dev->ifindex;
+		lookup_flags |= RT6_LOOKUP_F_IFACE;
+	}
+
+	rt = (void *) ip6_route_lookup(dev_net(dev), &fl6, lookup_flags);
+	if (rt->dst.error)
+		goto out;
+
+	if (rt->rt6i_flags & (RTF_REJECT|RTF_ANYCAST))
+		goto out;
+
+	if (rt->rt6i_flags & RTF_LOCAL) {
+		ret = flags & XT_RPFILTER_ACCEPT_LOCAL;
+		goto out;
+	}
+
+	if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
+		ret = true;
+ out:
+	ip6_rt_put(rt);
+	return ret;
+}
+
+static bool rpfilter_is_local(const struct sk_buff *skb)
+{
+	const struct rt6_info *rt = (const void *) skb_dst(skb);
+	return rt && (rt->rt6i_flags & RTF_LOCAL);
+}
+
+static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_rpfilter_info *info = par->matchinfo;
+	int saddrtype;
+	struct ipv6hdr *iph;
+	bool invert = info->flags & XT_RPFILTER_INVERT;
+
+	if (rpfilter_is_local(skb))
+		return true ^ invert;
+
+	iph = ipv6_hdr(skb);
+	saddrtype = ipv6_addr_type(&iph->saddr);
+	if (unlikely(saddrtype == IPV6_ADDR_ANY))
+		return true ^ invert; /* not routable: forward path will drop it */
+
+	return rpfilter_lookup_reverse6(skb, par->in, info->flags) ^ invert;
+}
+
+static int rpfilter_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_rpfilter_info *info = par->matchinfo;
+	unsigned int options = ~XT_RPFILTER_OPTION_MASK;
+
+	if (info->flags & options) {
+		pr_info("unknown options encountered");
+		return -EINVAL;
+	}
+
+	if (strcmp(par->table, "mangle") != 0 &&
+	    strcmp(par->table, "raw") != 0) {
+		pr_info("match only valid in the \'raw\' "
+			"or \'mangle\' tables, not \'%s\'.\n", par->table);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct xt_match rpfilter_mt_reg __read_mostly = {
+	.name		= "rpfilter",
+	.family		= NFPROTO_IPV6,
+	.checkentry	= rpfilter_check,
+	.match		= rpfilter_mt,
+	.matchsize	= sizeof(struct xt_rpfilter_info),
+	.hooks		= (1 << NF_INET_PRE_ROUTING),
+	.me		= THIS_MODULE
+};
+
+static int __init rpfilter_mt_init(void)
+{
+	return xt_register_match(&rpfilter_mt_reg);
+}
+
+static void __exit rpfilter_mt_exit(void)
+{
+	xt_unregister_match(&rpfilter_mt_reg);
+}
+
+module_init(rpfilter_mt_init);
+module_exit(rpfilter_mt_exit);
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index beb2fd5cebb..2c99b94eeca 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ipv6.h>
@@ -16,121 +16,120 @@
 
 #include <asm/byteorder.h>
 
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_rt.h>
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("IPv6 RT match");
+MODULE_DESCRIPTION("Xtables: IPv6 Routing Header match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 /* Returns 1 if the id is matched by the range, 0 otherwise */
-static inline int
-segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert)
+static inline bool
+segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 {
-       int r=0;
-       DEBUGP("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
-              min,id,max);
-       r=(id >= min && id <= max) ^ invert;
-       DEBUGP(" result %s\n",r? "PASS" : "FAILED");
-       return r;
+	bool r;
+	pr_debug("segsleft_match:%c 0x%x <= 0x%x <= 0x%x\n",
+		 invert ? '!' : ' ', min, id, max);
+	r = (id >= min && id <= max) ^ invert;
+	pr_debug(" result %s\n", r ? "PASS" : "FAILED");
+	return r;
 }
 
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
+static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
-       struct ipv6_rt_hdr _route, *rh;
-       const struct ip6t_rt *rtinfo = matchinfo;
-       unsigned int temp;
-       unsigned int ptr;
-       unsigned int hdrlen = 0;
-       unsigned int ret = 0;
-       struct in6_addr *ap, _addr;
-
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0)
-		return 0;
-
-       rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
-       if (rh == NULL){
-	       *hotdrop = 1;
-       		return 0;
-       }
-
-       hdrlen = ipv6_optlen(rh);
-       if (skb->len - ptr < hdrlen){
-	       /* Pcket smaller than its length field */
-       		return 0;
-       }
-
-       DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen);
-       DEBUGP("TYPE %04X ", rh->type);
-       DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left);
-
-       DEBUGP("IPv6 RT segsleft %02X ",
-       		(segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
-                           rh->segments_left,
-                           !!(rtinfo->invflags & IP6T_RT_INV_SGS))));
-       DEBUGP("type %02X %02X %02X ",
-       		rtinfo->rt_type, rh->type, 
-       		(!(rtinfo->flags & IP6T_RT_TYP) ||
-                           ((rtinfo->rt_type == rh->type) ^
-                           !!(rtinfo->invflags & IP6T_RT_INV_TYP))));
-       DEBUGP("len %02X %04X %02X ",
-       		rtinfo->hdrlen, hdrlen,
-       		(!(rtinfo->flags & IP6T_RT_LEN) ||
-                           ((rtinfo->hdrlen == hdrlen) ^
-                           !!(rtinfo->invflags & IP6T_RT_INV_LEN))));
-       DEBUGP("res %02X %02X %02X ", 
-       		(rtinfo->flags & IP6T_RT_RES), ((struct rt0_hdr *)rh)->reserved,
-       		!((rtinfo->flags & IP6T_RT_RES) && (((struct rt0_hdr *)rh)->reserved)));
-
-       ret = (rh != NULL)
-       		&&
-       		(segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
-                           rh->segments_left,
-                           !!(rtinfo->invflags & IP6T_RT_INV_SGS)))
-		&&
-	      	(!(rtinfo->flags & IP6T_RT_LEN) ||
-                           ((rtinfo->hdrlen == hdrlen) ^
-                           !!(rtinfo->invflags & IP6T_RT_INV_LEN)))
-		&&
-       		(!(rtinfo->flags & IP6T_RT_TYP) ||
-                           ((rtinfo->rt_type == rh->type) ^
-                           !!(rtinfo->invflags & IP6T_RT_INV_TYP)));
+	struct ipv6_rt_hdr _route;
+	const struct ipv6_rt_hdr *rh;
+	const struct ip6t_rt *rtinfo = par->matchinfo;
+	unsigned int temp;
+	unsigned int ptr = 0;
+	unsigned int hdrlen = 0;
+	bool ret = false;
+	struct in6_addr _addr;
+	const struct in6_addr *ap;
+	int err;
+
+	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL, NULL);
+	if (err < 0) {
+		if (err != -ENOENT)
+			par->hotdrop = true;
+		return false;
+	}
+
+	rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
+	if (rh == NULL) {
+		par->hotdrop = true;
+		return false;
+	}
+
+	hdrlen = ipv6_optlen(rh);
+	if (skb->len - ptr < hdrlen) {
+		/* Pcket smaller than its length field */
+		return false;
+	}
+
+	pr_debug("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen);
+	pr_debug("TYPE %04X ", rh->type);
+	pr_debug("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left);
+
+	pr_debug("IPv6 RT segsleft %02X ",
+		 segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
+				rh->segments_left,
+				!!(rtinfo->invflags & IP6T_RT_INV_SGS)));
+	pr_debug("type %02X %02X %02X ",
+		 rtinfo->rt_type, rh->type,
+		 (!(rtinfo->flags & IP6T_RT_TYP) ||
+		  ((rtinfo->rt_type == rh->type) ^
+		   !!(rtinfo->invflags & IP6T_RT_INV_TYP))));
+	pr_debug("len %02X %04X %02X ",
+		 rtinfo->hdrlen, hdrlen,
+		 !(rtinfo->flags & IP6T_RT_LEN) ||
+		  ((rtinfo->hdrlen == hdrlen) ^
+		   !!(rtinfo->invflags & IP6T_RT_INV_LEN)));
+	pr_debug("res %02X %02X %02X ",
+		 rtinfo->flags & IP6T_RT_RES,
+		 ((const struct rt0_hdr *)rh)->reserved,
+		 !((rtinfo->flags & IP6T_RT_RES) &&
+		   (((const struct rt0_hdr *)rh)->reserved)));
+
+	ret = (rh != NULL) &&
+	      (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
+			      rh->segments_left,
+			      !!(rtinfo->invflags & IP6T_RT_INV_SGS))) &&
+	      (!(rtinfo->flags & IP6T_RT_LEN) ||
+	       ((rtinfo->hdrlen == hdrlen) ^
+		!!(rtinfo->invflags & IP6T_RT_INV_LEN))) &&
+	      (!(rtinfo->flags & IP6T_RT_TYP) ||
+	       ((rtinfo->rt_type == rh->type) ^
+		!!(rtinfo->invflags & IP6T_RT_INV_TYP)));
 
 	if (ret && (rtinfo->flags & IP6T_RT_RES)) {
-		u_int32_t *rp, _reserved;
+		const u_int32_t *rp;
+		u_int32_t _reserved;
 		rp = skb_header_pointer(skb,
-					ptr + offsetof(struct rt0_hdr, reserved),
-					sizeof(_reserved), &_reserved);
+					ptr + offsetof(struct rt0_hdr,
+						       reserved),
+					sizeof(_reserved),
+					&_reserved);
 
 		ret = (*rp == 0);
 	}
 
-	DEBUGP("#%d ",rtinfo->addrnr);
-       if ( !(rtinfo->flags & IP6T_RT_FST) ){
-	       return ret;
+	pr_debug("#%d ", rtinfo->addrnr);
+	if (!(rtinfo->flags & IP6T_RT_FST)) {
+		return ret;
 	} else if (rtinfo->flags & IP6T_RT_FST_NSTRICT) {
-		DEBUGP("Not strict ");
-		if ( rtinfo->addrnr > (unsigned int)((hdrlen-8)/16) ){
-			DEBUGP("There isn't enough space\n");
-			return 0;
+		pr_debug("Not strict ");
+		if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) {
+			pr_debug("There isn't enough space\n");
+			return false;
 		} else {
 			unsigned int i = 0;
 
-			DEBUGP("#%d ",rtinfo->addrnr);
-			for(temp=0; temp<(unsigned int)((hdrlen-8)/16); temp++){
+			pr_debug("#%d ", rtinfo->addrnr);
+			for (temp = 0;
+			     temp < (unsigned int)((hdrlen - 8) / 16);
+			     temp++) {
 				ap = skb_header_pointer(skb,
 							ptr
 							+ sizeof(struct rt0_hdr)
@@ -141,24 +140,26 @@ match(const struct sk_buff *skb,
 				BUG_ON(ap == NULL);
 
 				if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) {
-					DEBUGP("i=%d temp=%d;\n",i,temp);
+					pr_debug("i=%d temp=%d;\n", i, temp);
 					i++;
 				}
-				if (i==rtinfo->addrnr) break;
+				if (i == rtinfo->addrnr)
+					break;
 			}
-			DEBUGP("i=%d #%d\n", i, rtinfo->addrnr);
+			pr_debug("i=%d #%d\n", i, rtinfo->addrnr);
 			if (i == rtinfo->addrnr)
 				return ret;
-			else return 0;
+			else
+				return false;
 		}
 	} else {
-		DEBUGP("Strict ");
-		if ( rtinfo->addrnr > (unsigned int)((hdrlen-8)/16) ){
-			DEBUGP("There isn't enough space\n");
-			return 0;
+		pr_debug("Strict ");
+		if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) {
+			pr_debug("There isn't enough space\n");
+			return false;
 		} else {
-			DEBUGP("#%d ",rtinfo->addrnr);
-			for(temp=0; temp<rtinfo->addrnr; temp++){
+			pr_debug("#%d ", rtinfo->addrnr);
+			for (temp = 0; temp < rtinfo->addrnr; temp++) {
 				ap = skb_header_pointer(skb,
 							ptr
 							+ sizeof(struct rt0_hdr)
@@ -170,63 +171,55 @@ match(const struct sk_buff *skb,
 				if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp]))
 					break;
 			}
-			DEBUGP("temp=%d #%d\n", temp, rtinfo->addrnr);
-			if ((temp == rtinfo->addrnr) && (temp == (unsigned int)((hdrlen-8)/16)))
+			pr_debug("temp=%d #%d\n", temp, rtinfo->addrnr);
+			if (temp == rtinfo->addrnr &&
+			    temp == (unsigned int)((hdrlen - 8) / 16))
 				return ret;
-			else return 0;
+			else
+				return false;
 		}
 	}
 
-	return 0;
+	return false;
 }
 
-/* Called when user tries to insert an entry of this type. */
-static int
-checkentry(const char *tablename,
-          const struct ip6t_ip6 *ip,
-          void *matchinfo,
-          unsigned int matchinfosize,
-          unsigned int hook_mask)
+static int rt_mt6_check(const struct xt_mtchk_param *par)
 {
-       const struct ip6t_rt *rtinfo = matchinfo;
-
-       if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_rt))) {
-              DEBUGP("ip6t_rt: matchsize %u != %u\n",
-                      matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_rt)));
-              return 0;
-       }
-       if (rtinfo->invflags & ~IP6T_RT_INV_MASK) {
-              DEBUGP("ip6t_rt: unknown flags %X\n",
-                      rtinfo->invflags);
-              return 0;
-       }
-       if ( (rtinfo->flags & (IP6T_RT_RES|IP6T_RT_FST_MASK)) && 
-		       (!(rtinfo->flags & IP6T_RT_TYP) || 
-		       (rtinfo->rt_type != 0) || 
-		       (rtinfo->invflags & IP6T_RT_INV_TYP)) ) {
-	      DEBUGP("`--rt-type 0' required before `--rt-0-*'");
-              return 0;
-       }
-
-       return 1;
+	const struct ip6t_rt *rtinfo = par->matchinfo;
+
+	if (rtinfo->invflags & ~IP6T_RT_INV_MASK) {
+		pr_debug("unknown flags %X\n", rtinfo->invflags);
+		return -EINVAL;
+	}
+	if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) &&
+	    (!(rtinfo->flags & IP6T_RT_TYP) ||
+	     (rtinfo->rt_type != 0) ||
+	     (rtinfo->invflags & IP6T_RT_INV_TYP))) {
+		pr_debug("`--rt-type 0' required before `--rt-0-*'");
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
-static struct ip6t_match rt_match = {
+static struct xt_match rt_mt6_reg __read_mostly = {
 	.name		= "rt",
-	.match		= &match,
-	.checkentry	= &checkentry,
+	.family		= NFPROTO_IPV6,
+	.match		= rt_mt6,
+	.matchsize	= sizeof(struct ip6t_rt),
+	.checkentry	= rt_mt6_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+static int __init rt_mt6_init(void)
 {
-       return ip6t_register_match(&rt_match);
+	return xt_register_match(&rt_mt6_reg);
 }
 
-static void __exit cleanup(void)
+static void __exit rt_mt6_exit(void)
 {
-       ip6t_unregister_match(&rt_match);
+	xt_unregister_match(&rt_mt6_reg);
 }
 
-module_init(init);
-module_exit(cleanup);
+module_init(rt_mt6_init);
+module_exit(rt_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 4c0028671c2..ca7f6c12808 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -12,203 +12,96 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/slab.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("ip6tables filter table");
 
-#define FILTER_VALID_HOOKS ((1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | (1 << NF_IP6_LOCAL_OUT))
+#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
+			    (1 << NF_INET_FORWARD) | \
+			    (1 << NF_INET_LOCAL_OUT))
 
-/* Standard entry. */
-struct ip6t_standard
-{
-	struct ip6t_entry entry;
-	struct ip6t_standard_target target;
-};
-
-struct ip6t_error_target
-{
-	struct ip6t_entry_target target;
-	char errorname[IP6T_FUNCTION_MAXNAMELEN];
-};
-
-struct ip6t_error
-{
-	struct ip6t_entry entry;
-	struct ip6t_error_target target;
-};
-
-static struct
-{
-	struct ip6t_replace repl;
-	struct ip6t_standard entries[3];
-	struct ip6t_error term;
-} initial_table __initdata
-= { { "filter", FILTER_VALID_HOOKS, 4,
-      sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error),
-      { [NF_IP6_LOCAL_IN] = 0,
-	[NF_IP6_FORWARD] = sizeof(struct ip6t_standard),
-	[NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 },
-      { [NF_IP6_LOCAL_IN] = 0,
-	[NF_IP6_FORWARD] = sizeof(struct ip6t_standard),
-	[NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 },
-      0, NULL, { } },
-    {
-	    /* LOCAL_IN */
-	    { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-		0,
-		sizeof(struct ip6t_entry),
-		sizeof(struct ip6t_standard),
-		0, { 0, 0 }, { } },
-	      { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
-		-NF_ACCEPT - 1 } },
-	    /* FORWARD */
-	    { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-		0,
-		sizeof(struct ip6t_entry),
-		sizeof(struct ip6t_standard),
-		0, { 0, 0 }, { } },
-	      { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
-		-NF_ACCEPT - 1 } },
-	    /* LOCAL_OUT */
-	    { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-		0,
-		sizeof(struct ip6t_entry),
-		sizeof(struct ip6t_standard),
-		0, { 0, 0 }, { } },
-	      { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
-		-NF_ACCEPT - 1 } }
-    },
-    /* ERROR */
-    { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-	0,
-	sizeof(struct ip6t_entry),
-	sizeof(struct ip6t_error),
-	0, { 0, 0 }, { } },
-      { { { { IP6T_ALIGN(sizeof(struct ip6t_error_target)), IP6T_ERROR_TARGET } },
-	  { } },
-	"ERROR"
-      }
-    }
-};
-
-static struct ip6t_table packet_filter = {
+static const struct xt_table packet_filter = {
 	.name		= "filter",
 	.valid_hooks	= FILTER_VALID_HOOKS,
-	.lock		= RW_LOCK_UNLOCKED,
 	.me		= THIS_MODULE,
+	.af		= NFPROTO_IPV6,
+	.priority	= NF_IP6_PRI_FILTER,
 };
 
 /* The work comes in here from netfilter.c. */
 static unsigned int
-ip6t_hook(unsigned int hook,
-	 struct sk_buff **pskb,
-	 const struct net_device *in,
-	 const struct net_device *out,
-	 int (*okfn)(struct sk_buff *))
-{
-	return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
-}
-
-static unsigned int
-ip6t_local_out_hook(unsigned int hook,
-		   struct sk_buff **pskb,
-		   const struct net_device *in,
-		   const struct net_device *out,
-		   int (*okfn)(struct sk_buff *))
+ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+		     const struct net_device *in, const struct net_device *out,
+		     int (*okfn)(struct sk_buff *))
 {
-#if 0
-	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
-		if (net_ratelimit())
-			printk("ip6t_hook: happy cracking.\n");
-		return NF_ACCEPT;
-	}
-#endif
+	const struct net *net = dev_net((in != NULL) ? in : out);
 
-	return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ip6t_do_table(skb, ops->hooknum, in, out,
+			     net->ipv6.ip6table_filter);
 }
 
-static struct nf_hook_ops ip6t_ops[] = {
-	{
-		.hook		= ip6t_hook,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_LOCAL_IN,
-		.priority	= NF_IP6_PRI_FILTER,
-	},
-	{
-		.hook		= ip6t_hook,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_FORWARD,
-		.priority	= NF_IP6_PRI_FILTER,
-	},
-	{
-		.hook		= ip6t_local_out_hook,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_LOCAL_OUT,
-		.priority	= NF_IP6_PRI_FILTER,
-	},
-};
+static struct nf_hook_ops *filter_ops __read_mostly;
 
 /* Default to forward because I got too much mail already. */
-static int forward = NF_ACCEPT;
+static bool forward = true;
 module_param(forward, bool, 0000);
 
-static int __init init(void)
+static int __net_init ip6table_filter_net_init(struct net *net)
 {
-	int ret;
-
-	if (forward < 0 || forward > NF_MAX_VERDICT) {
-		printk("iptables forward must be 0 or 1\n");
-		return -EINVAL;
-	}
+	struct ip6t_replace *repl;
 
+	repl = ip6t_alloc_initial_table(&packet_filter);
+	if (repl == NULL)
+		return -ENOMEM;
 	/* Entry 1 is the FORWARD hook */
-	initial_table.entries[1].target.verdict = -forward - 1;
+	((struct ip6t_standard *)repl->entries)[1].target.verdict =
+		forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
+
+	net->ipv6.ip6table_filter =
+		ip6t_register_table(net, &packet_filter, repl);
+	kfree(repl);
+	return PTR_ERR_OR_ZERO(net->ipv6.ip6table_filter);
+}
+
+static void __net_exit ip6table_filter_net_exit(struct net *net)
+{
+	ip6t_unregister_table(net, net->ipv6.ip6table_filter);
+}
+
+static struct pernet_operations ip6table_filter_net_ops = {
+	.init = ip6table_filter_net_init,
+	.exit = ip6table_filter_net_exit,
+};
+
+static int __init ip6table_filter_init(void)
+{
+	int ret;
 
-	/* Register table */
-	ret = ip6t_register_table(&packet_filter, &initial_table.repl);
+	ret = register_pernet_subsys(&ip6table_filter_net_ops);
 	if (ret < 0)
 		return ret;
 
 	/* Register hooks */
-	ret = nf_register_hook(&ip6t_ops[0]);
-	if (ret < 0)
+	filter_ops = xt_hook_link(&packet_filter, ip6table_filter_hook);
+	if (IS_ERR(filter_ops)) {
+		ret = PTR_ERR(filter_ops);
 		goto cleanup_table;
-
-	ret = nf_register_hook(&ip6t_ops[1]);
-	if (ret < 0)
-		goto cleanup_hook0;
-
-	ret = nf_register_hook(&ip6t_ops[2]);
-	if (ret < 0)
-		goto cleanup_hook1;
+	}
 
 	return ret;
 
- cleanup_hook1:
-	nf_unregister_hook(&ip6t_ops[1]);
- cleanup_hook0:
-	nf_unregister_hook(&ip6t_ops[0]);
  cleanup_table:
-	ip6t_unregister_table(&packet_filter);
-
+	unregister_pernet_subsys(&ip6table_filter_net_ops);
 	return ret;
 }
 
-static void __exit fini(void)
+static void __exit ip6table_filter_fini(void)
 {
-	unsigned int i;
-
-	for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++)
-		nf_unregister_hook(&ip6t_ops[i]);
-
-	ip6t_unregister_table(&packet_filter);
+	xt_hook_unlink(&packet_filter, filter_ops);
+	unregister_pernet_subsys(&ip6table_filter_net_ops);
 }
 
-module_init(init);
-module_exit(fini);
+module_init(ip6table_filter_init);
+module_exit(ip6table_filter_fini);
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 85c1e6eada1..307bbb782d1 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -7,281 +7,140 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * Extended to all five netfilter hooks by Brad Chapman & Harald Welte
  */
 #include <linux/module.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/slab.h>
+#include <net/ipv6.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("ip6tables mangle table");
 
-#define MANGLE_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | \
-			    (1 << NF_IP6_LOCAL_IN) | \
-			    (1 << NF_IP6_FORWARD) | \
-			    (1 << NF_IP6_LOCAL_OUT) | \
-			    (1 << NF_IP6_POST_ROUTING))
-
-#if 0
-#define DEBUGP(x, args...)	printk(KERN_DEBUG x, ## args)
-#else
-#define DEBUGP(x, args...)
-#endif
-
-/* Standard entry. */
-struct ip6t_standard
-{
-	struct ip6t_entry entry;
-	struct ip6t_standard_target target;
-};
-
-struct ip6t_error_target
-{
-	struct ip6t_entry_target target;
-	char errorname[IP6T_FUNCTION_MAXNAMELEN];
-};
-
-struct ip6t_error
-{
-	struct ip6t_entry entry;
-	struct ip6t_error_target target;
-};
+#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
+			    (1 << NF_INET_LOCAL_IN) | \
+			    (1 << NF_INET_FORWARD) | \
+			    (1 << NF_INET_LOCAL_OUT) | \
+			    (1 << NF_INET_POST_ROUTING))
 
-static struct
-{
-	struct ip6t_replace repl;
-	struct ip6t_standard entries[5];
-	struct ip6t_error term;
-} initial_table __initdata
-= { { "mangle", MANGLE_VALID_HOOKS, 6,
-      sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error),
-      { [NF_IP6_PRE_ROUTING] 	= 0,
-	[NF_IP6_LOCAL_IN]	= sizeof(struct ip6t_standard),
-	[NF_IP6_FORWARD]	= sizeof(struct ip6t_standard) * 2,
-	[NF_IP6_LOCAL_OUT] 	= sizeof(struct ip6t_standard) * 3,
-	[NF_IP6_POST_ROUTING]	= sizeof(struct ip6t_standard) * 4},
-      { [NF_IP6_PRE_ROUTING] 	= 0,
-	[NF_IP6_LOCAL_IN]	= sizeof(struct ip6t_standard),
-	[NF_IP6_FORWARD]	= sizeof(struct ip6t_standard) * 2,
-	[NF_IP6_LOCAL_OUT] 	= sizeof(struct ip6t_standard) * 3,
-	[NF_IP6_POST_ROUTING]	= sizeof(struct ip6t_standard) * 4},
-      0, NULL, { } },
-    {
-	    /* PRE_ROUTING */
-            { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-		0,
-		sizeof(struct ip6t_entry),
-		sizeof(struct ip6t_standard),
-		0, { 0, 0 }, { } },
-	      { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
-		-NF_ACCEPT - 1 } },
-	    /* LOCAL_IN */
-            { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-		0,
-		sizeof(struct ip6t_entry),
-		sizeof(struct ip6t_standard),
-		0, { 0, 0 }, { } },
-	      { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
-		-NF_ACCEPT - 1 } },
-	    /* FORWARD */
-            { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-		0,
-		sizeof(struct ip6t_entry),
-		sizeof(struct ip6t_standard),
-		0, { 0, 0 }, { } },
-	      { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
-		-NF_ACCEPT - 1 } },
-	    /* LOCAL_OUT */
-            { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-		0,
-		sizeof(struct ip6t_entry),
-		sizeof(struct ip6t_standard),
-		0, { 0, 0 }, { } },
-	      { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
-		-NF_ACCEPT - 1 } },
-	    /* POST_ROUTING */
-	    { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-		0,
-		sizeof(struct ip6t_entry),
-		sizeof(struct ip6t_standard),
-		0, { 0, 0 }, { } },
-	      { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
-		-NF_ACCEPT - 1 } }
-    },
-    /* ERROR */
-    { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-	0,
-	sizeof(struct ip6t_entry),
-	sizeof(struct ip6t_error),
-	0, { 0, 0 }, { } },
-      { { { { IP6T_ALIGN(sizeof(struct ip6t_error_target)), IP6T_ERROR_TARGET } },
-	  { } },
-	"ERROR"
-      }
-    }
-};
-
-static struct ip6t_table packet_mangler = {
+static const struct xt_table packet_mangler = {
 	.name		= "mangle",
 	.valid_hooks	= MANGLE_VALID_HOOKS,
-	.lock		= RW_LOCK_UNLOCKED,
 	.me		= THIS_MODULE,
+	.af		= NFPROTO_IPV6,
+	.priority	= NF_IP6_PRI_MANGLE,
 };
 
-/* The work comes in here from netfilter.c. */
-static unsigned int
-ip6t_route_hook(unsigned int hook,
-	 struct sk_buff **pskb,
-	 const struct net_device *in,
-	 const struct net_device *out,
-	 int (*okfn)(struct sk_buff *))
-{
-	return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
-}
-
 static unsigned int
-ip6t_local_hook(unsigned int hook,
-		   struct sk_buff **pskb,
-		   const struct net_device *in,
-		   const struct net_device *out,
-		   int (*okfn)(struct sk_buff *))
+ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
 {
-
-	unsigned long nfmark;
 	unsigned int ret;
 	struct in6_addr saddr, daddr;
 	u_int8_t hop_limit;
-	u_int32_t flowlabel;
-
+	u_int32_t flowlabel, mark;
+	int err;
 #if 0
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
-		if (net_ratelimit())
-			printk("ip6t_hook: happy cracking.\n");
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr)) {
+		net_warn_ratelimited("ip6t_hook: happy cracking\n");
 		return NF_ACCEPT;
 	}
 #endif
 
-	/* save source/dest address, nfmark, hoplimit, flowlabel, priority,  */
-	memcpy(&saddr, &(*pskb)->nh.ipv6h->saddr, sizeof(saddr));
-	memcpy(&daddr, &(*pskb)->nh.ipv6h->daddr, sizeof(daddr));
-	nfmark = (*pskb)->nfmark;
-	hop_limit = (*pskb)->nh.ipv6h->hop_limit;
+	/* save source/dest address, mark, hoplimit, flowlabel, priority,  */
+	memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+	memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
+	mark = skb->mark;
+	hop_limit = ipv6_hdr(skb)->hop_limit;
 
 	/* flowlabel and prio (includes version, which shouldn't change either */
-	flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h);
-
-	ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	flowlabel = *((u_int32_t *)ipv6_hdr(skb));
+
+	ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, NULL, out,
+			    dev_net(out)->ipv6.ip6table_mangle);
+
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
+	     !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) ||
+	     skb->mark != mark ||
+	     ipv6_hdr(skb)->hop_limit != hop_limit ||
+	     flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
+		err = ip6_route_me_harder(skb);
+		if (err < 0)
+			ret = NF_DROP_ERR(err);
+	}
 
-	if (ret != NF_DROP && ret != NF_STOLEN 
-		&& (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr))
-		    || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr))
-		    || (*pskb)->nfmark != nfmark
-		    || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) {
+	return ret;
+}
 
-		/* something which could affect routing has changed */
+/* The work comes in here from netfilter.c. */
+static unsigned int
+ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+		     const struct net_device *in, const struct net_device *out,
+		     int (*okfn)(struct sk_buff *))
+{
+	if (ops->hooknum == NF_INET_LOCAL_OUT)
+		return ip6t_mangle_out(skb, out);
+	if (ops->hooknum == NF_INET_POST_ROUTING)
+		return ip6t_do_table(skb, ops->hooknum, in, out,
+				     dev_net(out)->ipv6.ip6table_mangle);
+	/* INPUT/FORWARD */
+	return ip6t_do_table(skb, ops->hooknum, in, out,
+			     dev_net(in)->ipv6.ip6table_mangle);
+}
 
-		DEBUGP("ip6table_mangle: we'd need to re-route a packet\n");
-	}
+static struct nf_hook_ops *mangle_ops __read_mostly;
+static int __net_init ip6table_mangle_net_init(struct net *net)
+{
+	struct ip6t_replace *repl;
+
+	repl = ip6t_alloc_initial_table(&packet_mangler);
+	if (repl == NULL)
+		return -ENOMEM;
+	net->ipv6.ip6table_mangle =
+		ip6t_register_table(net, &packet_mangler, repl);
+	kfree(repl);
+	return PTR_ERR_OR_ZERO(net->ipv6.ip6table_mangle);
+}
 
-	return ret;
+static void __net_exit ip6table_mangle_net_exit(struct net *net)
+{
+	ip6t_unregister_table(net, net->ipv6.ip6table_mangle);
 }
 
-static struct nf_hook_ops ip6t_ops[] = {
-	{
-		.hook		= ip6t_route_hook,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_PRE_ROUTING,
-		.priority	= NF_IP6_PRI_MANGLE,
-	},
-	{
-		.hook		= ip6t_local_hook,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_LOCAL_IN,
-		.priority	= NF_IP6_PRI_MANGLE,
-	},
-	{
-		.hook		= ip6t_route_hook,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_FORWARD,
-		.priority	= NF_IP6_PRI_MANGLE,
-	},
-	{
-		.hook		= ip6t_local_hook,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_LOCAL_OUT,
-		.priority	= NF_IP6_PRI_MANGLE,
-	},
-	{
-		.hook		= ip6t_route_hook,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET6,
-		.hooknum	= NF_IP6_POST_ROUTING,
-		.priority	= NF_IP6_PRI_MANGLE,
-	},
+static struct pernet_operations ip6table_mangle_net_ops = {
+	.init = ip6table_mangle_net_init,
+	.exit = ip6table_mangle_net_exit,
 };
 
-static int __init init(void)
+static int __init ip6table_mangle_init(void)
 {
 	int ret;
 
-	/* Register table */
-	ret = ip6t_register_table(&packet_mangler, &initial_table.repl);
+	ret = register_pernet_subsys(&ip6table_mangle_net_ops);
 	if (ret < 0)
 		return ret;
 
 	/* Register hooks */
-	ret = nf_register_hook(&ip6t_ops[0]);
-	if (ret < 0)
+	mangle_ops = xt_hook_link(&packet_mangler, ip6table_mangle_hook);
+	if (IS_ERR(mangle_ops)) {
+		ret = PTR_ERR(mangle_ops);
 		goto cleanup_table;
-
-	ret = nf_register_hook(&ip6t_ops[1]);
-	if (ret < 0)
-		goto cleanup_hook0;
-
-	ret = nf_register_hook(&ip6t_ops[2]);
-	if (ret < 0)
-		goto cleanup_hook1;
-
-	ret = nf_register_hook(&ip6t_ops[3]);
-	if (ret < 0)
-		goto cleanup_hook2;
-
-	ret = nf_register_hook(&ip6t_ops[4]);
-	if (ret < 0)
-		goto cleanup_hook3;
+	}
 
 	return ret;
 
- cleanup_hook3:
-        nf_unregister_hook(&ip6t_ops[3]);
- cleanup_hook2:
-	nf_unregister_hook(&ip6t_ops[2]);
- cleanup_hook1:
-	nf_unregister_hook(&ip6t_ops[1]);
- cleanup_hook0:
-	nf_unregister_hook(&ip6t_ops[0]);
  cleanup_table:
-	ip6t_unregister_table(&packet_mangler);
-
+	unregister_pernet_subsys(&ip6table_mangle_net_ops);
 	return ret;
 }
 
-static void __exit fini(void)
+static void __exit ip6table_mangle_fini(void)
 {
-	unsigned int i;
-
-	for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++)
-		nf_unregister_hook(&ip6t_ops[i]);
-
-	ip6t_unregister_table(&packet_mangler);
+	xt_hook_unlink(&packet_mangler, mangle_ops);
+	unregister_pernet_subsys(&ip6table_mangle_net_ops);
 }
 
-module_init(init);
-module_exit(fini);
+module_init(ip6table_mangle_init);
+module_exit(ip6table_mangle_fini);
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
new file mode 100644
index 00000000000..387d8b8fc18
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT
+ * funded by Astaro.
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+
+static const struct xt_table nf_nat_ipv6_table = {
+	.name		= "nat",
+	.valid_hooks	= (1 << NF_INET_PRE_ROUTING) |
+			  (1 << NF_INET_POST_ROUTING) |
+			  (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_LOCAL_IN),
+	.me		= THIS_MODULE,
+	.af		= NFPROTO_IPV6,
+};
+
+static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+{
+	/* Force range to this IP; let proto decide mapping for
+	 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+	 */
+	struct nf_nat_range range;
+
+	range.flags = 0;
+	pr_debug("Allocating NULL binding for %p (%pI6)\n", ct,
+		 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
+		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 :
+		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6);
+
+	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
+}
+
+static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
+				     const struct net_device *in,
+				     const struct net_device *out,
+				     struct nf_conn *ct)
+{
+	struct net *net = nf_ct_net(ct);
+	unsigned int ret;
+
+	ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat);
+	if (ret == NF_ACCEPT) {
+		if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
+			ret = alloc_null_binding(ct, hooknum);
+	}
+	return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
+	       struct sk_buff *skb,
+	       const struct net_device *in,
+	       const struct net_device *out,
+	       int (*okfn)(struct sk_buff *))
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn_nat *nat;
+	enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+	__be16 frag_off;
+	int hdrlen;
+	u8 nexthdr;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	/* Can't track?  It's not due to stress, or conntrack would
+	 * have dropped it.  Hence it's the user's responsibilty to
+	 * packet filter it out, or implement conntrack/NAT for that
+	 * protocol. 8) --RR
+	 */
+	if (!ct)
+		return NF_ACCEPT;
+
+	/* Don't try to NAT if this packet is not conntracked */
+	if (nf_ct_is_untracked(ct))
+		return NF_ACCEPT;
+
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat == NULL)
+		return NF_ACCEPT;
+
+	switch (ctinfo) {
+	case IP_CT_RELATED:
+	case IP_CT_RELATED_REPLY:
+		nexthdr = ipv6_hdr(skb)->nexthdr;
+		hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+					  &nexthdr, &frag_off);
+
+		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
+							     ops->hooknum,
+							     hdrlen))
+				return NF_DROP;
+			else
+				return NF_ACCEPT;
+		}
+		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+	case IP_CT_NEW:
+		/* Seen it before?  This can happen for loopback, retrans,
+		 * or local packets.
+		 */
+		if (!nf_nat_initialized(ct, maniptype)) {
+			unsigned int ret;
+
+			ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
+			if (ret != NF_ACCEPT)
+				return ret;
+		} else {
+			pr_debug("Already setup manip %s for ct %p\n",
+				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
+				 ct);
+			if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+				goto oif_changed;
+		}
+		break;
+
+	default:
+		/* ESTABLISHED */
+		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
+			     ctinfo == IP_CT_ESTABLISHED_REPLY);
+		if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+			goto oif_changed;
+	}
+
+	return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+
+oif_changed:
+	nf_ct_kill_acct(ct, ctinfo, skb);
+	return NF_DROP;
+}
+
+static unsigned int
+nf_nat_ipv6_in(const struct nf_hook_ops *ops,
+	       struct sk_buff *skb,
+	       const struct net_device *in,
+	       const struct net_device *out,
+	       int (*okfn)(struct sk_buff *))
+{
+	unsigned int ret;
+	struct in6_addr daddr = ipv6_hdr(skb)->daddr;
+
+	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
+		skb_dst_drop(skb);
+
+	return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_out(const struct nf_hook_ops *ops,
+		struct sk_buff *skb,
+		const struct net_device *in,
+		const struct net_device *out,
+		int (*okfn)(struct sk_buff *))
+{
+#ifdef CONFIG_XFRM
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	int err;
+#endif
+	unsigned int ret;
+
+	/* root is playing with raw sockets. */
+	if (skb->len < sizeof(struct ipv6hdr))
+		return NF_ACCEPT;
+
+	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+				      &ct->tuplehash[!dir].tuple.dst.u3) ||
+		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
+		     ct->tuplehash[dir].tuple.src.u.all !=
+		     ct->tuplehash[!dir].tuple.dst.u.all)) {
+			err = nf_xfrm_me_harder(skb, AF_INET6);
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
+		}
+	}
+#endif
+	return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
+		     struct sk_buff *skb,
+		     const struct net_device *in,
+		     const struct net_device *out,
+		     int (*okfn)(struct sk_buff *))
+{
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	unsigned int ret;
+	int err;
+
+	/* root is playing with raw sockets. */
+	if (skb->len < sizeof(struct ipv6hdr))
+		return NF_ACCEPT;
+
+	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
+				      &ct->tuplehash[!dir].tuple.src.u3)) {
+			err = ip6_route_me_harder(skb);
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
+		}
+#ifdef CONFIG_XFRM
+		else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
+			 ct->tuplehash[dir].tuple.dst.u.all !=
+			 ct->tuplehash[!dir].tuple.src.u.all) {
+			err = nf_xfrm_me_harder(skb, AF_INET6);
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
+		}
+#endif
+	}
+	return ret;
+}
+
+static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
+	/* Before packet filtering, change destination */
+	{
+		.hook		= nf_nat_ipv6_in,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_PRE_ROUTING,
+		.priority	= NF_IP6_PRI_NAT_DST,
+	},
+	/* After packet filtering, change source */
+	{
+		.hook		= nf_nat_ipv6_out,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP6_PRI_NAT_SRC,
+	},
+	/* Before packet filtering, change destination */
+	{
+		.hook		= nf_nat_ipv6_local_fn,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP6_PRI_NAT_DST,
+	},
+	/* After packet filtering, change source */
+	{
+		.hook		= nf_nat_ipv6_fn,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP6_PRI_NAT_SRC,
+	},
+};
+
+static int __net_init ip6table_nat_net_init(struct net *net)
+{
+	struct ip6t_replace *repl;
+
+	repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table);
+	if (repl == NULL)
+		return -ENOMEM;
+	net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl);
+	kfree(repl);
+	return PTR_ERR_OR_ZERO(net->ipv6.ip6table_nat);
+}
+
+static void __net_exit ip6table_nat_net_exit(struct net *net)
+{
+	ip6t_unregister_table(net, net->ipv6.ip6table_nat);
+}
+
+static struct pernet_operations ip6table_nat_net_ops = {
+	.init	= ip6table_nat_net_init,
+	.exit	= ip6table_nat_net_exit,
+};
+
+static int __init ip6table_nat_init(void)
+{
+	int err;
+
+	err = register_pernet_subsys(&ip6table_nat_net_ops);
+	if (err < 0)
+		goto err1;
+
+	err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
+	if (err < 0)
+		goto err2;
+	return 0;
+
+err2:
+	unregister_pernet_subsys(&ip6table_nat_net_ops);
+err1:
+	return err;
+}
+
+static void __exit ip6table_nat_exit(void)
+{
+	nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
+	unregister_pernet_subsys(&ip6table_nat_net_ops);
+}
+
+module_init(ip6table_nat_init);
+module_exit(ip6table_nat_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index c2982efd14a..5274740acec 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -5,180 +5,83 @@
  */
 #include <linux/module.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/slab.h>
 
-#define RAW_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_OUT))
+#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
 
-#if 0
-#define DEBUGP(x, args...)	printk(KERN_DEBUG x, ## args)
-#else
-#define DEBUGP(x, args...)
-#endif
-
-/* Standard entry. */
-struct ip6t_standard
-{
-	struct ip6t_entry entry;
-	struct ip6t_standard_target target;
+static const struct xt_table packet_raw = {
+	.name = "raw",
+	.valid_hooks = RAW_VALID_HOOKS,
+	.me = THIS_MODULE,
+	.af = NFPROTO_IPV6,
+	.priority = NF_IP6_PRI_RAW,
 };
 
-struct ip6t_error_target
+/* The work comes in here from netfilter.c. */
+static unsigned int
+ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+		  const struct net_device *in, const struct net_device *out,
+		  int (*okfn)(struct sk_buff *))
 {
-	struct ip6t_entry_target target;
-	char errorname[IP6T_FUNCTION_MAXNAMELEN];
-};
+	const struct net *net = dev_net((in != NULL) ? in : out);
 
-struct ip6t_error
-{
-	struct ip6t_entry entry;
-	struct ip6t_error_target target;
-};
+	return ip6t_do_table(skb, ops->hooknum, in, out,
+			     net->ipv6.ip6table_raw);
+}
 
-static struct
-{
-	struct ip6t_replace repl;
-	struct ip6t_standard entries[2];
-	struct ip6t_error term;
-} initial_table __initdata = {
-	.repl = {
-		.name = "raw",
-		.valid_hooks = RAW_VALID_HOOKS,
-		.num_entries = 3,
-		.size = sizeof(struct ip6t_standard) * 2 + sizeof(struct ip6t_error),
-		.hook_entry = {
-			[NF_IP6_PRE_ROUTING] = 0,
-			[NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard)
-		},
-		.underflow = {
-			[NF_IP6_PRE_ROUTING] = 0,
-			[NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard)
-		},
-	},
-	.entries = {
-		/* PRE_ROUTING */
-		{
-			.entry = {
-				.target_offset = sizeof(struct ip6t_entry),
-				.next_offset = sizeof(struct ip6t_standard),
-			},
-			.target = {
-				.target = {
-					.u = {
-						.target_size = IP6T_ALIGN(sizeof(struct ip6t_standard_target)),
-					},
-				},
-				.verdict = -NF_ACCEPT - 1,
-			},
-		},
-
-		/* LOCAL_OUT */
-		{
-			.entry = {
-				.target_offset = sizeof(struct ip6t_entry),
-				.next_offset = sizeof(struct ip6t_standard),
-			},
-			.target = {
-				.target = {
-					.u = {
-						.target_size = IP6T_ALIGN(sizeof(struct ip6t_standard_target)),
-					},
-				},
-				.verdict = -NF_ACCEPT - 1,
-			},
-		},
-	},
-	/* ERROR */
-	.term = {
-		.entry = {
-			.target_offset = sizeof(struct ip6t_entry),
-			.next_offset = sizeof(struct ip6t_error),
-		},
-		.target = {
-			.target = {
-				.u = {
-					.user = {
-						.target_size = IP6T_ALIGN(sizeof(struct ip6t_error_target)),
-						.name = IP6T_ERROR_TARGET,
-					},
-				},
-			},
-			.errorname = "ERROR",
-		},
-	}
-};
+static struct nf_hook_ops *rawtable_ops __read_mostly;
 
-static struct ip6t_table packet_raw = { 
-	.name = "raw", 
-	.valid_hooks = RAW_VALID_HOOKS, 
-	.lock = RW_LOCK_UNLOCKED, 
-	.me = THIS_MODULE
-};
+static int __net_init ip6table_raw_net_init(struct net *net)
+{
+	struct ip6t_replace *repl;
+
+	repl = ip6t_alloc_initial_table(&packet_raw);
+	if (repl == NULL)
+		return -ENOMEM;
+	net->ipv6.ip6table_raw =
+		ip6t_register_table(net, &packet_raw, repl);
+	kfree(repl);
+	return PTR_ERR_OR_ZERO(net->ipv6.ip6table_raw);
+}
 
-/* The work comes in here from netfilter.c. */
-static unsigned int
-ip6t_hook(unsigned int hook,
-	 struct sk_buff **pskb,
-	 const struct net_device *in,
-	 const struct net_device *out,
-	 int (*okfn)(struct sk_buff *))
+static void __net_exit ip6table_raw_net_exit(struct net *net)
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_raw, NULL);
+	ip6t_unregister_table(net, net->ipv6.ip6table_raw);
 }
 
-static struct nf_hook_ops ip6t_ops[] = { 
-	{
-	  .hook = ip6t_hook, 
-	  .pf = PF_INET6,
-	  .hooknum = NF_IP6_PRE_ROUTING,
-	  .priority = NF_IP6_PRI_FIRST,
-	  .owner = THIS_MODULE,
-	},
-	{
-	  .hook = ip6t_hook, 
-	  .pf = PF_INET6, 
-	  .hooknum = NF_IP6_LOCAL_OUT,
-	  .priority = NF_IP6_PRI_FIRST,
-	  .owner = THIS_MODULE,
-	},
+static struct pernet_operations ip6table_raw_net_ops = {
+	.init = ip6table_raw_net_init,
+	.exit = ip6table_raw_net_exit,
 };
 
-static int __init init(void)
+static int __init ip6table_raw_init(void)
 {
 	int ret;
 
-	/* Register table */
-	ret = ip6t_register_table(&packet_raw, &initial_table.repl);
+	ret = register_pernet_subsys(&ip6table_raw_net_ops);
 	if (ret < 0)
 		return ret;
 
 	/* Register hooks */
-	ret = nf_register_hook(&ip6t_ops[0]);
-	if (ret < 0)
+	rawtable_ops = xt_hook_link(&packet_raw, ip6table_raw_hook);
+	if (IS_ERR(rawtable_ops)) {
+		ret = PTR_ERR(rawtable_ops);
 		goto cleanup_table;
-
-	ret = nf_register_hook(&ip6t_ops[1]);
-	if (ret < 0)
-		goto cleanup_hook0;
+	}
 
 	return ret;
 
- cleanup_hook0:
-	nf_unregister_hook(&ip6t_ops[0]);
  cleanup_table:
-	ip6t_unregister_table(&packet_raw);
-
+	unregister_pernet_subsys(&ip6table_raw_net_ops);
 	return ret;
 }
 
-static void __exit fini(void)
+static void __exit ip6table_raw_fini(void)
 {
-	unsigned int i;
-
-	for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++)
-		nf_unregister_hook(&ip6t_ops[i]);
-
-	ip6t_unregister_table(&packet_raw);
+	xt_hook_unlink(&packet_raw, rawtable_ops);
+	unregister_pernet_subsys(&ip6table_raw_net_ops);
 }
 
-module_init(init);
-module_exit(fini);
+module_init(ip6table_raw_init);
+module_exit(ip6table_raw_fini);
 MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
new file mode 100644
index 00000000000..ab3b0219ecf
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -0,0 +1,103 @@
+/*
+ * "security" table for IPv6
+ *
+ * This is for use by Mandatory Access Control (MAC) security models,
+ * which need to be able to manage security policy in separate context
+ * to DAC.
+ *
+ * Based on iptable_mangle.c
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org>
+ * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/slab.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>");
+MODULE_DESCRIPTION("ip6tables security table, for MAC rules");
+
+#define SECURITY_VALID_HOOKS	(1 << NF_INET_LOCAL_IN) | \
+				(1 << NF_INET_FORWARD) | \
+				(1 << NF_INET_LOCAL_OUT)
+
+static const struct xt_table security_table = {
+	.name		= "security",
+	.valid_hooks	= SECURITY_VALID_HOOKS,
+	.me		= THIS_MODULE,
+	.af		= NFPROTO_IPV6,
+	.priority	= NF_IP6_PRI_SECURITY,
+};
+
+static unsigned int
+ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+		       const struct net_device *in,
+		       const struct net_device *out,
+		       int (*okfn)(struct sk_buff *))
+{
+	const struct net *net = dev_net((in != NULL) ? in : out);
+
+	return ip6t_do_table(skb, ops->hooknum, in, out,
+			     net->ipv6.ip6table_security);
+}
+
+static struct nf_hook_ops *sectbl_ops __read_mostly;
+
+static int __net_init ip6table_security_net_init(struct net *net)
+{
+	struct ip6t_replace *repl;
+
+	repl = ip6t_alloc_initial_table(&security_table);
+	if (repl == NULL)
+		return -ENOMEM;
+	net->ipv6.ip6table_security =
+		ip6t_register_table(net, &security_table, repl);
+	kfree(repl);
+	return PTR_ERR_OR_ZERO(net->ipv6.ip6table_security);
+}
+
+static void __net_exit ip6table_security_net_exit(struct net *net)
+{
+	ip6t_unregister_table(net, net->ipv6.ip6table_security);
+}
+
+static struct pernet_operations ip6table_security_net_ops = {
+	.init = ip6table_security_net_init,
+	.exit = ip6table_security_net_exit,
+};
+
+static int __init ip6table_security_init(void)
+{
+	int ret;
+
+	ret = register_pernet_subsys(&ip6table_security_net_ops);
+	if (ret < 0)
+		return ret;
+
+	sectbl_ops = xt_hook_link(&security_table, ip6table_security_hook);
+	if (IS_ERR(sectbl_ops)) {
+		ret = PTR_ERR(sectbl_ops);
+		goto cleanup_table;
+	}
+
+	return ret;
+
+cleanup_table:
+	unregister_pernet_subsys(&ip6table_security_net_ops);
+	return ret;
+}
+
+static void __exit ip6table_security_fini(void)
+{
+	xt_hook_unlink(&security_table, sectbl_ops);
+	unregister_pernet_subsys(&ip6table_security_net_ops);
+}
+
+module_init(ip6table_security_init);
+module_exit(ip6table_security_fini);
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
new file mode 100644
index 00000000000..4cbc6b290dd
--- /dev/null
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -0,0 +1,483 @@
+/*
+ * Copyright (C)2004 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ */
+
+#include <linux/types.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <net/ipv6.h>
+#include <net/inet_frag.h>
+
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/netfilter/nf_log.h>
+
+static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
+			      struct nf_conntrack_tuple *tuple)
+{
+	const u_int32_t *ap;
+	u_int32_t _addrs[8];
+
+	ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr),
+				sizeof(_addrs), _addrs);
+	if (ap == NULL)
+		return false;
+
+	memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
+	memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
+
+	return true;
+}
+
+static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
+			      const struct nf_conntrack_tuple *orig)
+{
+	memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6));
+	memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6));
+
+	return true;
+}
+
+static int ipv6_print_tuple(struct seq_file *s,
+			    const struct nf_conntrack_tuple *tuple)
+{
+	return seq_printf(s, "src=%pI6 dst=%pI6 ",
+			  tuple->src.u3.ip6, tuple->dst.u3.ip6);
+}
+
+static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+			    unsigned int *dataoff, u_int8_t *protonum)
+{
+	unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
+	__be16 frag_off;
+	int protoff;
+	u8 nexthdr;
+
+	if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
+			  &nexthdr, sizeof(nexthdr)) != 0) {
+		pr_debug("ip6_conntrack_core: can't get nexthdr\n");
+		return -NF_ACCEPT;
+	}
+	protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
+	/*
+	 * (protoff == skb->len) means the packet has not data, just
+	 * IPv6 and possibly extensions headers, but it is tracked anyway
+	 */
+	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+		pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
+		return -NF_ACCEPT;
+	}
+
+	*dataoff = protoff;
+	*protonum = nexthdr;
+	return NF_ACCEPT;
+}
+
+static unsigned int ipv6_helper(const struct nf_hook_ops *ops,
+				struct sk_buff *skb,
+				const struct net_device *in,
+				const struct net_device *out,
+				int (*okfn)(struct sk_buff *))
+{
+	struct nf_conn *ct;
+	const struct nf_conn_help *help;
+	const struct nf_conntrack_helper *helper;
+	enum ip_conntrack_info ctinfo;
+	__be16 frag_off;
+	int protoff;
+	u8 nexthdr;
+
+	/* This is where we call the helper: as the packet goes out. */
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+		return NF_ACCEPT;
+
+	help = nfct_help(ct);
+	if (!help)
+		return NF_ACCEPT;
+	/* rcu_read_lock()ed by nf_hook_slow */
+	helper = rcu_dereference(help->helper);
+	if (!helper)
+		return NF_ACCEPT;
+
+	nexthdr = ipv6_hdr(skb)->nexthdr;
+	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+				   &frag_off);
+	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+		pr_debug("proto header not found\n");
+		return NF_ACCEPT;
+	}
+
+	return helper->help(skb, protoff, ct, ctinfo);
+}
+
+static unsigned int ipv6_confirm(const struct nf_hook_ops *ops,
+				 struct sk_buff *skb,
+				 const struct net_device *in,
+				 const struct net_device *out,
+				 int (*okfn)(struct sk_buff *))
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	unsigned char pnum = ipv6_hdr(skb)->nexthdr;
+	int protoff;
+	__be16 frag_off;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+		goto out;
+
+	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
+				   &frag_off);
+	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+		pr_debug("proto header not found\n");
+		goto out;
+	}
+
+	/* adjust seqs for loopback traffic only in outgoing direction */
+	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+	    !nf_is_loopback_packet(skb)) {
+		if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
+			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+			return NF_DROP;
+		}
+	}
+out:
+	/* We've seen it coming out the other side: confirm it */
+	return nf_conntrack_confirm(skb);
+}
+
+static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
+				      struct sk_buff *skb,
+				      const struct net_device *in,
+				      const struct net_device *out,
+				      int (*okfn)(struct sk_buff *))
+{
+	return nf_conntrack_in(dev_net(in), PF_INET6, ops->hooknum, skb);
+}
+
+static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
+					 struct sk_buff *skb,
+					 const struct net_device *in,
+					 const struct net_device *out,
+					 int (*okfn)(struct sk_buff *))
+{
+	/* root is playing with raw sockets. */
+	if (skb->len < sizeof(struct ipv6hdr)) {
+		net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
+		return NF_ACCEPT;
+	}
+	return nf_conntrack_in(dev_net(out), PF_INET6, ops->hooknum, skb);
+}
+
+static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
+	{
+		.hook		= ipv6_conntrack_in,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_PRE_ROUTING,
+		.priority	= NF_IP6_PRI_CONNTRACK,
+	},
+	{
+		.hook		= ipv6_conntrack_local,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP6_PRI_CONNTRACK,
+	},
+	{
+		.hook		= ipv6_helper,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP6_PRI_CONNTRACK_HELPER,
+	},
+	{
+		.hook		= ipv6_confirm,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP6_PRI_LAST,
+	},
+	{
+		.hook		= ipv6_helper,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP6_PRI_CONNTRACK_HELPER,
+	},
+	{
+		.hook		= ipv6_confirm,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP6_PRI_LAST-1,
+	},
+};
+
+static int
+ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct ipv6_pinfo *inet6 = inet6_sk(sk);
+	const struct nf_conntrack_tuple_hash *h;
+	struct sockaddr_in6 sin6;
+	struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
+	struct nf_conn *ct;
+
+	tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
+	tuple.src.u.tcp.port = inet->inet_sport;
+	tuple.dst.u3.in6 = sk->sk_v6_daddr;
+	tuple.dst.u.tcp.port = inet->inet_dport;
+	tuple.dst.protonum = sk->sk_protocol;
+
+	if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP)
+		return -ENOPROTOOPT;
+
+	if (*len < 0 || (unsigned int) *len < sizeof(sin6))
+		return -EINVAL;
+
+	h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
+	if (!h) {
+		pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
+			 &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
+			 &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
+		return -ENOENT;
+	}
+
+	ct = nf_ct_tuplehash_to_ctrack(h);
+
+	sin6.sin6_family = AF_INET6;
+	sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
+	sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK;
+	memcpy(&sin6.sin6_addr,
+		&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
+					sizeof(sin6.sin6_addr));
+
+	nf_ct_put(ct);
+	sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr,
+						 sk->sk_bound_dev_if);
+	return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
+				const struct nf_conntrack_tuple *tuple)
+{
+	if (nla_put(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4,
+		    &tuple->src.u3.ip6) ||
+	    nla_put(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4,
+		    &tuple->dst.u3.ip6))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static const struct nla_policy ipv6_nla_policy[CTA_IP_MAX+1] = {
+	[CTA_IP_V6_SRC]	= { .len = sizeof(u_int32_t)*4 },
+	[CTA_IP_V6_DST]	= { .len = sizeof(u_int32_t)*4 },
+};
+
+static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
+				struct nf_conntrack_tuple *t)
+{
+	if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST])
+		return -EINVAL;
+
+	memcpy(&t->src.u3.ip6, nla_data(tb[CTA_IP_V6_SRC]),
+	       sizeof(u_int32_t) * 4);
+	memcpy(&t->dst.u3.ip6, nla_data(tb[CTA_IP_V6_DST]),
+	       sizeof(u_int32_t) * 4);
+
+	return 0;
+}
+
+static int ipv6_nlattr_tuple_size(void)
+{
+	return nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1);
+}
+#endif
+
+struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
+	.l3proto		= PF_INET6,
+	.name			= "ipv6",
+	.pkt_to_tuple		= ipv6_pkt_to_tuple,
+	.invert_tuple		= ipv6_invert_tuple,
+	.print_tuple		= ipv6_print_tuple,
+	.get_l4proto		= ipv6_get_l4proto,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	.tuple_to_nlattr	= ipv6_tuple_to_nlattr,
+	.nlattr_tuple_size	= ipv6_nlattr_tuple_size,
+	.nlattr_to_tuple	= ipv6_nlattr_to_tuple,
+	.nla_policy		= ipv6_nla_policy,
+#endif
+	.me			= THIS_MODULE,
+};
+
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
+
+static struct nf_sockopt_ops so_getorigdst6 = {
+	.pf		= NFPROTO_IPV6,
+	.get_optmin	= IP6T_SO_ORIGINAL_DST,
+	.get_optmax	= IP6T_SO_ORIGINAL_DST + 1,
+	.get		= ipv6_getorigdst,
+	.owner		= THIS_MODULE,
+};
+
+static int ipv6_net_init(struct net *net)
+{
+	int ret = 0;
+
+	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp6);
+	if (ret < 0) {
+		pr_err("nf_conntrack_tcp6: pernet registration failed\n");
+		goto out;
+	}
+	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp6);
+	if (ret < 0) {
+		pr_err("nf_conntrack_udp6: pernet registration failed\n");
+		goto cleanup_tcp6;
+	}
+	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmpv6);
+	if (ret < 0) {
+		pr_err("nf_conntrack_icmp6: pernet registration failed\n");
+		goto cleanup_udp6;
+	}
+	ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6);
+	if (ret < 0) {
+		pr_err("nf_conntrack_ipv6: pernet registration failed.\n");
+		goto cleanup_icmpv6;
+	}
+	return 0;
+ cleanup_icmpv6:
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6);
+ cleanup_udp6:
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6);
+ cleanup_tcp6:
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6);
+ out:
+	return ret;
+}
+
+static void ipv6_net_exit(struct net *net)
+{
+	nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6);
+	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6);
+}
+
+static struct pernet_operations ipv6_net_ops = {
+	.init = ipv6_net_init,
+	.exit = ipv6_net_exit,
+};
+
+static int __init nf_conntrack_l3proto_ipv6_init(void)
+{
+	int ret = 0;
+
+	need_conntrack();
+	nf_defrag_ipv6_enable();
+
+	ret = nf_register_sockopt(&so_getorigdst6);
+	if (ret < 0) {
+		pr_err("Unable to register netfilter socket option\n");
+		return ret;
+	}
+
+	ret = register_pernet_subsys(&ipv6_net_ops);
+	if (ret < 0)
+		goto cleanup_sockopt;
+
+	ret = nf_register_hooks(ipv6_conntrack_ops,
+				ARRAY_SIZE(ipv6_conntrack_ops));
+	if (ret < 0) {
+		pr_err("nf_conntrack_ipv6: can't register pre-routing defrag "
+		       "hook.\n");
+		goto cleanup_pernet;
+	}
+
+	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp6);
+	if (ret < 0) {
+		pr_err("nf_conntrack_ipv6: can't register tcp6 proto.\n");
+		goto cleanup_hooks;
+	}
+
+	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp6);
+	if (ret < 0) {
+		pr_err("nf_conntrack_ipv6: can't register udp6 proto.\n");
+		goto cleanup_tcp6;
+	}
+
+	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmpv6);
+	if (ret < 0) {
+		pr_err("nf_conntrack_ipv6: can't register icmpv6 proto.\n");
+		goto cleanup_udp6;
+	}
+
+	ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6);
+	if (ret < 0) {
+		pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n");
+		goto cleanup_icmpv6;
+	}
+	return ret;
+
+ cleanup_icmpv6:
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
+ cleanup_udp6:
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6);
+ cleanup_tcp6:
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+ cleanup_hooks:
+	nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
+ cleanup_pernet:
+	unregister_pernet_subsys(&ipv6_net_ops);
+ cleanup_sockopt:
+	nf_unregister_sockopt(&so_getorigdst6);
+	return ret;
+}
+
+static void __exit nf_conntrack_l3proto_ipv6_fini(void)
+{
+	synchronize_net();
+	nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6);
+	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
+	nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
+	unregister_pernet_subsys(&ipv6_net_ops);
+	nf_unregister_sockopt(&so_getorigdst6);
+}
+
+module_init(nf_conntrack_l3proto_ipv6_init);
+module_exit(nf_conntrack_l3proto_ipv6_fini);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
new file mode 100644
index 00000000000..b3807c5cb88
--- /dev/null
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -0,0 +1,396 @@
+/*
+ * Copyright (C)2003,2004 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <linux/seq_file.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
+#include <net/netfilter/nf_log.h>
+
+static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
+
+static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
+{
+	return &net->ct.nf_ct_proto.icmpv6;
+}
+
+static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
+				unsigned int dataoff,
+				struct nf_conntrack_tuple *tuple)
+{
+	const struct icmp6hdr *hp;
+	struct icmp6hdr _hdr;
+
+	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+	if (hp == NULL)
+		return false;
+	tuple->dst.u.icmp.type = hp->icmp6_type;
+	tuple->src.u.icmp.id = hp->icmp6_identifier;
+	tuple->dst.u.icmp.code = hp->icmp6_code;
+
+	return true;
+}
+
+/* Add 1; spaces filled with 0. */
+static const u_int8_t invmap[] = {
+	[ICMPV6_ECHO_REQUEST - 128]	= ICMPV6_ECHO_REPLY + 1,
+	[ICMPV6_ECHO_REPLY - 128]	= ICMPV6_ECHO_REQUEST + 1,
+	[ICMPV6_NI_QUERY - 128]		= ICMPV6_NI_REPLY + 1,
+	[ICMPV6_NI_REPLY - 128]		= ICMPV6_NI_QUERY +1
+};
+
+static const u_int8_t noct_valid_new[] = {
+	[ICMPV6_MGM_QUERY - 130] = 1,
+	[ICMPV6_MGM_REPORT -130] = 1,
+	[ICMPV6_MGM_REDUCTION - 130] = 1,
+	[NDISC_ROUTER_SOLICITATION - 130] = 1,
+	[NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
+	[NDISC_NEIGHBOUR_SOLICITATION - 130] = 1,
+	[NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1,
+	[ICMPV6_MLD2_REPORT - 130] = 1
+};
+
+static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
+				const struct nf_conntrack_tuple *orig)
+{
+	int type = orig->dst.u.icmp.type - 128;
+	if (type < 0 || type >= sizeof(invmap) || !invmap[type])
+		return false;
+
+	tuple->src.u.icmp.id   = orig->src.u.icmp.id;
+	tuple->dst.u.icmp.type = invmap[type] - 1;
+	tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+	return true;
+}
+
+/* Print out the per-protocol part of the tuple. */
+static int icmpv6_print_tuple(struct seq_file *s,
+			      const struct nf_conntrack_tuple *tuple)
+{
+	return seq_printf(s, "type=%u code=%u id=%u ",
+			  tuple->dst.u.icmp.type,
+			  tuple->dst.u.icmp.code,
+			  ntohs(tuple->src.u.icmp.id));
+}
+
+static unsigned int *icmpv6_get_timeouts(struct net *net)
+{
+	return &icmpv6_pernet(net)->timeout;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int icmpv6_packet(struct nf_conn *ct,
+		       const struct sk_buff *skb,
+		       unsigned int dataoff,
+		       enum ip_conntrack_info ctinfo,
+		       u_int8_t pf,
+		       unsigned int hooknum,
+		       unsigned int *timeout)
+{
+	/* Do not immediately delete the connection after the first
+	   successful reply to avoid excessive conntrackd traffic
+	   and also to handle correctly ICMP echo reply duplicates. */
+	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+
+	return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
+		       unsigned int dataoff, unsigned int *timeouts)
+{
+	static const u_int8_t valid_new[] = {
+		[ICMPV6_ECHO_REQUEST - 128] = 1,
+		[ICMPV6_NI_QUERY - 128] = 1
+	};
+	int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
+
+	if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
+		/* Can't create a new ICMPv6 `conn' with this. */
+		pr_debug("icmpv6: can't create new conn with type %u\n",
+			 type + 128);
+		nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
+		if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6))
+			nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL,
+				      NULL, NULL,
+				      "nf_ct_icmpv6: invalid new with type %d ",
+				      type + 128);
+		return false;
+	}
+	return true;
+}
+
+static int
+icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
+		     struct sk_buff *skb,
+		     unsigned int icmp6off,
+		     enum ip_conntrack_info *ctinfo,
+		     unsigned int hooknum)
+{
+	struct nf_conntrack_tuple intuple, origtuple;
+	const struct nf_conntrack_tuple_hash *h;
+	const struct nf_conntrack_l4proto *inproto;
+	u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+
+	NF_CT_ASSERT(skb->nfct == NULL);
+
+	/* Are they talking about one of our connections? */
+	if (!nf_ct_get_tuplepr(skb,
+			       skb_network_offset(skb)
+				+ sizeof(struct ipv6hdr)
+				+ sizeof(struct icmp6hdr),
+			       PF_INET6, &origtuple)) {
+		pr_debug("icmpv6_error: Can't get tuple\n");
+		return -NF_ACCEPT;
+	}
+
+	/* rcu_read_lock()ed by nf_hook_slow */
+	inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
+
+	/* Ordinarily, we'd expect the inverted tupleproto, but it's
+	   been preserved inside the ICMP. */
+	if (!nf_ct_invert_tuple(&intuple, &origtuple,
+				&nf_conntrack_l3proto_ipv6, inproto)) {
+		pr_debug("icmpv6_error: Can't invert tuple\n");
+		return -NF_ACCEPT;
+	}
+
+	*ctinfo = IP_CT_RELATED;
+
+	h = nf_conntrack_find_get(net, zone, &intuple);
+	if (!h) {
+		pr_debug("icmpv6_error: no match\n");
+		return -NF_ACCEPT;
+	} else {
+		if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+			*ctinfo += IP_CT_IS_REPLY;
+	}
+
+	/* Update skb to refer to this connection */
+	skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
+	skb->nfctinfo = *ctinfo;
+	return NF_ACCEPT;
+}
+
+static int
+icmpv6_error(struct net *net, struct nf_conn *tmpl,
+	     struct sk_buff *skb, unsigned int dataoff,
+	     enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum)
+{
+	const struct icmp6hdr *icmp6h;
+	struct icmp6hdr _ih;
+	int type;
+
+	icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
+	if (icmp6h == NULL) {
+		if (LOG_INVALID(net, IPPROTO_ICMPV6))
+			nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
+			      "nf_ct_icmpv6: short packet ");
+		return -NF_ACCEPT;
+	}
+
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	    nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
+		if (LOG_INVALID(net, IPPROTO_ICMPV6))
+			nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_icmpv6: ICMPv6 checksum failed ");
+		return -NF_ACCEPT;
+	}
+
+	type = icmp6h->icmp6_type - 130;
+	if (type >= 0 && type < sizeof(noct_valid_new) &&
+	    noct_valid_new[type]) {
+		skb->nfct = &nf_ct_untracked_get()->ct_general;
+		skb->nfctinfo = IP_CT_NEW;
+		nf_conntrack_get(skb->nfct);
+		return NF_ACCEPT;
+	}
+
+	/* is not error message ? */
+	if (icmp6h->icmp6_type >= 128)
+		return NF_ACCEPT;
+
+	return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum);
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+static int icmpv6_tuple_to_nlattr(struct sk_buff *skb,
+				  const struct nf_conntrack_tuple *t)
+{
+	if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) ||
+	    nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) ||
+	    nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = {
+	[CTA_PROTO_ICMPV6_TYPE]	= { .type = NLA_U8 },
+	[CTA_PROTO_ICMPV6_CODE]	= { .type = NLA_U8 },
+	[CTA_PROTO_ICMPV6_ID]	= { .type = NLA_U16 },
+};
+
+static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
+				struct nf_conntrack_tuple *tuple)
+{
+	if (!tb[CTA_PROTO_ICMPV6_TYPE] ||
+	    !tb[CTA_PROTO_ICMPV6_CODE] ||
+	    !tb[CTA_PROTO_ICMPV6_ID])
+		return -EINVAL;
+
+	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
+	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
+	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
+
+	if (tuple->dst.u.icmp.type < 128 ||
+	    tuple->dst.u.icmp.type - 128 >= sizeof(invmap) ||
+	    !invmap[tuple->dst.u.icmp.type - 128])
+		return -EINVAL;
+
+	return 0;
+}
+
+static int icmpv6_nlattr_tuple_size(void)
+{
+	return nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
+}
+#endif
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
+					struct net *net, void *data)
+{
+	unsigned int *timeout = data;
+	struct nf_icmp_net *in = icmpv6_pernet(net);
+
+	if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
+		*timeout =
+		    ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
+	} else {
+		/* Set default ICMPv6 timeout. */
+		*timeout = in->timeout;
+	}
+	return 0;
+}
+
+static int
+icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+	const unsigned int *timeout = data;
+
+	if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -ENOSPC;
+}
+
+static const struct nla_policy
+icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
+	[CTA_TIMEOUT_ICMPV6_TIMEOUT]	= { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table icmpv6_sysctl_table[] = {
+	{
+		.procname	= "nf_conntrack_icmpv6_timeout",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{ }
+};
+#endif /* CONFIG_SYSCTL */
+
+static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
+				       struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+	pn->ctl_table = kmemdup(icmpv6_sysctl_table,
+				sizeof(icmpv6_sysctl_table),
+				GFP_KERNEL);
+	if (!pn->ctl_table)
+		return -ENOMEM;
+
+	pn->ctl_table[0].data = &in->timeout;
+#endif
+	return 0;
+}
+
+static int icmpv6_init_net(struct net *net, u_int16_t proto)
+{
+	struct nf_icmp_net *in = icmpv6_pernet(net);
+	struct nf_proto_net *pn = &in->pn;
+
+	in->timeout = nf_ct_icmpv6_timeout;
+
+	return icmpv6_kmemdup_sysctl_table(pn, in);
+}
+
+static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
+{
+	return &net->ct.nf_ct_proto.icmpv6.pn;
+}
+
+struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
+{
+	.l3proto		= PF_INET6,
+	.l4proto		= IPPROTO_ICMPV6,
+	.name			= "icmpv6",
+	.pkt_to_tuple		= icmpv6_pkt_to_tuple,
+	.invert_tuple		= icmpv6_invert_tuple,
+	.print_tuple		= icmpv6_print_tuple,
+	.packet			= icmpv6_packet,
+	.get_timeouts		= icmpv6_get_timeouts,
+	.new			= icmpv6_new,
+	.error			= icmpv6_error,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	.tuple_to_nlattr	= icmpv6_tuple_to_nlattr,
+	.nlattr_tuple_size	= icmpv6_nlattr_tuple_size,
+	.nlattr_to_tuple	= icmpv6_nlattr_to_tuple,
+	.nla_policy		= icmpv6_nla_policy,
+#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+	.ctnl_timeout		= {
+		.nlattr_to_obj	= icmpv6_timeout_nlattr_to_obj,
+		.obj_to_nlattr	= icmpv6_timeout_obj_to_nlattr,
+		.nlattr_max	= CTA_TIMEOUT_ICMP_MAX,
+		.obj_size	= sizeof(unsigned int),
+		.nla_policy	= icmpv6_timeout_nla_policy,
+	},
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+	.init_net		= icmpv6_init_net,
+	.get_net_proto		= icmpv6_get_net_proto,
+};
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
new file mode 100644
index 00000000000..0d5279fd852
--- /dev/null
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -0,0 +1,694 @@
+/*
+ * IPv6 fragment reassembly for connection tracking
+ *
+ * Copyright (C)2004 USAGI/WIDE Project
+ *
+ * Author:
+ *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *
+ * Based on: net/ipv6/reassembly.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "IPv6-nf: " fmt
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/jiffies.h>
+#include <linux/net.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+#include <net/inet_frag.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/inet_ecn.h>
+#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#include <linux/sysctl.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+
+
+struct nf_ct_frag6_skb_cb
+{
+	struct inet6_skb_parm	h;
+	int			offset;
+	struct sk_buff		*orig;
+};
+
+#define NFCT_FRAG6_CB(skb)	((struct nf_ct_frag6_skb_cb*)((skb)->cb))
+
+static struct inet_frags nf_frags;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table nf_ct_frag6_sysctl_table[] = {
+	{
+		.procname	= "nf_conntrack_frag6_timeout",
+		.data		= &init_net.nf_frag.frags.timeout,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "nf_conntrack_frag6_low_thresh",
+		.data		= &init_net.nf_frag.frags.low_thresh,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "nf_conntrack_frag6_high_thresh",
+		.data		= &init_net.nf_frag.frags.high_thresh,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{ }
+};
+
+static int nf_ct_frag6_sysctl_register(struct net *net)
+{
+	struct ctl_table *table;
+	struct ctl_table_header *hdr;
+
+	table = nf_ct_frag6_sysctl_table;
+	if (!net_eq(net, &init_net)) {
+		table = kmemdup(table, sizeof(nf_ct_frag6_sysctl_table),
+				GFP_KERNEL);
+		if (table == NULL)
+			goto err_alloc;
+
+		table[0].data = &net->nf_frag.frags.timeout;
+		table[1].data = &net->nf_frag.frags.low_thresh;
+		table[2].data = &net->nf_frag.frags.high_thresh;
+	}
+
+	hdr = register_net_sysctl(net, "net/netfilter", table);
+	if (hdr == NULL)
+		goto err_reg;
+
+	net->nf_frag.sysctl.frags_hdr = hdr;
+	return 0;
+
+err_reg:
+	if (!net_eq(net, &init_net))
+		kfree(table);
+err_alloc:
+	return -ENOMEM;
+}
+
+static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr);
+	if (!net_eq(net, &init_net))
+		kfree(table);
+}
+
+#else
+static int nf_ct_frag6_sysctl_register(struct net *net)
+{
+	return 0;
+}
+static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
+{
+}
+#endif
+
+static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
+{
+	return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
+}
+
+static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
+				 const struct in6_addr *daddr)
+{
+	u32 c;
+
+	net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
+	c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+			 (__force u32)id, nf_frags.rnd);
+	return c & (INETFRAGS_HASHSZ - 1);
+}
+
+
+static unsigned int nf_hashfn(struct inet_frag_queue *q)
+{
+	const struct frag_queue *nq;
+
+	nq = container_of(q, struct frag_queue, q);
+	return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
+}
+
+static void nf_skb_free(struct sk_buff *skb)
+{
+	if (NFCT_FRAG6_CB(skb)->orig)
+		kfree_skb(NFCT_FRAG6_CB(skb)->orig);
+}
+
+static void nf_ct_frag6_expire(unsigned long data)
+{
+	struct frag_queue *fq;
+	struct net *net;
+
+	fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+	net = container_of(fq->q.net, struct net, nf_frag.frags);
+
+	ip6_expire_frag_queue(net, fq, &nf_frags);
+}
+
+/* Creation primitives. */
+static inline struct frag_queue *fq_find(struct net *net, __be32 id,
+					 u32 user, struct in6_addr *src,
+					 struct in6_addr *dst, u8 ecn)
+{
+	struct inet_frag_queue *q;
+	struct ip6_create_arg arg;
+	unsigned int hash;
+
+	arg.id = id;
+	arg.user = user;
+	arg.src = src;
+	arg.dst = dst;
+	arg.ecn = ecn;
+
+	read_lock_bh(&nf_frags.lock);
+	hash = nf_hash_frag(id, src, dst);
+
+	q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
+	local_bh_enable();
+	if (IS_ERR_OR_NULL(q)) {
+		inet_frag_maybe_warn_overflow(q, pr_fmt());
+		return NULL;
+	}
+	return container_of(q, struct frag_queue, q);
+}
+
+
+static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
+			     const struct frag_hdr *fhdr, int nhoff)
+{
+	struct sk_buff *prev, *next;
+	unsigned int payload_len;
+	int offset, end;
+	u8 ecn;
+
+	if (fq->q.last_in & INET_FRAG_COMPLETE) {
+		pr_debug("Already completed\n");
+		goto err;
+	}
+
+	payload_len = ntohs(ipv6_hdr(skb)->payload_len);
+
+	offset = ntohs(fhdr->frag_off) & ~0x7;
+	end = offset + (payload_len -
+			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
+
+	if ((unsigned int)end > IPV6_MAXPLEN) {
+		pr_debug("offset is too large.\n");
+		return -1;
+	}
+
+	ecn = ip6_frag_ecn(ipv6_hdr(skb));
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		const unsigned char *nh = skb_network_header(skb);
+		skb->csum = csum_sub(skb->csum,
+				     csum_partial(nh, (u8 *)(fhdr + 1) - nh,
+						  0));
+	}
+
+	/* Is this the final fragment? */
+	if (!(fhdr->frag_off & htons(IP6_MF))) {
+		/* If we already have some bits beyond end
+		 * or have different end, the segment is corrupted.
+		 */
+		if (end < fq->q.len ||
+		    ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) {
+			pr_debug("already received last fragment\n");
+			goto err;
+		}
+		fq->q.last_in |= INET_FRAG_LAST_IN;
+		fq->q.len = end;
+	} else {
+		/* Check if the fragment is rounded to 8 bytes.
+		 * Required by the RFC.
+		 */
+		if (end & 0x7) {
+			/* RFC2460 says always send parameter problem in
+			 * this case. -DaveM
+			 */
+			pr_debug("end of fragment not rounded to 8 bytes.\n");
+			return -1;
+		}
+		if (end > fq->q.len) {
+			/* Some bits beyond end -> corruption. */
+			if (fq->q.last_in & INET_FRAG_LAST_IN) {
+				pr_debug("last packet already reached.\n");
+				goto err;
+			}
+			fq->q.len = end;
+		}
+	}
+
+	if (end == offset)
+		goto err;
+
+	/* Point into the IP datagram 'data' part. */
+	if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) {
+		pr_debug("queue: message is too short.\n");
+		goto err;
+	}
+	if (pskb_trim_rcsum(skb, end - offset)) {
+		pr_debug("Can't trim\n");
+		goto err;
+	}
+
+	/* Find out which fragments are in front and at the back of us
+	 * in the chain of fragments so far.  We must know where to put
+	 * this fragment, right?
+	 */
+	prev = fq->q.fragments_tail;
+	if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
+		next = NULL;
+		goto found;
+	}
+	prev = NULL;
+	for (next = fq->q.fragments; next != NULL; next = next->next) {
+		if (NFCT_FRAG6_CB(next)->offset >= offset)
+			break;	/* bingo! */
+		prev = next;
+	}
+
+found:
+	/* RFC5722, Section 4:
+	 *                                  When reassembling an IPv6 datagram, if
+	 *   one or more its constituent fragments is determined to be an
+	 *   overlapping fragment, the entire datagram (and any constituent
+	 *   fragments, including those not yet received) MUST be silently
+	 *   discarded.
+	 */
+
+	/* Check for overlap with preceding fragment. */
+	if (prev &&
+	    (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
+		goto discard_fq;
+
+	/* Look for overlap with succeeding segment. */
+	if (next && NFCT_FRAG6_CB(next)->offset < end)
+		goto discard_fq;
+
+	NFCT_FRAG6_CB(skb)->offset = offset;
+
+	/* Insert this fragment in the chain of fragments. */
+	skb->next = next;
+	if (!next)
+		fq->q.fragments_tail = skb;
+	if (prev)
+		prev->next = skb;
+	else
+		fq->q.fragments = skb;
+
+	if (skb->dev) {
+		fq->iif = skb->dev->ifindex;
+		skb->dev = NULL;
+	}
+	fq->q.stamp = skb->tstamp;
+	fq->q.meat += skb->len;
+	fq->ecn |= ecn;
+	if (payload_len > fq->q.max_size)
+		fq->q.max_size = payload_len;
+	add_frag_mem_limit(&fq->q, skb->truesize);
+
+	/* The first fragment.
+	 * nhoffset is obtained from the first fragment, of course.
+	 */
+	if (offset == 0) {
+		fq->nhoffset = nhoff;
+		fq->q.last_in |= INET_FRAG_FIRST_IN;
+	}
+
+	inet_frag_lru_move(&fq->q);
+	return 0;
+
+discard_fq:
+	inet_frag_kill(&fq->q, &nf_frags);
+err:
+	return -1;
+}
+
+/*
+ *	Check if this packet is complete.
+ *	Returns NULL on failure by any reason, and pointer
+ *	to current nexthdr field in reassembled frame.
+ *
+ *	It is called with locked fq, and caller must check that
+ *	queue is eligible for reassembly i.e. it is not COMPLETE,
+ *	the last and the first frames arrived and all the bits are here.
+ */
+static struct sk_buff *
+nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
+{
+	struct sk_buff *fp, *op, *head = fq->q.fragments;
+	int    payload_len;
+	u8 ecn;
+
+	inet_frag_kill(&fq->q, &nf_frags);
+
+	WARN_ON(head == NULL);
+	WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
+
+	ecn = ip_frag_ecn_table[fq->ecn];
+	if (unlikely(ecn == 0xff))
+		goto out_fail;
+
+	/* Unfragmented part is taken from the first segment. */
+	payload_len = ((head->data - skb_network_header(head)) -
+		       sizeof(struct ipv6hdr) + fq->q.len -
+		       sizeof(struct frag_hdr));
+	if (payload_len > IPV6_MAXPLEN) {
+		pr_debug("payload len is too large.\n");
+		goto out_oversize;
+	}
+
+	/* Head of list must not be cloned. */
+	if (skb_unclone(head, GFP_ATOMIC)) {
+		pr_debug("skb is cloned but can't expand head");
+		goto out_oom;
+	}
+
+	/* If the first fragment is fragmented itself, we split
+	 * it to two chunks: the first with data and paged part
+	 * and the second, holding only fragments. */
+	if (skb_has_frag_list(head)) {
+		struct sk_buff *clone;
+		int i, plen = 0;
+
+		clone = alloc_skb(0, GFP_ATOMIC);
+		if (clone == NULL)
+			goto out_oom;
+
+		clone->next = head->next;
+		head->next = clone;
+		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+		skb_frag_list_init(head);
+		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+		clone->len = clone->data_len = head->data_len - plen;
+		head->data_len -= clone->len;
+		head->len -= clone->len;
+		clone->csum = 0;
+		clone->ip_summed = head->ip_summed;
+
+		NFCT_FRAG6_CB(clone)->orig = NULL;
+		add_frag_mem_limit(&fq->q, clone->truesize);
+	}
+
+	/* We have to remove fragment header from datagram and to relocate
+	 * header in order to calculate ICV correctly. */
+	skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
+	memmove(head->head + sizeof(struct frag_hdr), head->head,
+		(head->data - head->head) - sizeof(struct frag_hdr));
+	head->mac_header += sizeof(struct frag_hdr);
+	head->network_header += sizeof(struct frag_hdr);
+
+	skb_shinfo(head)->frag_list = head->next;
+	skb_reset_transport_header(head);
+	skb_push(head, head->data - skb_network_header(head));
+
+	for (fp=head->next; fp; fp = fp->next) {
+		head->data_len += fp->len;
+		head->len += fp->len;
+		if (head->ip_summed != fp->ip_summed)
+			head->ip_summed = CHECKSUM_NONE;
+		else if (head->ip_summed == CHECKSUM_COMPLETE)
+			head->csum = csum_add(head->csum, fp->csum);
+		head->truesize += fp->truesize;
+	}
+	sub_frag_mem_limit(&fq->q, head->truesize);
+
+	head->ignore_df = 1;
+	head->next = NULL;
+	head->dev = dev;
+	head->tstamp = fq->q.stamp;
+	ipv6_hdr(head)->payload_len = htons(payload_len);
+	ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
+	IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
+
+	/* Yes, and fold redundant checksum back. 8) */
+	if (head->ip_summed == CHECKSUM_COMPLETE)
+		head->csum = csum_partial(skb_network_header(head),
+					  skb_network_header_len(head),
+					  head->csum);
+
+	fq->q.fragments = NULL;
+	fq->q.fragments_tail = NULL;
+
+	/* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
+	fp = skb_shinfo(head)->frag_list;
+	if (fp && NFCT_FRAG6_CB(fp)->orig == NULL)
+		/* at above code, head skb is divided into two skbs. */
+		fp = fp->next;
+
+	op = NFCT_FRAG6_CB(head)->orig;
+	for (; fp; fp = fp->next) {
+		struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig;
+
+		op->next = orig;
+		op = orig;
+		NFCT_FRAG6_CB(fp)->orig = NULL;
+	}
+
+	return head;
+
+out_oversize:
+	net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
+			    payload_len);
+	goto out_fail;
+out_oom:
+	net_dbg_ratelimited("nf_ct_frag6_reasm: no memory for reassembly\n");
+out_fail:
+	return NULL;
+}
+
+/*
+ * find the header just before Fragment Header.
+ *
+ * if success return 0 and set ...
+ * (*prevhdrp): the value of "Next Header Field" in the header
+ *		just before Fragment Header.
+ * (*prevhoff): the offset of "Next Header Field" in the header
+ *		just before Fragment Header.
+ * (*fhoff)   : the offset of Fragment Header.
+ *
+ * Based on ipv6_skip_hdr() in net/ipv6/exthdr.c
+ *
+ */
+static int
+find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
+{
+	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+	const int netoff = skb_network_offset(skb);
+	u8 prev_nhoff = netoff + offsetof(struct ipv6hdr, nexthdr);
+	int start = netoff + sizeof(struct ipv6hdr);
+	int len = skb->len - start;
+	u8 prevhdr = NEXTHDR_IPV6;
+
+	while (nexthdr != NEXTHDR_FRAGMENT) {
+		struct ipv6_opt_hdr hdr;
+		int hdrlen;
+
+		if (!ipv6_ext_hdr(nexthdr)) {
+			return -1;
+		}
+		if (nexthdr == NEXTHDR_NONE) {
+			pr_debug("next header is none\n");
+			return -1;
+		}
+		if (len < (int)sizeof(struct ipv6_opt_hdr)) {
+			pr_debug("too short\n");
+			return -1;
+		}
+		if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
+			BUG();
+		if (nexthdr == NEXTHDR_AUTH)
+			hdrlen = (hdr.hdrlen+2)<<2;
+		else
+			hdrlen = ipv6_optlen(&hdr);
+
+		prevhdr = nexthdr;
+		prev_nhoff = start;
+
+		nexthdr = hdr.nexthdr;
+		len -= hdrlen;
+		start += hdrlen;
+	}
+
+	if (len < 0)
+		return -1;
+
+	*prevhdrp = prevhdr;
+	*prevhoff = prev_nhoff;
+	*fhoff = start;
+
+	return 0;
+}
+
+struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
+{
+	struct sk_buff *clone;
+	struct net_device *dev = skb->dev;
+	struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev)
+				       : dev_net(skb->dev);
+	struct frag_hdr *fhdr;
+	struct frag_queue *fq;
+	struct ipv6hdr *hdr;
+	int fhoff, nhoff;
+	u8 prevhdr;
+	struct sk_buff *ret_skb = NULL;
+
+	/* Jumbo payload inhibits frag. header */
+	if (ipv6_hdr(skb)->payload_len == 0) {
+		pr_debug("payload len = 0\n");
+		return skb;
+	}
+
+	if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
+		return skb;
+
+	clone = skb_clone(skb, GFP_ATOMIC);
+	if (clone == NULL) {
+		pr_debug("Can't clone skb\n");
+		return skb;
+	}
+
+	NFCT_FRAG6_CB(clone)->orig = skb;
+
+	if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) {
+		pr_debug("message is too short.\n");
+		goto ret_orig;
+	}
+
+	skb_set_transport_header(clone, fhoff);
+	hdr = ipv6_hdr(clone);
+	fhdr = (struct frag_hdr *)skb_transport_header(clone);
+
+	local_bh_disable();
+	inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
+	local_bh_enable();
+
+	fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
+		     ip6_frag_ecn(hdr));
+	if (fq == NULL) {
+		pr_debug("Can't find and can't create new queue\n");
+		goto ret_orig;
+	}
+
+	spin_lock_bh(&fq->q.lock);
+
+	if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
+		spin_unlock_bh(&fq->q.lock);
+		pr_debug("Can't insert skb to queue\n");
+		inet_frag_put(&fq->q, &nf_frags);
+		goto ret_orig;
+	}
+
+	if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+	    fq->q.meat == fq->q.len) {
+		ret_skb = nf_ct_frag6_reasm(fq, dev);
+		if (ret_skb == NULL)
+			pr_debug("Can't reassemble fragmented packets\n");
+	}
+	spin_unlock_bh(&fq->q.lock);
+
+	inet_frag_put(&fq->q, &nf_frags);
+	return ret_skb;
+
+ret_orig:
+	kfree_skb(clone);
+	return skb;
+}
+
+void nf_ct_frag6_consume_orig(struct sk_buff *skb)
+{
+	struct sk_buff *s, *s2;
+
+	for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
+		s2 = s->next;
+		s->next = NULL;
+		consume_skb(s);
+		s = s2;
+	}
+}
+
+static int nf_ct_net_init(struct net *net)
+{
+	net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+	net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+	net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
+	inet_frags_init_net(&net->nf_frag.frags);
+
+	return nf_ct_frag6_sysctl_register(net);
+}
+
+static void nf_ct_net_exit(struct net *net)
+{
+	nf_ct_frags6_sysctl_unregister(net);
+	inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+}
+
+static struct pernet_operations nf_ct_net_ops = {
+	.init = nf_ct_net_init,
+	.exit = nf_ct_net_exit,
+};
+
+int nf_ct_frag6_init(void)
+{
+	int ret = 0;
+
+	nf_frags.hashfn = nf_hashfn;
+	nf_frags.constructor = ip6_frag_init;
+	nf_frags.destructor = NULL;
+	nf_frags.skb_free = nf_skb_free;
+	nf_frags.qsize = sizeof(struct frag_queue);
+	nf_frags.match = ip6_frag_match;
+	nf_frags.frag_expire = nf_ct_frag6_expire;
+	nf_frags.secret_interval = 10 * 60 * HZ;
+	inet_frags_init(&nf_frags);
+
+	ret = register_pernet_subsys(&nf_ct_net_ops);
+	if (ret)
+		inet_frags_fini(&nf_frags);
+
+	return ret;
+}
+
+void nf_ct_frag6_cleanup(void)
+{
+	unregister_pernet_subsys(&nf_ct_net_ops);
+	inet_frags_fini(&nf_frags);
+}
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
new file mode 100644
index 00000000000..7b9a748c6ba
--- /dev/null
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -0,0 +1,140 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/ipv6.h>
+#include <net/inet_frag.h>
+
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#endif
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+
+static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
+						struct sk_buff *skb)
+{
+	u16 zone = NF_CT_DEFAULT_ZONE;
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	if (skb->nfct)
+		zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+#endif
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (skb->nf_bridge &&
+	    skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
+		return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
+#endif
+	if (hooknum == NF_INET_PRE_ROUTING)
+		return IP6_DEFRAG_CONNTRACK_IN + zone;
+	else
+		return IP6_DEFRAG_CONNTRACK_OUT + zone;
+
+}
+
+static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
+				struct sk_buff *skb,
+				const struct net_device *in,
+				const struct net_device *out,
+				int (*okfn)(struct sk_buff *))
+{
+	struct sk_buff *reasm;
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	/* Previously seen (loopback)?	*/
+	if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
+		return NF_ACCEPT;
+#endif
+
+	reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb));
+	/* queued */
+	if (reasm == NULL)
+		return NF_STOLEN;
+
+	/* error occurred or not fragmented */
+	if (reasm == skb)
+		return NF_ACCEPT;
+
+	nf_ct_frag6_consume_orig(reasm);
+
+	NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm,
+		       (struct net_device *) in, (struct net_device *) out,
+		       okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
+
+	return NF_STOLEN;
+}
+
+static struct nf_hook_ops ipv6_defrag_ops[] = {
+	{
+		.hook		= ipv6_defrag,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_PRE_ROUTING,
+		.priority	= NF_IP6_PRI_CONNTRACK_DEFRAG,
+	},
+	{
+		.hook		= ipv6_defrag,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP6_PRI_CONNTRACK_DEFRAG,
+	},
+};
+
+static int __init nf_defrag_init(void)
+{
+	int ret = 0;
+
+	ret = nf_ct_frag6_init();
+	if (ret < 0) {
+		pr_err("nf_defrag_ipv6: can't initialize frag6.\n");
+		return ret;
+	}
+	ret = nf_register_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops));
+	if (ret < 0) {
+		pr_err("nf_defrag_ipv6: can't register hooks\n");
+		goto cleanup_frag6;
+	}
+	return ret;
+
+cleanup_frag6:
+	nf_ct_frag6_cleanup();
+	return ret;
+
+}
+
+static void __exit nf_defrag_fini(void)
+{
+	nf_unregister_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops));
+	nf_ct_frag6_cleanup();
+}
+
+void nf_defrag_ipv6_enable(void)
+{
+}
+EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable);
+
+module_init(nf_defrag_init);
+module_exit(nf_defrag_fini);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
new file mode 100644
index 00000000000..abfe75a2e31
--- /dev/null
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of IPv6 NAT funded by Astaro.
+ */
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/secure_seq.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <net/ip6_route.h>
+#include <net/ipv6.h>
+
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
+
+#ifdef CONFIG_XFRM
+static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
+				       const struct nf_conn *ct,
+				       enum ip_conntrack_dir dir,
+				       unsigned long statusbit,
+				       struct flowi *fl)
+{
+	const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+	struct flowi6 *fl6 = &fl->u.ip6;
+
+	if (ct->status & statusbit) {
+		fl6->daddr = t->dst.u3.in6;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP ||
+		    t->dst.protonum == IPPROTO_UDPLITE ||
+		    t->dst.protonum == IPPROTO_DCCP ||
+		    t->dst.protonum == IPPROTO_SCTP)
+			fl6->fl6_dport = t->dst.u.all;
+	}
+
+	statusbit ^= IPS_NAT_MASK;
+
+	if (ct->status & statusbit) {
+		fl6->saddr = t->src.u3.in6;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP ||
+		    t->dst.protonum == IPPROTO_UDPLITE ||
+		    t->dst.protonum == IPPROTO_DCCP ||
+		    t->dst.protonum == IPPROTO_SCTP)
+			fl6->fl6_sport = t->src.u.all;
+	}
+}
+#endif
+
+static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
+				 const struct nf_nat_range *range)
+{
+	return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
+	       ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
+}
+
+static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t,
+				   __be16 dport)
+{
+	return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport);
+}
+
+static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
+				  unsigned int iphdroff,
+				  const struct nf_nat_l4proto *l4proto,
+				  const struct nf_conntrack_tuple *target,
+				  enum nf_nat_manip_type maniptype)
+{
+	struct ipv6hdr *ipv6h;
+	__be16 frag_off;
+	int hdroff;
+	u8 nexthdr;
+
+	if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
+		return false;
+
+	ipv6h = (void *)skb->data + iphdroff;
+	nexthdr = ipv6h->nexthdr;
+	hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
+				  &nexthdr, &frag_off);
+	if (hdroff < 0)
+		goto manip_addr;
+
+	if ((frag_off & htons(~0x7)) == 0 &&
+	    !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
+				target, maniptype))
+		return false;
+manip_addr:
+	if (maniptype == NF_NAT_MANIP_SRC)
+		ipv6h->saddr = target->src.u3.in6;
+	else
+		ipv6h->daddr = target->dst.u3.in6;
+
+	return true;
+}
+
+static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
+				    unsigned int iphdroff, __sum16 *check,
+				    const struct nf_conntrack_tuple *t,
+				    enum nf_nat_manip_type maniptype)
+{
+	const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
+	const struct in6_addr *oldip, *newip;
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		oldip = &ipv6h->saddr;
+		newip = &t->src.u3.in6;
+	} else {
+		oldip = &ipv6h->daddr;
+		newip = &t->dst.u3.in6;
+	}
+	inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
+				  newip->s6_addr32, 1);
+}
+
+static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
+				    u8 proto, void *data, __sum16 *check,
+				    int datalen, int oldlen)
+{
+	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		if (!(rt->rt6i_flags & RTF_LOCAL) &&
+		    (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) {
+			skb->ip_summed = CHECKSUM_PARTIAL;
+			skb->csum_start = skb_headroom(skb) +
+					  skb_network_offset(skb) +
+					  (data - (void *)skb->data);
+			skb->csum_offset = (void *)check - data;
+			*check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+						  datalen, proto, 0);
+		} else {
+			*check = 0;
+			*check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+						 datalen, proto,
+						 csum_partial(data, datalen,
+							      0));
+			if (proto == IPPROTO_UDP && !*check)
+				*check = CSUM_MANGLED_0;
+		}
+	} else
+		inet_proto_csum_replace2(check, skb,
+					 htons(oldlen), htons(datalen), 1);
+}
+
+static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
+				       struct nf_nat_range *range)
+{
+	if (tb[CTA_NAT_V6_MINIP]) {
+		nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
+			   sizeof(struct in6_addr));
+		range->flags |= NF_NAT_RANGE_MAP_IPS;
+	}
+
+	if (tb[CTA_NAT_V6_MAXIP])
+		nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
+			   sizeof(struct in6_addr));
+	else
+		range->max_addr = range->min_addr;
+
+	return 0;
+}
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
+	.l3proto		= NFPROTO_IPV6,
+	.secure_port		= nf_nat_ipv6_secure_port,
+	.in_range		= nf_nat_ipv6_in_range,
+	.manip_pkt		= nf_nat_ipv6_manip_pkt,
+	.csum_update		= nf_nat_ipv6_csum_update,
+	.csum_recalc		= nf_nat_ipv6_csum_recalc,
+	.nlattr_to_range	= nf_nat_ipv6_nlattr_to_range,
+#ifdef CONFIG_XFRM
+	.decode_session	= nf_nat_ipv6_decode_session,
+#endif
+};
+
+int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
+				    struct nf_conn *ct,
+				    enum ip_conntrack_info ctinfo,
+				    unsigned int hooknum,
+				    unsigned int hdrlen)
+{
+	struct {
+		struct icmp6hdr	icmp6;
+		struct ipv6hdr	ip6;
+	} *inside;
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+	const struct nf_nat_l4proto *l4proto;
+	struct nf_conntrack_tuple target;
+	unsigned long statusbit;
+
+	NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+
+	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+		return 0;
+	if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
+		return 0;
+
+	inside = (void *)skb->data + hdrlen;
+	if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
+		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+			return 0;
+		if (ct->status & IPS_NAT_MASK)
+			return 0;
+	}
+
+	if (manip == NF_NAT_MANIP_SRC)
+		statusbit = IPS_SRC_NAT;
+	else
+		statusbit = IPS_DST_NAT;
+
+	/* Invert if this is reply direction */
+	if (dir == IP_CT_DIR_REPLY)
+		statusbit ^= IPS_NAT_MASK;
+
+	if (!(ct->status & statusbit))
+		return 1;
+
+	l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr);
+	if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
+				   l4proto, &ct->tuplehash[!dir].tuple, !manip))
+		return 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+		inside = (void *)skb->data + hdrlen;
+		inside->icmp6.icmp6_cksum = 0;
+		inside->icmp6.icmp6_cksum =
+			csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+					skb->len - hdrlen, IPPROTO_ICMPV6,
+					csum_partial(&inside->icmp6,
+						     skb->len - hdrlen, 0));
+	}
+
+	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+	l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6);
+	if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip))
+		return 0;
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
+
+static int __init nf_nat_l3proto_ipv6_init(void)
+{
+	int err;
+
+	err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
+	if (err < 0)
+		goto err1;
+	err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
+	if (err < 0)
+		goto err2;
+	return err;
+
+err2:
+	nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
+err1:
+	return err;
+}
+
+static void __exit nf_nat_l3proto_ipv6_exit(void)
+{
+	nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
+	nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf-nat-" __stringify(AF_INET6));
+
+module_init(nf_nat_l3proto_ipv6_init);
+module_exit(nf_nat_l3proto_ipv6_exit);
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
new file mode 100644
index 00000000000..2205e8eeeac
--- /dev/null
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2011 Patrick Mchardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6
+ * NAT funded by Astaro.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/icmpv6.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static bool
+icmpv6_in_range(const struct nf_conntrack_tuple *tuple,
+		enum nf_nat_manip_type maniptype,
+		const union nf_conntrack_man_proto *min,
+		const union nf_conntrack_man_proto *max)
+{
+	return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
+	       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
+}
+
+static void
+icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
+		    struct nf_conntrack_tuple *tuple,
+		    const struct nf_nat_range *range,
+		    enum nf_nat_manip_type maniptype,
+		    const struct nf_conn *ct)
+{
+	static u16 id;
+	unsigned int range_size;
+	unsigned int i;
+
+	range_size = ntohs(range->max_proto.icmp.id) -
+		     ntohs(range->min_proto.icmp.id) + 1;
+
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
+		range_size = 0xffff;
+
+	for (i = 0; ; ++id) {
+		tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
+					     (id % range_size));
+		if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
+			return;
+	}
+}
+
+static bool
+icmpv6_manip_pkt(struct sk_buff *skb,
+		 const struct nf_nat_l3proto *l3proto,
+		 unsigned int iphdroff, unsigned int hdroff,
+		 const struct nf_conntrack_tuple *tuple,
+		 enum nf_nat_manip_type maniptype)
+{
+	struct icmp6hdr *hdr;
+
+	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+		return false;
+
+	hdr = (struct icmp6hdr *)(skb->data + hdroff);
+	l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
+			     tuple, maniptype);
+	if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
+	    hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
+		inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
+					 hdr->icmp6_identifier,
+					 tuple->src.u.icmp.id, 0);
+		hdr->icmp6_identifier = tuple->src.u.icmp.id;
+	}
+	return true;
+}
+
+const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = {
+	.l4proto		= IPPROTO_ICMPV6,
+	.manip_pkt		= icmpv6_manip_pkt,
+	.in_range		= icmpv6_in_range,
+	.unique_tuple		= icmpv6_unique_tuple,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
+#endif
+};
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
new file mode 100644
index 00000000000..0d812b31277
--- /dev/null
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+
+static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops,
+				      struct sk_buff *skb,
+				      const struct net_device *in,
+				      const struct net_device *out,
+				      int (*okfn)(struct sk_buff *))
+{
+	struct nft_pktinfo pkt;
+
+	/* malformed packet, drop it */
+	if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0)
+		return NF_DROP;
+
+	return nft_do_chain(&pkt, ops);
+}
+
+static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
+				    struct sk_buff *skb,
+				    const struct net_device *in,
+				    const struct net_device *out,
+				    int (*okfn)(struct sk_buff *))
+{
+	if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
+		if (net_ratelimit())
+			pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
+				"packet\n");
+		return NF_ACCEPT;
+	}
+
+	return nft_do_chain_ipv6(ops, skb, in, out, okfn);
+}
+
+struct nft_af_info nft_af_ipv6 __read_mostly = {
+	.family		= NFPROTO_IPV6,
+	.nhooks		= NF_INET_NUMHOOKS,
+	.owner		= THIS_MODULE,
+	.nops		= 1,
+	.hooks		= {
+		[NF_INET_LOCAL_IN]	= nft_do_chain_ipv6,
+		[NF_INET_LOCAL_OUT]	= nft_ipv6_output,
+		[NF_INET_FORWARD]	= nft_do_chain_ipv6,
+		[NF_INET_PRE_ROUTING]	= nft_do_chain_ipv6,
+		[NF_INET_POST_ROUTING]	= nft_do_chain_ipv6,
+	},
+};
+EXPORT_SYMBOL_GPL(nft_af_ipv6);
+
+static int nf_tables_ipv6_init_net(struct net *net)
+{
+	net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+	if (net->nft.ipv6 == NULL)
+		return -ENOMEM;
+
+	memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6));
+
+	if (nft_register_afinfo(net, net->nft.ipv6) < 0)
+		goto err;
+
+	return 0;
+err:
+	kfree(net->nft.ipv6);
+	return -ENOMEM;
+}
+
+static void nf_tables_ipv6_exit_net(struct net *net)
+{
+	nft_unregister_afinfo(net->nft.ipv6);
+	kfree(net->nft.ipv6);
+}
+
+static struct pernet_operations nf_tables_ipv6_net_ops = {
+	.init	= nf_tables_ipv6_init_net,
+	.exit	= nf_tables_ipv6_exit_net,
+};
+
+static const struct nf_chain_type filter_ipv6 = {
+	.name		= "filter",
+	.type		= NFT_CHAIN_T_DEFAULT,
+	.family		= NFPROTO_IPV6,
+	.owner		= THIS_MODULE,
+	.hook_mask	= (1 << NF_INET_LOCAL_IN) |
+			  (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_FORWARD) |
+			  (1 << NF_INET_PRE_ROUTING) |
+			  (1 << NF_INET_POST_ROUTING),
+};
+
+static int __init nf_tables_ipv6_init(void)
+{
+	int ret;
+
+	nft_register_chain_type(&filter_ipv6);
+	ret = register_pernet_subsys(&nf_tables_ipv6_net_ops);
+	if (ret < 0)
+		nft_unregister_chain_type(&filter_ipv6);
+
+	return ret;
+}
+
+static void __exit nf_tables_ipv6_exit(void)
+{
+	unregister_pernet_subsys(&nf_tables_ipv6_net_ops);
+	nft_unregister_chain_type(&filter_ipv6);
+}
+
+module_init(nf_tables_ipv6_init);
+module_exit(nf_tables_ipv6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET6);
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
new file mode 100644
index 00000000000..d189fcb437f
--- /dev/null
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ipv6.h>
+
+/*
+ * IPv6 NAT chains
+ */
+
+static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
+			      struct sk_buff *skb,
+			      const struct net_device *in,
+			      const struct net_device *out,
+			      int (*okfn)(struct sk_buff *))
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_conn_nat *nat;
+	enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+	__be16 frag_off;
+	int hdrlen;
+	u8 nexthdr;
+	struct nft_pktinfo pkt;
+	unsigned int ret;
+
+	if (ct == NULL || nf_ct_is_untracked(ct))
+		return NF_ACCEPT;
+
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat == NULL)
+		return NF_ACCEPT;
+
+	switch (ctinfo) {
+	case IP_CT_RELATED:
+	case IP_CT_RELATED + IP_CT_IS_REPLY:
+		nexthdr = ipv6_hdr(skb)->nexthdr;
+		hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+					  &nexthdr, &frag_off);
+
+		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
+							   ops->hooknum,
+							   hdrlen))
+				return NF_DROP;
+			else
+				return NF_ACCEPT;
+		}
+		/* Fall through */
+	case IP_CT_NEW:
+		if (nf_nat_initialized(ct, maniptype))
+			break;
+
+		nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out);
+
+		ret = nft_do_chain(&pkt, ops);
+		if (ret != NF_ACCEPT)
+			return ret;
+		if (!nf_nat_initialized(ct, maniptype)) {
+			ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+			if (ret != NF_ACCEPT)
+				return ret;
+		}
+	default:
+		break;
+	}
+
+	return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+}
+
+static unsigned int nf_nat_ipv6_prerouting(const struct nf_hook_ops *ops,
+				      struct sk_buff *skb,
+				      const struct net_device *in,
+				      const struct net_device *out,
+				      int (*okfn)(struct sk_buff *))
+{
+	struct in6_addr daddr = ipv6_hdr(skb)->daddr;
+	unsigned int ret;
+
+	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
+		skb_dst_drop(skb);
+
+	return ret;
+}
+
+static unsigned int nf_nat_ipv6_postrouting(const struct nf_hook_ops *ops,
+				       struct sk_buff *skb,
+				       const struct net_device *in,
+				       const struct net_device *out,
+				       int (*okfn)(struct sk_buff *))
+{
+	enum ip_conntrack_info ctinfo __maybe_unused;
+	const struct nf_conn *ct __maybe_unused;
+	unsigned int ret;
+
+	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+				      &ct->tuplehash[!dir].tuple.dst.u3) ||
+		    (ct->tuplehash[dir].tuple.src.u.all !=
+		     ct->tuplehash[!dir].tuple.dst.u.all))
+			if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
+				ret = NF_DROP;
+	}
+#endif
+	return ret;
+}
+
+static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops,
+				  struct sk_buff *skb,
+				  const struct net_device *in,
+				  const struct net_device *out,
+				  int (*okfn)(struct sk_buff *))
+{
+	enum ip_conntrack_info ctinfo;
+	const struct nf_conn *ct;
+	unsigned int ret;
+
+	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
+				      &ct->tuplehash[!dir].tuple.src.u3)) {
+			if (ip6_route_me_harder(skb))
+				ret = NF_DROP;
+		}
+#ifdef CONFIG_XFRM
+		else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+			 ct->tuplehash[dir].tuple.dst.u.all !=
+			 ct->tuplehash[!dir].tuple.src.u.all)
+			if (nf_xfrm_me_harder(skb, AF_INET6))
+				ret = NF_DROP;
+#endif
+	}
+	return ret;
+}
+
+static const struct nf_chain_type nft_chain_nat_ipv6 = {
+	.name		= "nat",
+	.type		= NFT_CHAIN_T_NAT,
+	.family		= NFPROTO_IPV6,
+	.owner		= THIS_MODULE,
+	.hook_mask	= (1 << NF_INET_PRE_ROUTING) |
+			  (1 << NF_INET_POST_ROUTING) |
+			  (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_LOCAL_IN),
+	.hooks		= {
+		[NF_INET_PRE_ROUTING]	= nf_nat_ipv6_prerouting,
+		[NF_INET_POST_ROUTING]	= nf_nat_ipv6_postrouting,
+		[NF_INET_LOCAL_OUT]	= nf_nat_ipv6_output,
+		[NF_INET_LOCAL_IN]	= nf_nat_ipv6_fn,
+	},
+};
+
+static int __init nft_chain_nat_ipv6_init(void)
+{
+	int err;
+
+	err = nft_register_chain_type(&nft_chain_nat_ipv6);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static void __exit nft_chain_nat_ipv6_exit(void)
+{
+	nft_unregister_chain_type(&nft_chain_nat_ipv6);
+}
+
+module_init(nft_chain_nat_ipv6_init);
+module_exit(nft_chain_nat_ipv6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat");
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
new file mode 100644
index 00000000000..42031299585
--- /dev/null
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+#include <net/route.h>
+
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+					struct sk_buff *skb,
+					const struct net_device *in,
+					const struct net_device *out,
+					int (*okfn)(struct sk_buff *))
+{
+	unsigned int ret;
+	struct nft_pktinfo pkt;
+	struct in6_addr saddr, daddr;
+	u_int8_t hop_limit;
+	u32 mark, flowlabel;
+
+	/* malformed packet, drop it */
+	if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0)
+		return NF_DROP;
+
+	/* save source/dest address, mark, hoplimit, flowlabel, priority */
+	memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+	memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
+	mark = skb->mark;
+	hop_limit = ipv6_hdr(skb)->hop_limit;
+
+	/* flowlabel and prio (includes version, which shouldn't change either */
+	flowlabel = *((u32 *)ipv6_hdr(skb));
+
+	ret = nft_do_chain(&pkt, ops);
+	if (ret != NF_DROP && ret != NF_QUEUE &&
+	    (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
+	     memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
+	     skb->mark != mark ||
+	     ipv6_hdr(skb)->hop_limit != hop_limit ||
+	     flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
+		return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
+
+	return ret;
+}
+
+static const struct nf_chain_type nft_chain_route_ipv6 = {
+	.name		= "route",
+	.type		= NFT_CHAIN_T_ROUTE,
+	.family		= NFPROTO_IPV6,
+        .owner		= THIS_MODULE,
+	.hook_mask	= (1 << NF_INET_LOCAL_OUT),
+	.hooks		= {
+                [NF_INET_LOCAL_OUT]	= nf_route_table_hook,
+        },
+};
+
+static int __init nft_chain_route_init(void)
+{
+	return nft_register_chain_type(&nft_chain_route_ipv6);
+}
+
+static void __exit nft_chain_route_exit(void)
+{
+	nft_unregister_chain_type(&nft_chain_route_ipv6);
+}
+
+module_init(nft_chain_route_init);
+module_exit(nft_chain_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route");
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
new file mode 100644
index 00000000000..0bc19fa8782
--- /dev/null
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_reject.h>
+#include <net/netfilter/ipv6/nf_reject.h>
+
+void nft_reject_ipv6_eval(const struct nft_expr *expr,
+			  struct nft_data data[NFT_REG_MAX + 1],
+			  const struct nft_pktinfo *pkt)
+{
+	struct nft_reject *priv = nft_expr_priv(expr);
+	struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
+
+	switch (priv->type) {
+	case NFT_REJECT_ICMP_UNREACH:
+		nf_send_unreach6(net, pkt->skb, priv->icmp_code,
+				 pkt->ops->hooknum);
+		break;
+	case NFT_REJECT_TCP_RST:
+		nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
+		break;
+	}
+
+	data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nft_reject_ipv6_eval);
+
+static struct nft_expr_type nft_reject_ipv6_type;
+static const struct nft_expr_ops nft_reject_ipv6_ops = {
+	.type		= &nft_reject_ipv6_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+	.eval		= nft_reject_ipv6_eval,
+	.init		= nft_reject_init,
+	.dump		= nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_ipv6_type __read_mostly = {
+	.family		= NFPROTO_IPV6,
+	.name		= "reject",
+	.ops		= &nft_reject_ipv6_ops,
+	.policy		= nft_reject_policy,
+	.maxattr	= NFTA_REJECT_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_reject_ipv6_module_init(void)
+{
+	return nft_register_expr(&nft_reject_ipv6_type);
+}
+
+static void __exit nft_reject_ipv6_module_exit(void)
+{
+	nft_unregister_expr(&nft_reject_ipv6_type);
+}
+
+module_init(nft_reject_ipv6_module_init);
+module_exit(nft_reject_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "reject");
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
new file mode 100644
index 00000000000..5ec867e4a8b
--- /dev/null
+++ b/net/ipv6/output_core.c
@@ -0,0 +1,98 @@
+/*
+ * IPv6 library code, needed by static components when full IPv6 support is
+ * not configured or static.  These functions are needed by GSO/GRO implementation.
+ */
+#include <linux/export.h>
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+#include <net/addrconf.h>
+#include <net/secure_seq.h>
+
+int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
+{
+	u16 offset = sizeof(struct ipv6hdr);
+	struct ipv6_opt_hdr *exthdr =
+				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+	unsigned int packet_len = skb_tail_pointer(skb) -
+		skb_network_header(skb);
+	int found_rhdr = 0;
+	*nexthdr = &ipv6_hdr(skb)->nexthdr;
+
+	while (offset + 1 <= packet_len) {
+
+		switch (**nexthdr) {
+
+		case NEXTHDR_HOP:
+			break;
+		case NEXTHDR_ROUTING:
+			found_rhdr = 1;
+			break;
+		case NEXTHDR_DEST:
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+				break;
+#endif
+			if (found_rhdr)
+				return offset;
+			break;
+		default :
+			return offset;
+		}
+
+		offset += ipv6_optlen(exthdr);
+		*nexthdr = &exthdr->nexthdr;
+		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+						 offset);
+	}
+
+	return offset;
+}
+EXPORT_SYMBOL(ip6_find_1stfragopt);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int ip6_dst_hoplimit(struct dst_entry *dst)
+{
+	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
+	if (hoplimit == 0) {
+		struct net_device *dev = dst->dev;
+		struct inet6_dev *idev;
+
+		rcu_read_lock();
+		idev = __in6_dev_get(dev);
+		if (idev)
+			hoplimit = idev->cnf.hop_limit;
+		else
+			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
+		rcu_read_unlock();
+	}
+	return hoplimit;
+}
+EXPORT_SYMBOL(ip6_dst_hoplimit);
+#endif
+
+int __ip6_local_out(struct sk_buff *skb)
+{
+	int len;
+
+	len = skb->len - sizeof(struct ipv6hdr);
+	if (len > IPV6_MAXPLEN)
+		len = 0;
+	ipv6_hdr(skb)->payload_len = htons(len);
+	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+
+	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
+		       skb_dst(skb)->dev, dst_output);
+}
+EXPORT_SYMBOL_GPL(__ip6_local_out);
+
+int ip6_local_out(struct sk_buff *skb)
+{
+	int err;
+
+	err = __ip6_local_out(skb);
+	if (likely(err == 1))
+		err = dst_output(skb);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(ip6_local_out);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
new file mode 100644
index 00000000000..5b7a1ed2aba
--- /dev/null
+++ b/net/ipv6/ping.c
@@ -0,0 +1,275 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		"Ping" sockets
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Based on ipv4/ping.c code.
+ *
+ * Authors:	Lorenzo Colitti (IPv6 support)
+ *		Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6),
+ *		Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32)
+ *
+ */
+
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/protocol.h>
+#include <net/udp.h>
+#include <net/transp_v6.h>
+#include <net/ping.h>
+
+struct proto pingv6_prot = {
+	.name =		"PINGv6",
+	.owner =	THIS_MODULE,
+	.init =		ping_init_sock,
+	.close =	ping_close,
+	.connect =	ip6_datagram_connect_v6_only,
+	.disconnect =	udp_disconnect,
+	.setsockopt =	ipv6_setsockopt,
+	.getsockopt =	ipv6_getsockopt,
+	.sendmsg =	ping_v6_sendmsg,
+	.recvmsg =	ping_recvmsg,
+	.bind =		ping_bind,
+	.backlog_rcv =	ping_queue_rcv_skb,
+	.hash =		ping_hash,
+	.unhash =	ping_unhash,
+	.get_port =	ping_get_port,
+	.obj_size =	sizeof(struct raw6_sock),
+};
+EXPORT_SYMBOL_GPL(pingv6_prot);
+
+static struct inet_protosw pingv6_protosw = {
+	.type =      SOCK_DGRAM,
+	.protocol =  IPPROTO_ICMPV6,
+	.prot =      &pingv6_prot,
+	.ops =       &inet6_dgram_ops,
+	.flags =     INET_PROTOSW_REUSE,
+};
+
+
+/* Compatibility glue so we can support IPv6 when it's compiled as a module */
+static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
+				 int *addr_len)
+{
+	return -EAFNOSUPPORT;
+}
+static void dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
+				       struct sk_buff *skb)
+{
+}
+static int dummy_icmpv6_err_convert(u8 type, u8 code, int *err)
+{
+	return -EAFNOSUPPORT;
+}
+static void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
+				  __be16 port, u32 info, u8 *payload) {}
+static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
+			       const struct net_device *dev, int strict)
+{
+	return 0;
+}
+
+int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		    size_t len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct icmp6hdr user_icmph;
+	int addr_type;
+	struct in6_addr *daddr;
+	int iif = 0;
+	struct flowi6 fl6;
+	int err;
+	int hlimit;
+	struct dst_entry *dst;
+	struct rt6_info *rt;
+	struct pingfakehdr pfh;
+
+	pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
+
+	err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph,
+				  sizeof(user_icmph));
+	if (err)
+		return err;
+
+	if (msg->msg_name) {
+		DECLARE_SOCKADDR(struct sockaddr_in6 *, u, msg->msg_name);
+		if (msg->msg_namelen < sizeof(struct sockaddr_in6) ||
+		    u->sin6_family != AF_INET6) {
+			return -EINVAL;
+		}
+		if (sk->sk_bound_dev_if &&
+		    sk->sk_bound_dev_if != u->sin6_scope_id) {
+			return -EINVAL;
+		}
+		daddr = &(u->sin6_addr);
+		iif = u->sin6_scope_id;
+	} else {
+		if (sk->sk_state != TCP_ESTABLISHED)
+			return -EDESTADDRREQ;
+		daddr = &sk->sk_v6_daddr;
+	}
+
+	if (!iif)
+		iif = sk->sk_bound_dev_if;
+
+	addr_type = ipv6_addr_type(daddr);
+	if (__ipv6_addr_needs_scope_id(addr_type) && !iif)
+		return -EINVAL;
+	if (addr_type & IPV6_ADDR_MAPPED)
+		return -EINVAL;
+
+	/* TODO: use ip6_datagram_send_ctl to get options from cmsg */
+
+	memset(&fl6, 0, sizeof(fl6));
+
+	fl6.flowi6_proto = IPPROTO_ICMPV6;
+	fl6.saddr = np->saddr;
+	fl6.daddr = *daddr;
+	fl6.flowi6_mark = sk->sk_mark;
+	fl6.fl6_icmp_type = user_icmph.icmp6_type;
+	fl6.fl6_icmp_code = user_icmph.icmp6_code;
+	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+		fl6.flowi6_oif = np->mcast_oif;
+	else if (!fl6.flowi6_oif)
+		fl6.flowi6_oif = np->ucast_oif;
+
+	dst = ip6_sk_dst_lookup_flow(sk, &fl6,  daddr);
+	if (IS_ERR(dst))
+		return PTR_ERR(dst);
+	rt = (struct rt6_info *) dst;
+
+	np = inet6_sk(sk);
+	if (!np)
+		return -EBADF;
+
+	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+		fl6.flowi6_oif = np->mcast_oif;
+	else if (!fl6.flowi6_oif)
+		fl6.flowi6_oif = np->ucast_oif;
+
+	pfh.icmph.type = user_icmph.icmp6_type;
+	pfh.icmph.code = user_icmph.icmp6_code;
+	pfh.icmph.checksum = 0;
+	pfh.icmph.un.echo.id = inet->inet_sport;
+	pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence;
+	pfh.iov = msg->msg_iov;
+	pfh.wcheck = 0;
+	pfh.family = AF_INET6;
+
+	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+
+	lock_sock(sk);
+	err = ip6_append_data(sk, ping_getfrag, &pfh, len,
+			      0, hlimit,
+			      np->tclass, NULL, &fl6, rt,
+			      MSG_DONTWAIT, np->dontfrag);
+
+	if (err) {
+		ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
+				ICMP6_MIB_OUTERRORS);
+		ip6_flush_pending_frames(sk);
+	} else {
+		err = icmpv6_push_pending_frames(sk, &fl6,
+						 (struct icmp6hdr *) &pfh.icmph,
+						 len);
+	}
+	release_sock(sk);
+
+	if (err)
+		return err;
+
+	return len;
+}
+
+#ifdef CONFIG_PROC_FS
+static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	return ping_seq_start(seq, pos, AF_INET6);
+}
+
+static int ping_v6_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
+	} else {
+		int bucket = ((struct ping_iter_state *) seq->private)->bucket;
+		struct inet_sock *inet = inet_sk(v);
+		__u16 srcp = ntohs(inet->inet_sport);
+		__u16 destp = ntohs(inet->inet_dport);
+		ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket);
+	}
+	return 0;
+}
+
+static struct ping_seq_afinfo ping_v6_seq_afinfo = {
+	.name		= "icmp6",
+	.family		= AF_INET6,
+	.seq_fops       = &ping_seq_fops,
+	.seq_ops	= {
+		.start		= ping_v6_seq_start,
+		.show		= ping_v6_seq_show,
+		.next		= ping_seq_next,
+		.stop		= ping_seq_stop,
+	},
+};
+
+static int __net_init ping_v6_proc_init_net(struct net *net)
+{
+	return ping_proc_register(net, &ping_v6_seq_afinfo);
+}
+
+static void __net_init ping_v6_proc_exit_net(struct net *net)
+{
+	return ping_proc_unregister(net, &ping_v6_seq_afinfo);
+}
+
+static struct pernet_operations ping_v6_net_ops = {
+	.init = ping_v6_proc_init_net,
+	.exit = ping_v6_proc_exit_net,
+};
+#endif
+
+int __init pingv6_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	int ret = register_pernet_subsys(&ping_v6_net_ops);
+	if (ret)
+		return ret;
+#endif
+	pingv6_ops.ipv6_recv_error = ipv6_recv_error;
+	pingv6_ops.ip6_datagram_recv_common_ctl = ip6_datagram_recv_common_ctl;
+	pingv6_ops.ip6_datagram_recv_specific_ctl =
+		ip6_datagram_recv_specific_ctl;
+	pingv6_ops.icmpv6_err_convert = icmpv6_err_convert;
+	pingv6_ops.ipv6_icmp_error = ipv6_icmp_error;
+	pingv6_ops.ipv6_chk_addr = ipv6_chk_addr;
+	return inet6_register_protosw(&pingv6_protosw);
+}
+
+/* This never gets called because it's not possible to unload the ipv6 module,
+ * but just in case.
+ */
+void pingv6_exit(void)
+{
+	pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error;
+	pingv6_ops.ip6_datagram_recv_common_ctl = dummy_ip6_datagram_recv_ctl;
+	pingv6_ops.ip6_datagram_recv_specific_ctl = dummy_ip6_datagram_recv_ctl;
+	pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert;
+	pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error;
+	pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr;
+#ifdef CONFIG_PROC_FS
+	unregister_pernet_subsys(&ping_v6_net_ops);
+#endif
+	inet6_unregister_protosw(&pingv6_protosw);
+}
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 50a13e75d70..3317440ea34 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -7,8 +7,6 @@
  *		PROC file system.  This is very similar to the IPv4 version,
  *		except it reports the sockets in the INET6 address family.
  *
- * Version:	$Id: proc.c,v 1.17 2002/02/01 22:01:04 davem Exp $
- *
  * Authors:	David S. Miller (davem@caip.rutgers.edu)
  * 		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  *
@@ -17,49 +15,54 @@
  *		as published by the Free Software Foundation; either version
  *		2 of the License, or (at your option) any later version.
  */
-#include <linux/config.h>
-#include <linux/sched.h>
 #include <linux/socket.h>
 #include <linux/net.h>
 #include <linux/ipv6.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/stddef.h>
+#include <linux/export.h>
+#include <net/net_namespace.h>
+#include <net/ip.h>
 #include <net/sock.h>
 #include <net/tcp.h>
+#include <net/udp.h>
 #include <net/transp_v6.h>
 #include <net/ipv6.h>
 
-#ifdef CONFIG_PROC_FS
-static struct proc_dir_entry *proc_net_devsnmp6;
-
-static int fold_prot_inuse(struct proto *proto)
-{
-	int res = 0;
-	int cpu;
-
-	for (cpu=0; cpu<NR_CPUS; cpu++)
-		res += proto->stats[cpu].inuse;
-
-	return res;
-}
-
 static int sockstat6_seq_show(struct seq_file *seq, void *v)
 {
+	struct net *net = seq->private;
+
 	seq_printf(seq, "TCP6: inuse %d\n",
-		       fold_prot_inuse(&tcpv6_prot));
+		       sock_prot_inuse_get(net, &tcpv6_prot));
 	seq_printf(seq, "UDP6: inuse %d\n",
-		       fold_prot_inuse(&udpv6_prot));
+		       sock_prot_inuse_get(net, &udpv6_prot));
+	seq_printf(seq, "UDPLITE6: inuse %d\n",
+			sock_prot_inuse_get(net, &udplitev6_prot));
 	seq_printf(seq, "RAW6: inuse %d\n",
-		       fold_prot_inuse(&rawv6_prot));
+		       sock_prot_inuse_get(net, &rawv6_prot));
 	seq_printf(seq, "FRAG6: inuse %d memory %d\n",
-		       ip6_frag_nqueues, atomic_read(&ip6_frag_mem));
+		       ip6_frag_nqueues(net), ip6_frag_mem(net));
 	return 0;
 }
 
-static struct snmp_mib snmp6_ipstats_list[] = {
+static int sockstat6_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open_net(inode, file, sockstat6_seq_show);
+}
+
+static const struct file_operations sockstat6_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = sockstat6_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = single_release_net,
+};
+
+static const struct snmp_mib snmp6_ipstats_list[] = {
 /* ipv6 mib according to RFC 2465 */
-	SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INRECEIVES),
+	SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INPKTS),
 	SNMP_MIB_ITEM("Ip6InHdrErrors", IPSTATS_MIB_INHDRERRORS),
 	SNMP_MIB_ITEM("Ip6InTooBigErrors", IPSTATS_MIB_INTOOBIGERRORS),
 	SNMP_MIB_ITEM("Ip6InNoRoutes", IPSTATS_MIB_INNOROUTES),
@@ -69,7 +72,7 @@ static struct snmp_mib snmp6_ipstats_list[] = {
 	SNMP_MIB_ITEM("Ip6InDiscards", IPSTATS_MIB_INDISCARDS),
 	SNMP_MIB_ITEM("Ip6InDelivers", IPSTATS_MIB_INDELIVERS),
 	SNMP_MIB_ITEM("Ip6OutForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS),
-	SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTREQUESTS),
+	SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTPKTS),
 	SNMP_MIB_ITEM("Ip6OutDiscards", IPSTATS_MIB_OUTDISCARDS),
 	SNMP_MIB_ITEM("Ip6OutNoRoutes", IPSTATS_MIB_OUTNOROUTES),
 	SNMP_MIB_ITEM("Ip6ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT),
@@ -81,117 +84,183 @@ static struct snmp_mib snmp6_ipstats_list[] = {
 	SNMP_MIB_ITEM("Ip6FragCreates", IPSTATS_MIB_FRAGCREATES),
 	SNMP_MIB_ITEM("Ip6InMcastPkts", IPSTATS_MIB_INMCASTPKTS),
 	SNMP_MIB_ITEM("Ip6OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS),
+	SNMP_MIB_ITEM("Ip6InOctets", IPSTATS_MIB_INOCTETS),
+	SNMP_MIB_ITEM("Ip6OutOctets", IPSTATS_MIB_OUTOCTETS),
+	SNMP_MIB_ITEM("Ip6InMcastOctets", IPSTATS_MIB_INMCASTOCTETS),
+	SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
+	SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
+	SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
+	/* IPSTATS_MIB_CSUMERRORS is not relevant in IPv6 (no checksum) */
+	SNMP_MIB_ITEM("Ip6InNoECTPkts", IPSTATS_MIB_NOECTPKTS),
+	SNMP_MIB_ITEM("Ip6InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
+	SNMP_MIB_ITEM("Ip6InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
+	SNMP_MIB_ITEM("Ip6InCEPkts", IPSTATS_MIB_CEPKTS),
 	SNMP_MIB_SENTINEL
 };
 
-static struct snmp_mib snmp6_icmp6_list[] = {
-/* icmpv6 mib according to RFC 2466
-
-   Exceptions:  {In|Out}AdminProhibs are removed, because I see
-                no good reasons to account them separately
-		of another dest.unreachs.
-		OutErrs is zero identically.
-		OutEchos too.
-		OutRouterAdvertisements too.
-		OutGroupMembQueries too.
- */
+static const struct snmp_mib snmp6_icmp6_list[] = {
+/* icmpv6 mib according to RFC 2466 */
 	SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS),
 	SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS),
-	SNMP_MIB_ITEM("Icmp6InDestUnreachs", ICMP6_MIB_INDESTUNREACHS),
-	SNMP_MIB_ITEM("Icmp6InPktTooBigs", ICMP6_MIB_INPKTTOOBIGS),
-	SNMP_MIB_ITEM("Icmp6InTimeExcds", ICMP6_MIB_INTIMEEXCDS),
-	SNMP_MIB_ITEM("Icmp6InParmProblems", ICMP6_MIB_INPARMPROBLEMS),
-	SNMP_MIB_ITEM("Icmp6InEchos", ICMP6_MIB_INECHOS),
-	SNMP_MIB_ITEM("Icmp6InEchoReplies", ICMP6_MIB_INECHOREPLIES),
-	SNMP_MIB_ITEM("Icmp6InGroupMembQueries", ICMP6_MIB_INGROUPMEMBQUERIES),
-	SNMP_MIB_ITEM("Icmp6InGroupMembResponses", ICMP6_MIB_INGROUPMEMBRESPONSES),
-	SNMP_MIB_ITEM("Icmp6InGroupMembReductions", ICMP6_MIB_INGROUPMEMBREDUCTIONS),
-	SNMP_MIB_ITEM("Icmp6InRouterSolicits", ICMP6_MIB_INROUTERSOLICITS),
-	SNMP_MIB_ITEM("Icmp6InRouterAdvertisements", ICMP6_MIB_INROUTERADVERTISEMENTS),
-	SNMP_MIB_ITEM("Icmp6InNeighborSolicits", ICMP6_MIB_INNEIGHBORSOLICITS),
-	SNMP_MIB_ITEM("Icmp6InNeighborAdvertisements", ICMP6_MIB_INNEIGHBORADVERTISEMENTS),
-	SNMP_MIB_ITEM("Icmp6InRedirects", ICMP6_MIB_INREDIRECTS),
 	SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
-	SNMP_MIB_ITEM("Icmp6OutDestUnreachs", ICMP6_MIB_OUTDESTUNREACHS),
-	SNMP_MIB_ITEM("Icmp6OutPktTooBigs", ICMP6_MIB_OUTPKTTOOBIGS),
-	SNMP_MIB_ITEM("Icmp6OutTimeExcds", ICMP6_MIB_OUTTIMEEXCDS),
-	SNMP_MIB_ITEM("Icmp6OutParmProblems", ICMP6_MIB_OUTPARMPROBLEMS),
-	SNMP_MIB_ITEM("Icmp6OutEchoReplies", ICMP6_MIB_OUTECHOREPLIES),
-	SNMP_MIB_ITEM("Icmp6OutRouterSolicits", ICMP6_MIB_OUTROUTERSOLICITS),
-	SNMP_MIB_ITEM("Icmp6OutNeighborSolicits", ICMP6_MIB_OUTNEIGHBORSOLICITS),
-	SNMP_MIB_ITEM("Icmp6OutNeighborAdvertisements", ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS),
-	SNMP_MIB_ITEM("Icmp6OutRedirects", ICMP6_MIB_OUTREDIRECTS),
-	SNMP_MIB_ITEM("Icmp6OutGroupMembResponses", ICMP6_MIB_OUTGROUPMEMBRESPONSES),
-	SNMP_MIB_ITEM("Icmp6OutGroupMembReductions", ICMP6_MIB_OUTGROUPMEMBREDUCTIONS),
+	SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS),
+	SNMP_MIB_ITEM("Icmp6InCsumErrors", ICMP6_MIB_CSUMERRORS),
 	SNMP_MIB_SENTINEL
 };
 
-static struct snmp_mib snmp6_udp6_list[] = {
+/* RFC 4293 v6 ICMPMsgStatsTable; named items for RFC 2466 compatibility */
+static const char *const icmp6type2name[256] = {
+	[ICMPV6_DEST_UNREACH] = "DestUnreachs",
+	[ICMPV6_PKT_TOOBIG] = "PktTooBigs",
+	[ICMPV6_TIME_EXCEED] = "TimeExcds",
+	[ICMPV6_PARAMPROB] = "ParmProblems",
+	[ICMPV6_ECHO_REQUEST] = "Echos",
+	[ICMPV6_ECHO_REPLY] = "EchoReplies",
+	[ICMPV6_MGM_QUERY] = "GroupMembQueries",
+	[ICMPV6_MGM_REPORT] = "GroupMembResponses",
+	[ICMPV6_MGM_REDUCTION] = "GroupMembReductions",
+	[ICMPV6_MLD2_REPORT] = "MLDv2Reports",
+	[NDISC_ROUTER_ADVERTISEMENT] = "RouterAdvertisements",
+	[NDISC_ROUTER_SOLICITATION] = "RouterSolicits",
+	[NDISC_NEIGHBOUR_ADVERTISEMENT] = "NeighborAdvertisements",
+	[NDISC_NEIGHBOUR_SOLICITATION] = "NeighborSolicits",
+	[NDISC_REDIRECT] = "Redirects",
+};
+
+
+static const struct snmp_mib snmp6_udp6_list[] = {
 	SNMP_MIB_ITEM("Udp6InDatagrams", UDP_MIB_INDATAGRAMS),
 	SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS),
 	SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS),
 	SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+	SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
+	SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS),
+	SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS),
+	SNMP_MIB_SENTINEL
+};
+
+static const struct snmp_mib snmp6_udplite6_list[] = {
+	SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS),
+	SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS),
+	SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS),
+	SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+	SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
+	SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS),
+	SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS),
 	SNMP_MIB_SENTINEL
 };
 
-static unsigned long
-fold_field(void *mib[], int offt)
+static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib)
 {
-        unsigned long res = 0;
-        int i;
- 
-        for_each_cpu(i) {
-                res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt);
-                res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt);
-        }
-        return res;
+	char name[32];
+	int i;
+
+	/* print by name -- deprecated items */
+	for (i = 0; i < ICMP6MSG_MIB_MAX; i++) {
+		int icmptype;
+		const char *p;
+
+		icmptype = i & 0xff;
+		p = icmp6type2name[icmptype];
+		if (!p)	/* don't print un-named types here */
+			continue;
+		snprintf(name, sizeof(name), "Icmp6%s%s",
+			i & 0x100 ? "Out" : "In", p);
+		seq_printf(seq, "%-32s\t%lu\n", name,
+			   atomic_long_read(smib + i));
+	}
+
+	/* print by number (nonzero only) - ICMPMsgStat format */
+	for (i = 0; i < ICMP6MSG_MIB_MAX; i++) {
+		unsigned long val;
+
+		val = atomic_long_read(smib + i);
+		if (!val)
+			continue;
+		snprintf(name, sizeof(name), "Icmp6%sType%u",
+			i & 0x100 ?  "Out" : "In", i & 0xff);
+		seq_printf(seq, "%-32s\t%lu\n", name, val);
+	}
 }
 
-static inline void
-snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist)
+/* can be called either with percpu mib (pcpumib != NULL),
+ * or shared one (smib != NULL)
+ */
+static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib,
+				atomic_long_t *smib,
+				const struct snmp_mib *itemlist)
 {
 	int i;
-	for (i=0; itemlist[i].name; i++)
-		seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, 
-				fold_field(mib, itemlist[i].entry));
+	unsigned long val;
+
+	for (i = 0; itemlist[i].name; i++) {
+		val = pcpumib ?
+			snmp_fold_field(pcpumib, itemlist[i].entry) :
+			atomic_long_read(smib + itemlist[i].entry);
+		seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val);
+	}
 }
 
-static int snmp6_seq_show(struct seq_file *seq, void *v)
+static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
+				  const struct snmp_mib *itemlist, size_t syncpoff)
 {
-	struct inet6_dev *idev = (struct inet6_dev *)seq->private;
+	int i;
 
-	if (idev) {
-		seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
-		snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list);
-	} else {
-		snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list);
-		snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list);
-		snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list);
-	}
+	for (i = 0; itemlist[i].name; i++)
+		seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name,
+			   snmp_fold_field64(mib, itemlist[i].entry, syncpoff));
+}
+
+static int snmp6_seq_show(struct seq_file *seq, void *v)
+{
+	struct net *net = (struct net *)seq->private;
+
+	snmp6_seq_show_item64(seq, net->mib.ipv6_statistics,
+			    snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
+	snmp6_seq_show_item(seq, net->mib.icmpv6_statistics,
+			    NULL, snmp6_icmp6_list);
+	snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs);
+	snmp6_seq_show_item(seq, net->mib.udp_stats_in6,
+			    NULL, snmp6_udp6_list);
+	snmp6_seq_show_item(seq, net->mib.udplite_stats_in6,
+			    NULL, snmp6_udplite6_list);
 	return 0;
 }
 
-static int sockstat6_seq_open(struct inode *inode, struct file *file)
+static int snmp6_seq_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, sockstat6_seq_show, NULL);
+	return single_open_net(inode, file, snmp6_seq_show);
 }
 
-static struct file_operations sockstat6_seq_fops = {
+static const struct file_operations snmp6_seq_fops = {
 	.owner	 = THIS_MODULE,
-	.open	 = sockstat6_seq_open,
+	.open	 = snmp6_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = single_release,
+	.release = single_release_net,
 };
 
-static int snmp6_seq_open(struct inode *inode, struct file *file)
+static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
 {
-	return single_open(file, snmp6_seq_show, PDE(inode)->data);
+	struct inet6_dev *idev = (struct inet6_dev *)seq->private;
+
+	seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
+	snmp6_seq_show_item64(seq, idev->stats.ipv6,
+			    snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
+	snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs,
+			    snmp6_icmp6_list);
+	snmp6_seq_show_icmpv6msg(seq, idev->stats.icmpv6msgdev->mibs);
+	return 0;
 }
 
-static struct file_operations snmp6_seq_fops = {
+static int snmp6_dev_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, snmp6_dev_seq_show, PDE_DATA(inode));
+}
+
+static const struct file_operations snmp6_dev_seq_fops = {
 	.owner	 = THIS_MODULE,
-	.open	 = snmp6_seq_open,
+	.open	 = snmp6_dev_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
 	.release = single_release,
@@ -200,102 +269,77 @@ static struct file_operations snmp6_seq_fops = {
 int snmp6_register_dev(struct inet6_dev *idev)
 {
 	struct proc_dir_entry *p;
+	struct net *net;
 
 	if (!idev || !idev->dev)
 		return -EINVAL;
 
-	if (!proc_net_devsnmp6)
+	net = dev_net(idev->dev);
+	if (!net->mib.proc_net_devsnmp6)
 		return -ENOENT;
 
-	p = create_proc_entry(idev->dev->name, S_IRUGO, proc_net_devsnmp6);
+	p = proc_create_data(idev->dev->name, S_IRUGO,
+			     net->mib.proc_net_devsnmp6,
+			     &snmp6_dev_seq_fops, idev);
 	if (!p)
 		return -ENOMEM;
 
-	p->data = idev;
-	p->proc_fops = &snmp6_seq_fops;
-
 	idev->stats.proc_dir_entry = p;
 	return 0;
 }
 
 int snmp6_unregister_dev(struct inet6_dev *idev)
 {
-	if (!proc_net_devsnmp6)
+	struct net *net = dev_net(idev->dev);
+	if (!net->mib.proc_net_devsnmp6)
 		return -ENOENT;
-	if (!idev || !idev->stats.proc_dir_entry)
+	if (!idev->stats.proc_dir_entry)
 		return -EINVAL;
-	remove_proc_entry(idev->stats.proc_dir_entry->name,
-			  proc_net_devsnmp6);
+	proc_remove(idev->stats.proc_dir_entry);
+	idev->stats.proc_dir_entry = NULL;
 	return 0;
 }
 
-int __init ipv6_misc_proc_init(void)
+static int __net_init ipv6_proc_init_net(struct net *net)
 {
-	int rc = 0;
+	if (!proc_create("sockstat6", S_IRUGO, net->proc_net,
+			 &sockstat6_seq_fops))
+		return -ENOMEM;
 
-	if (!proc_net_fops_create("snmp6", S_IRUGO, &snmp6_seq_fops))
+	if (!proc_create("snmp6", S_IRUGO, net->proc_net, &snmp6_seq_fops))
 		goto proc_snmp6_fail;
 
-	proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net);
-	if (!proc_net_devsnmp6)
+	net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net);
+	if (!net->mib.proc_net_devsnmp6)
 		goto proc_dev_snmp6_fail;
+	return 0;
 
-	if (!proc_net_fops_create("sockstat6", S_IRUGO, &sockstat6_seq_fops))
-		goto proc_sockstat6_fail;
-out:
-	return rc;
-
-proc_sockstat6_fail:
-	proc_net_remove("dev_snmp6");
 proc_dev_snmp6_fail:
-	proc_net_remove("snmp6");
+	remove_proc_entry("snmp6", net->proc_net);
 proc_snmp6_fail:
-	rc = -ENOMEM;
-	goto out;
-}
-
-void ipv6_misc_proc_exit(void)
-{
-	proc_net_remove("sockstat6");
-	proc_net_remove("dev_snmp6");
-	proc_net_remove("snmp6");
+	remove_proc_entry("sockstat6", net->proc_net);
+	return -ENOMEM;
 }
 
-#else	/* CONFIG_PROC_FS */
-
-
-int snmp6_register_dev(struct inet6_dev *idev)
+static void __net_exit ipv6_proc_exit_net(struct net *net)
 {
-	return 0;
+	remove_proc_entry("sockstat6", net->proc_net);
+	remove_proc_entry("dev_snmp6", net->proc_net);
+	remove_proc_entry("snmp6", net->proc_net);
 }
 
-int snmp6_unregister_dev(struct inet6_dev *idev)
-{
-	return 0;
-}
-#endif	/* CONFIG_PROC_FS */
+static struct pernet_operations ipv6_proc_ops = {
+	.init = ipv6_proc_init_net,
+	.exit = ipv6_proc_exit_net,
+};
 
-int snmp6_alloc_dev(struct inet6_dev *idev)
+int __init ipv6_misc_proc_init(void)
 {
-	int err = -ENOMEM;
-
-	if (!idev || !idev->dev)
-		return -EINVAL;
-
-	if (snmp6_mib_init((void **)idev->stats.icmpv6, sizeof(struct icmpv6_mib),
-			   __alignof__(struct icmpv6_mib)) < 0)
-		goto err_icmp;
-
-	return 0;
-
-err_icmp:
-	return err;
+	return register_pernet_subsys(&ipv6_proc_ops);
 }
 
-int snmp6_free_dev(struct inet6_dev *idev)
+void ipv6_misc_proc_exit(void)
 {
-	snmp6_mib_free((void **)idev->stats.icmpv6);
-	return 0;
+	unregister_pernet_subsys(&ipv6_proc_ops);
 }
 
-
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 52c1d58b6ca..e048cf1bb6a 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -5,8 +5,6 @@
  *
  *		PF_INET6 protocol dispatch tables.
  *
- * Version:	$Id: protocol.c,v 1.10 2001/05/18 02:25:49 davem Exp $
- *
  * Authors:	Pedro Roque	<roque@di.fc.ul.pt>
  *
  *		This program is free software; you can redistribute it and/or
@@ -22,65 +20,54 @@
  *      - Removed unused variable 'inet6_protocol_base'
  *      - Modified inet6_del_protocol() to correctly maintain copy bit.
  */
-
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/sched.h>
-#include <linux/net.h>
-#include <linux/in6.h>
+#include <linux/module.h>
 #include <linux/netdevice.h>
-#include <linux/if_arp.h>
-
-#include <net/sock.h>
-#include <net/snmp.h>
-
-#include <net/ipv6.h>
+#include <linux/spinlock.h>
 #include <net/protocol.h>
 
-struct inet6_protocol *inet6_protos[MAX_INET_PROTOS];
-static DEFINE_SPINLOCK(inet6_proto_lock);
+#if IS_ENABLED(CONFIG_IPV6)
+const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
+EXPORT_SYMBOL(inet6_protos);
 
-
-int inet6_add_protocol(struct inet6_protocol *prot, unsigned char protocol)
+int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
 {
-	int ret, hash = protocol & (MAX_INET_PROTOS - 1);
+	return !cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol],
+			NULL, prot) ? 0 : -1;
+}
+EXPORT_SYMBOL(inet6_add_protocol);
 
-	spin_lock_bh(&inet6_proto_lock);
+int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol)
+{
+	int ret;
 
-	if (inet6_protos[hash]) {
-		ret = -1;
-	} else {
-		inet6_protos[hash] = prot;
-		ret = 0;
-	}
+	ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol],
+		       prot, NULL) == prot) ? 0 : -1;
 
-	spin_unlock_bh(&inet6_proto_lock);
+	synchronize_net();
 
 	return ret;
 }
+EXPORT_SYMBOL(inet6_del_protocol);
+#endif
 
-/*
- *	Remove a protocol from the hash tables.
- */
- 
-int inet6_del_protocol(struct inet6_protocol *prot, unsigned char protocol)
-{
-	int ret, hash = protocol & (MAX_INET_PROTOS - 1);
+const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS] __read_mostly;
 
-	spin_lock_bh(&inet6_proto_lock);
+int inet6_add_offload(const struct net_offload *prot, unsigned char protocol)
+{
+	return !cmpxchg((const struct net_offload **)&inet6_offloads[protocol],
+			NULL, prot) ? 0 : -1;
+}
+EXPORT_SYMBOL(inet6_add_offload);
 
-	if (inet6_protos[hash] != prot) {
-		ret = -1;
-	} else {
-		inet6_protos[hash] = NULL;
-		ret = 0;
-	}
+int inet6_del_offload(const struct net_offload *prot, unsigned char protocol)
+{
+	int ret;
 
-	spin_unlock_bh(&inet6_proto_lock);
+	ret = (cmpxchg((const struct net_offload **)&inet6_offloads[protocol],
+		       prot, NULL) == prot) ? 0 : -1;
 
 	synchronize_net();
 
 	return ret;
 }
+EXPORT_SYMBOL(inet6_del_offload);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a1265a320b1..b2dc60b0c76 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1,17 +1,15 @@
 /*
  *	RAW sockets for IPv6
- *	Linux INET6 implementation 
+ *	Linux INET6 implementation
  *
  *	Authors:
- *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *	Pedro Roque		<roque@di.fc.ul.pt>
  *
  *	Adapted from linux/net/ipv4/raw.c
  *
- *	$Id: raw.c,v 1.51 2002/02/01 22:01:04 davem Exp $
- *
  *	Fixes:
  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
- *	YOSHIFUJI,H.@USAGI	:	raw checksum (RFC2292(bis) compliance) 
+ *	YOSHIFUJI,H.@USAGI	:	raw checksum (RFC2292(bis) compliance)
  *	Kazunori MIYAZAWA @USAGI:	change process style to use ip6_append_data
  *
  *	This program is free software; you can redistribute it and/or
@@ -23,8 +21,8 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
+#include <linux/slab.h>
 #include <linux/sockios.h>
-#include <linux/sched.h>
 #include <linux/net.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
@@ -32,10 +30,12 @@
 #include <linux/icmpv6.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
+#include <linux/skbuff.h>
+#include <linux/compat.h>
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
-#include <asm/bug.h>
 
+#include <net/net_namespace.h>
 #include <net/ip.h>
 #include <net/sock.h>
 #include <net/snmp.h>
@@ -50,57 +50,46 @@
 #include <net/udp.h>
 #include <net/inet_common.h>
 #include <net/tcp_states.h>
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+#include <net/mip6.h>
+#endif
+#include <linux/mroute6.h>
 
+#include <net/raw.h>
 #include <net/rawv6.h>
 #include <net/xfrm.h>
 
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/export.h>
 
-struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE];
-DEFINE_RWLOCK(raw_v6_lock);
-
-static void raw_v6_hash(struct sock *sk)
-{
-	struct hlist_head *list = &raw_v6_htable[inet_sk(sk)->num &
-						 (RAWV6_HTABLE_SIZE - 1)];
+#define	ICMPV6_HDRLEN	4	/* ICMPv6 header, RFC 4443 Section 2.1 */
 
-	write_lock_bh(&raw_v6_lock);
-	sk_add_node(sk, list);
-	sock_prot_inc_use(sk->sk_prot);
- 	write_unlock_bh(&raw_v6_lock);
-}
+static struct raw_hashinfo raw_v6_hashinfo = {
+	.lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
+};
 
-static void raw_v6_unhash(struct sock *sk)
+static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
+		unsigned short num, const struct in6_addr *loc_addr,
+		const struct in6_addr *rmt_addr, int dif)
 {
- 	write_lock_bh(&raw_v6_lock);
-	if (sk_del_node_init(sk))
-		sock_prot_dec_use(sk->sk_prot);
-	write_unlock_bh(&raw_v6_lock);
-}
+	bool is_multicast = ipv6_addr_is_multicast(loc_addr);
 
+	sk_for_each_from(sk)
+		if (inet_sk(sk)->inet_num == num) {
 
-/* Grumble... icmp and ip_input want to get at this... */
-struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
-			     struct in6_addr *loc_addr, struct in6_addr *rmt_addr,
-			     int dif)
-{
-	struct hlist_node *node;
-	int is_multicast = ipv6_addr_is_multicast(loc_addr);
-
-	sk_for_each_from(sk, node)
-		if (inet_sk(sk)->num == num) {
-			struct ipv6_pinfo *np = inet6_sk(sk);
+			if (!net_eq(sock_net(sk), net))
+				continue;
 
-			if (!ipv6_addr_any(&np->daddr) &&
-			    !ipv6_addr_equal(&np->daddr, rmt_addr))
+			if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+			    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
 				continue;
 
 			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
 				continue;
 
-			if (!ipv6_addr_any(&np->rcv_saddr)) {
-				if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+			if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+				if (ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
 					goto found;
 				if (is_multicast &&
 				    inet6_mc_check(sk, loc_addr, rmt_addr))
@@ -118,22 +107,46 @@ found:
  *	0 - deliver
  *	1 - block
  */
-static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
+static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb)
 {
-	struct icmp6hdr *icmph;
-	struct raw6_sock *rp = raw6_sk(sk);
+	struct icmp6hdr _hdr;
+	const struct icmp6hdr *hdr;
 
-	if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) {
-		__u32 *data = &rp->filter.data[0];
-		int bit_nr;
-
-		icmph = (struct icmp6hdr *) skb->data;
-		bit_nr = icmph->icmp6_type;
+	/* We require only the four bytes of the ICMPv6 header, not any
+	 * additional bytes of message body in "struct icmp6hdr".
+	 */
+	hdr = skb_header_pointer(skb, skb_transport_offset(skb),
+				 ICMPV6_HDRLEN, &_hdr);
+	if (hdr) {
+		const __u32 *data = &raw6_sk(sk)->filter.data[0];
+		unsigned int type = hdr->icmp6_type;
 
-		return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0;
+		return (data[type >> 5] & (1U << (type & 31))) != 0;
 	}
+	return 1;
+}
+
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb);
+
+static mh_filter_t __rcu *mh_filter __read_mostly;
+
+int rawv6_mh_filter_register(mh_filter_t filter)
+{
+	rcu_assign_pointer(mh_filter, filter);
+	return 0;
+}
+EXPORT_SYMBOL(rawv6_mh_filter_register);
+
+int rawv6_mh_filter_unregister(mh_filter_t filter)
+{
+	RCU_INIT_POINTER(mh_filter, NULL);
+	synchronize_rcu();
 	return 0;
 }
+EXPORT_SYMBOL(rawv6_mh_filter_unregister);
+
+#endif
 
 /*
  *	demultiplex raw sockets.
@@ -142,66 +155,110 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
  *
  *	Caller owns SKB so we must make clones.
  */
-int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
+static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 {
-	struct in6_addr *saddr;
-	struct in6_addr *daddr;
+	const struct in6_addr *saddr;
+	const struct in6_addr *daddr;
 	struct sock *sk;
-	int delivered = 0;
+	bool delivered = false;
 	__u8 hash;
+	struct net *net;
 
-	saddr = &skb->nh.ipv6h->saddr;
+	saddr = &ipv6_hdr(skb)->saddr;
 	daddr = saddr + 1;
 
-	hash = nexthdr & (MAX_INET_PROTOS - 1);
+	hash = nexthdr & (RAW_HTABLE_SIZE - 1);
 
-	read_lock(&raw_v6_lock);
-	sk = sk_head(&raw_v6_htable[hash]);
-
-	/*
-	 *	The first socket found will be delivered after
-	 *	delivery to transport protocols.
-	 */
+	read_lock(&raw_v6_hashinfo.lock);
+	sk = sk_head(&raw_v6_hashinfo.ht[hash]);
 
 	if (sk == NULL)
 		goto out;
 
-	sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
+	net = dev_net(skb->dev);
+	sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
 
 	while (sk) {
-		delivered = 1;
-		if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) {
+		int filtered;
+
+		delivered = true;
+		switch (nexthdr) {
+		case IPPROTO_ICMPV6:
+			filtered = icmpv6_filter(sk, skb);
+			break;
+
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+		case IPPROTO_MH:
+		{
+			/* XXX: To validate MH only once for each packet,
+			 * this is placed here. It should be after checking
+			 * xfrm policy, however it doesn't. The checking xfrm
+			 * policy is placed in rawv6_rcv() because it is
+			 * required for each socket.
+			 */
+			mh_filter_t *filter;
+
+			filter = rcu_dereference(mh_filter);
+			filtered = filter ? (*filter)(sk, skb) : 0;
+			break;
+		}
+#endif
+		default:
+			filtered = 0;
+			break;
+		}
+
+		if (filtered < 0)
+			break;
+		if (filtered == 0) {
 			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
 
 			/* Not releasing hash table! */
-			if (clone)
+			if (clone) {
+				nf_reset(clone);
 				rawv6_rcv(sk, clone);
+			}
 		}
-		sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr,
+		sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
 				     IP6CB(skb)->iif);
 	}
 out:
-	read_unlock(&raw_v6_lock);
+	read_unlock(&raw_v6_hashinfo.lock);
 	return delivered;
 }
 
+bool raw6_local_deliver(struct sk_buff *skb, int nexthdr)
+{
+	struct sock *raw_sk;
+
+	raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (RAW_HTABLE_SIZE - 1)]);
+	if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
+		raw_sk = NULL;
+
+	return raw_sk != NULL;
+}
+
 /* This cleans up af_inet6 a bit. -DaveM */
 static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr;
-	__u32 v4addr = 0;
+	__be32 v4addr = 0;
 	int addr_type;
 	int err;
 
 	if (addr_len < SIN6_LEN_RFC2133)
 		return -EINVAL;
+
+	if (addr->sin6_family != AF_INET6)
+		return -EINVAL;
+
 	addr_type = ipv6_addr_type(&addr->sin6_addr);
 
 	/* Raw sockets are IPv6 only */
 	if (addr_type == IPV6_ADDR_MAPPED)
-		return(-EADDRNOTAVAIL);
+		return -EADDRNOTAVAIL;
 
 	lock_sock(sk);
 
@@ -209,11 +266,12 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (sk->sk_state != TCP_CLOSE)
 		goto out;
 
+	rcu_read_lock();
 	/* Check if the address belongs to the host. */
 	if (addr_type != IPV6_ADDR_ANY) {
 		struct net_device *dev = NULL;
 
-		if (addr_type & IPV6_ADDR_LINKLOCAL) {
+		if (__ipv6_addr_needs_scope_id(addr_type)) {
 			if (addr_len >= sizeof(struct sockaddr_in6) &&
 			    addr->sin6_scope_id) {
 				/* Override any existing binding, if another
@@ -221,47 +279,46 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 				 */
 				sk->sk_bound_dev_if = addr->sin6_scope_id;
 			}
-			
+
 			/* Binding to link-local address requires an interface */
 			if (!sk->sk_bound_dev_if)
-				goto out;
+				goto out_unlock;
 
-			dev = dev_get_by_index(sk->sk_bound_dev_if);
-			if (!dev) {
-				err = -ENODEV;
-				goto out;
-			}
+			err = -ENODEV;
+			dev = dev_get_by_index_rcu(sock_net(sk),
+						   sk->sk_bound_dev_if);
+			if (!dev)
+				goto out_unlock;
 		}
-		
+
 		/* ipv4 addr of the socket is invalid.  Only the
 		 * unspecified and mapped address have a v4 equivalent.
 		 */
 		v4addr = LOOPBACK4_IPV6;
 		if (!(addr_type & IPV6_ADDR_MULTICAST))	{
 			err = -EADDRNOTAVAIL;
-			if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) {
-				if (dev)
-					dev_put(dev);
-				goto out;
+			if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr,
+					   dev, 0)) {
+				goto out_unlock;
 			}
 		}
-		if (dev)
-			dev_put(dev);
 	}
 
-	inet->rcv_saddr = inet->saddr = v4addr;
-	ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
+	inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
+	sk->sk_v6_rcv_saddr = addr->sin6_addr;
 	if (!(addr_type & IPV6_ADDR_MULTICAST))
-		ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
+		np->saddr = addr->sin6_addr;
 	err = 0;
+out_unlock:
+	rcu_read_unlock();
 out:
 	release_sock(sk);
 	return err;
 }
 
-void rawv6_err(struct sock *sk, struct sk_buff *skb,
+static void rawv6_err(struct sock *sk, struct sk_buff *skb,
 	       struct inet6_skb_parm *opt,
-	       int type, int code, int offset, u32 info)
+	       u8 type, u8 code, int offset, __be32 info)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -277,9 +334,14 @@ void rawv6_err(struct sock *sk, struct sk_buff *skb,
 		return;
 
 	harderr = icmpv6_err_convert(type, code, &err);
-	if (type == ICMPV6_PKT_TOOBIG)
+	if (type == ICMPV6_PKT_TOOBIG) {
+		ip6_sk_update_pmtu(skb, sk, info);
 		harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
-
+	}
+	if (type == NDISC_REDIRECT) {
+		ip6_sk_redirect(skb, sk);
+		return;
+	}
 	if (np->recverr) {
 		u8 *payload = skb->data;
 		if (!inet->hdrincl)
@@ -293,33 +355,59 @@ void rawv6_err(struct sock *sk, struct sk_buff *skb,
 	}
 }
 
-static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
+void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
+		u8 type, u8 code, int inner_offset, __be32 info)
 {
-	if ((raw6_sk(sk)->checksum || sk->sk_filter) && 
-	    skb->ip_summed != CHECKSUM_UNNECESSARY) {
-		if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
-			/* FIXME: increment a raw6 drops counter here */
-			kfree_skb(skb);
-			return 0;
+	struct sock *sk;
+	int hash;
+	const struct in6_addr *saddr, *daddr;
+	struct net *net;
+
+	hash = nexthdr & (RAW_HTABLE_SIZE - 1);
+
+	read_lock(&raw_v6_hashinfo.lock);
+	sk = sk_head(&raw_v6_hashinfo.ht[hash]);
+	if (sk != NULL) {
+		/* Note: ipv6_hdr(skb) != skb->data */
+		const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data;
+		saddr = &ip6h->saddr;
+		daddr = &ip6h->daddr;
+		net = dev_net(skb->dev);
+
+		while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
+						IP6CB(skb)->iif))) {
+			rawv6_err(sk, skb, NULL, type, code,
+					inner_offset, info);
+			sk = sk_next(sk);
 		}
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+	read_unlock(&raw_v6_hashinfo.lock);
+}
+
+static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) &&
+	    skb_checksum_complete(skb)) {
+		atomic_inc(&sk->sk_drops);
+		kfree_skb(skb);
+		return NET_RX_DROP;
 	}
 
 	/* Charge it to the socket. */
-	if (sock_queue_rcv_skb(sk,skb)<0) {
-		/* FIXME: increment a raw6 drops counter here */
+	skb_dst_drop(skb);
+	if (sock_queue_rcv_skb(sk, skb) < 0) {
 		kfree_skb(skb);
-		return 0;
+		return NET_RX_DROP;
 	}
 
 	return 0;
 }
 
 /*
- *	This is next to useless... 
+ *	This is next to useless...
  *	if we demultiplex in network layer we don't need the extra call
- *	just to queue the skb... 
- *	maybe we could have the network decide upon a hint if it 
+ *	just to queue the skb...
+ *	maybe we could have the network decide upon a hint if it
  *	should call raw_rcv for demultiplexing
  */
 int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
@@ -327,40 +415,35 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 	struct inet_sock *inet = inet_sk(sk);
 	struct raw6_sock *rp = raw6_sk(sk);
 
-        if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
-                kfree_skb(skb);
-                return NET_RX_DROP;
-        }
+	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
+		atomic_inc(&sk->sk_drops);
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
 
 	if (!rp->checksum)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
-		if (skb->ip_summed == CHECKSUM_HW) {
-			skb_postpull_rcsum(skb, skb->nh.raw,
-			                   skb->h.raw - skb->nh.raw);
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		skb_postpull_rcsum(skb, skb_network_header(skb),
+				   skb_network_header_len(skb));
+		if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+				     &ipv6_hdr(skb)->daddr,
+				     skb->len, inet->inet_num, skb->csum))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			if (csum_ipv6_magic(&skb->nh.ipv6h->saddr,
-					    &skb->nh.ipv6h->daddr,
-					    skb->len, inet->num, skb->csum)) {
-				LIMIT_NETDEBUG(KERN_DEBUG "raw v6 hw csum failure.\n");
-				skb->ip_summed = CHECKSUM_NONE;
-			}
-		}
-		if (skb->ip_summed == CHECKSUM_NONE)
-			skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr,
-						     &skb->nh.ipv6h->daddr,
-						     skb->len, inet->num, 0);
 	}
+	if (!skb_csum_unnecessary(skb))
+		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+							 &ipv6_hdr(skb)->daddr,
+							 skb->len,
+							 inet->inet_num, 0));
 
 	if (inet->hdrincl) {
-		if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
-		    (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
-			/* FIXME: increment a raw6 drops counter here */
+		if (skb_checksum_complete(skb)) {
+			atomic_inc(&sk->sk_drops);
 			kfree_skb(skb);
-			return 0;
+			return NET_RX_DROP;
 		}
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
 	rawv6_rcv_skb(sk, skb);
@@ -378,34 +461,34 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 		  int noblock, int flags, int *addr_len)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name;
+	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
 	struct sk_buff *skb;
 	size_t copied;
 	int err;
 
 	if (flags & MSG_OOB)
 		return -EOPNOTSUPP;
-		
-	if (addr_len) 
-		*addr_len=sizeof(*sin6);
 
 	if (flags & MSG_ERRQUEUE)
-		return ipv6_recv_error(sk, msg, len);
+		return ipv6_recv_error(sk, msg, len, addr_len);
+
+	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
+		return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
 
 	skb = skb_recv_datagram(sk, flags, noblock, &err);
 	if (!skb)
 		goto out;
 
 	copied = skb->len;
-  	if (copied > len) {
-  		copied = len;
-  		msg->msg_flags |= MSG_TRUNC;
-  	}
+	if (copied > len) {
+		copied = len;
+		msg->msg_flags |= MSG_TRUNC;
+	}
 
-	if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
+	if (skb_csum_unnecessary(skb)) {
 		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 	} else if (msg->msg_flags&MSG_TRUNC) {
-		if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)))
+		if (__skb_checksum_complete(skb))
 			goto csum_copy_err;
 		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 	} else {
@@ -419,17 +502,18 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	/* Copy the address. */
 	if (sin6) {
 		sin6->sin6_family = AF_INET6;
-		ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
+		sin6->sin6_port = 0;
+		sin6->sin6_addr = ipv6_hdr(skb)->saddr;
 		sin6->sin6_flowinfo = 0;
-		sin6->sin6_scope_id = 0;
-		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-			sin6->sin6_scope_id = IP6CB(skb)->iif;
+		sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
+							  IP6CB(skb)->iif);
+		*addr_len = sizeof(*sin6);
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (np->rxopt.all)
-		datagram_recv_ctl(sk, msg, skb);
+		ip6_datagram_recv_ctl(sk, msg, skb);
 
 	err = copied;
 	if (flags & MSG_TRUNC)
@@ -441,28 +525,16 @@ out:
 	return err;
 
 csum_copy_err:
-	/* Clear queue. */
-	if (flags&MSG_PEEK) {
-		int clear = 0;
-		spin_lock_bh(&sk->sk_receive_queue.lock);
-		if (skb == skb_peek(&sk->sk_receive_queue)) {
-			__skb_unlink(skb, &sk->sk_receive_queue);
-			clear = 1;
-		}
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
-		if (clear)
-			kfree_skb(skb);
-	}
+	skb_kill_datagram(sk, skb, flags);
 
 	/* Error for blocking case is chosen to masquerade
 	   as some normal condition.
 	 */
 	err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
-	/* FIXME: increment a raw6 drops counter here */
-	goto out_free;
+	goto out;
 }
 
-static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
+static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
 				     struct raw6_sock *rp)
 {
 	struct sk_buff *skb;
@@ -470,8 +542,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 	int offset;
 	int len;
 	int total_len;
-	u32 tmp_csum;
-	u16 csum;
+	__wsum tmp_csum;
+	__sum16 csum;
 
 	if (!rp->checksum)
 		goto send;
@@ -480,7 +552,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 		goto out;
 
 	offset = rp->offset;
-	total_len = inet_sk(sk)->cork.length - (skb->nh.raw - skb->data);
+	total_len = inet_sk(sk)->cork.base.length;
 	if (offset >= total_len - 1) {
 		err = -EINVAL;
 		ip6_flush_pending_frames(sk);
@@ -503,7 +575,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 			if (csum_skb)
 				continue;
 
-			len = skb->len - (skb->h.raw - skb->data);
+			len = skb->len - skb_transport_offset(skb);
 			if (offset >= len) {
 				offset -= len;
 				continue;
@@ -515,22 +587,20 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 		skb = csum_skb;
 	}
 
-	offset += skb->h.raw - skb->data;
+	offset += skb_transport_offset(skb);
 	if (skb_copy_bits(skb, offset, &csum, 2))
 		BUG();
 
 	/* in case cksum was not initialized */
 	if (unlikely(csum))
-		tmp_csum = csum_sub(tmp_csum, csum);
+		tmp_csum = csum_sub(tmp_csum, csum_unfold(csum));
 
-	tmp_csum = csum_ipv6_magic(&fl->fl6_src,
-				   &fl->fl6_dst,
-				   total_len, fl->proto, tmp_csum);
+	csum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
+			       total_len, fl6->flowi6_proto, tmp_csum);
 
-	if (tmp_csum == 0)
-		tmp_csum = -1;
+	if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP)
+		csum = CSUM_MANGLED_0;
 
-	csum = tmp_csum;
 	if (skb_store_bits(skb, offset, &csum, 2))
 		BUG();
 
@@ -541,47 +611,53 @@ out:
 }
 
 static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
-			struct flowi *fl, struct rt6_info *rt, 
+			struct flowi6 *fl6, struct dst_entry **dstp,
 			unsigned int flags)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6hdr *iph;
 	struct sk_buff *skb;
-	unsigned int hh_len;
 	int err;
+	struct rt6_info *rt = (struct rt6_info *)*dstp;
+	int hlen = LL_RESERVED_SPACE(rt->dst.dev);
+	int tlen = rt->dst.dev->needed_tailroom;
 
-	if (length > rt->u.dst.dev->mtu) {
-		ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu);
+	if (length > rt->dst.dev->mtu) {
+		ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu);
 		return -EMSGSIZE;
 	}
 	if (flags&MSG_PROBE)
 		goto out;
 
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
-
-	skb = sock_alloc_send_skb(sk, length+hh_len+15,
-				  flags&MSG_DONTWAIT, &err);
+	skb = sock_alloc_send_skb(sk,
+				  length + hlen + tlen + 15,
+				  flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
-		goto error; 
-	skb_reserve(skb, hh_len);
+		goto error;
+	skb_reserve(skb, hlen);
 
+	skb->protocol = htons(ETH_P_IPV6);
 	skb->priority = sk->sk_priority;
-	skb->dst = dst_clone(&rt->u.dst);
+	skb->mark = sk->sk_mark;
+	skb_dst_set(skb, &rt->dst);
+	*dstp = NULL;
 
-	skb->nh.ipv6h = iph = (struct ipv6hdr *)skb_put(skb, length);
+	skb_put(skb, length);
+	skb_reset_network_header(skb);
+	iph = ipv6_hdr(skb);
 
 	skb->ip_summed = CHECKSUM_NONE;
 
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 	err = memcpy_fromiovecend((void *)iph, from, 0, length);
 	if (err)
 		goto error_fault;
 
-	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);		
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
-		      dst_output);
+	IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
+	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
+		      rt->dst.dev, dst_output);
 	if (err > 0)
-		err = np->recverr ? net_xmit_errno(err) : 0;
+		err = net_xmit_errno(err);
 	if (err)
 		goto error;
 out:
@@ -591,27 +667,30 @@ error_fault:
 	err = -EFAULT;
 	kfree_skb(skb);
 error:
-	IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
-	return err; 
+	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+	if (err == -ENOBUFS && !np->recverr)
+		err = 0;
+	return err;
 }
 
-static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
+static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg)
 {
 	struct iovec *iov;
 	u8 __user *type = NULL;
 	u8 __user *code = NULL;
+	u8 len = 0;
 	int probed = 0;
 	int i;
 
 	if (!msg->msg_iov)
-		return;
+		return 0;
 
 	for (i = 0; i < msg->msg_iovlen; i++) {
 		iov = &msg->msg_iov[i];
 		if (!iov)
 			continue;
 
-		switch (fl->proto) {
+		switch (fl6->flowi6_proto) {
 		case IPPROTO_ICMPV6:
 			/* check if one-byte field is readable or not. */
 			if (iov->iov_base && iov->iov_len < 1)
@@ -626,11 +705,25 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
 				code = iov->iov_base;
 
 			if (type && code) {
-				get_user(fl->fl_icmp_type, type);
-				get_user(fl->fl_icmp_code, code);
+				if (get_user(fl6->fl6_icmp_type, type) ||
+				    get_user(fl6->fl6_icmp_code, code))
+					return -EFAULT;
 				probed = 1;
 			}
 			break;
+		case IPPROTO_MH:
+			if (iov->iov_base && iov->iov_len < 1)
+				break;
+			/* check if type field is readable or not. */
+			if (iov->iov_len > 2 - len) {
+				u8 __user *p = iov->iov_base;
+				if (get_user(fl6->fl6_mh_type, &p[2 - len]))
+					return -EFAULT;
+				probed = 1;
+			} else
+				len += iov->iov_len;
+
+			break;
 		default:
 			probed = 1;
 			break;
@@ -638,68 +731,71 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
 		if (probed)
 			break;
 	}
+	return 0;
 }
 
 static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		   struct msghdr *msg, size_t len)
 {
 	struct ipv6_txoptions opt_space;
-	struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
-	struct in6_addr *daddr, *final_p = NULL, final;
+	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
+	struct in6_addr *daddr, *final_p, final;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct raw6_sock *rp = raw6_sk(sk);
 	struct ipv6_txoptions *opt = NULL;
 	struct ip6_flowlabel *flowlabel = NULL;
 	struct dst_entry *dst = NULL;
-	struct flowi fl;
+	struct flowi6 fl6;
 	int addr_len = msg->msg_namelen;
 	int hlimit = -1;
 	int tclass = -1;
+	int dontfrag = -1;
 	u16 proto;
 	int err;
 
 	/* Rough check on arithmetic overflow,
-	   better check is made in ip6_build_xmit
+	   better check is made in ip6_append_data().
 	 */
-	if (len < 0)
+	if (len > INT_MAX)
 		return -EMSGSIZE;
 
 	/* Mirror BSD error message compatibility */
-	if (msg->msg_flags & MSG_OOB)		
+	if (msg->msg_flags & MSG_OOB)
 		return -EOPNOTSUPP;
 
 	/*
-	 *	Get and verify the address. 
+	 *	Get and verify the address.
 	 */
-	memset(&fl, 0, sizeof(fl));
+	memset(&fl6, 0, sizeof(fl6));
+
+	fl6.flowi6_mark = sk->sk_mark;
 
 	if (sin6) {
-		if (addr_len < SIN6_LEN_RFC2133) 
+		if (addr_len < SIN6_LEN_RFC2133)
 			return -EINVAL;
 
-		if (sin6->sin6_family && sin6->sin6_family != AF_INET6) 
-			return(-EAFNOSUPPORT);
+		if (sin6->sin6_family && sin6->sin6_family != AF_INET6)
+			return -EAFNOSUPPORT;
 
 		/* port is the proto value [0..255] carried in nexthdr */
 		proto = ntohs(sin6->sin6_port);
 
 		if (!proto)
-			proto = inet->num;
-		else if (proto != inet->num)
-			return(-EINVAL);
+			proto = inet->inet_num;
+		else if (proto != inet->inet_num)
+			return -EINVAL;
 
 		if (proto > 255)
-			return(-EINVAL);
+			return -EINVAL;
 
 		daddr = &sin6->sin6_addr;
 		if (np->sndflow) {
-			fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
-			if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
-				flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+			fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+			if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
+				flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
 				if (flowlabel == NULL)
 					return -EINVAL;
-				daddr = &flowlabel->dst;
 			}
 		}
 
@@ -708,46 +804,38 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		 * sk->sk_dst_cache.
 		 */
 		if (sk->sk_state == TCP_ESTABLISHED &&
-		    ipv6_addr_equal(daddr, &np->daddr))
-			daddr = &np->daddr;
+		    ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
+			daddr = &sk->sk_v6_daddr;
 
 		if (addr_len >= sizeof(struct sockaddr_in6) &&
 		    sin6->sin6_scope_id &&
-		    ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
-			fl.oif = sin6->sin6_scope_id;
+		    __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
+			fl6.flowi6_oif = sin6->sin6_scope_id;
 	} else {
-		if (sk->sk_state != TCP_ESTABLISHED) 
+		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
-		
-		proto = inet->num;
-		daddr = &np->daddr;
-		fl.fl6_flowlabel = np->flow_label;
-	}
 
-	if (ipv6_addr_any(daddr)) {
-		/* 
-		 * unspecified destination address 
-		 * treated as error... is this correct ?
-		 */
-		fl6_sock_release(flowlabel);
-		return(-EINVAL);
+		proto = inet->inet_num;
+		daddr = &sk->sk_v6_daddr;
+		fl6.flowlabel = np->flow_label;
 	}
 
-	if (fl.oif == 0)
-		fl.oif = sk->sk_bound_dev_if;
+	if (fl6.flowi6_oif == 0)
+		fl6.flowi6_oif = sk->sk_bound_dev_if;
 
 	if (msg->msg_controllen) {
 		opt = &opt_space;
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 
-		err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
+		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+					    &hlimit, &tclass, &dontfrag);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
 		}
-		if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
-			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+		if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
 			if (flowlabel == NULL)
 				return -EINVAL;
 		}
@@ -756,76 +844,65 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	}
 	if (opt == NULL)
 		opt = np->opt;
-	opt = fl6_merge_options(&opt_space, flowlabel, opt);
-
-	fl.proto = proto;
-	rawv6_probe_proto_opt(&fl, msg);
- 
-	ipv6_addr_copy(&fl.fl6_dst, daddr);
-	if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
-		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-
-	/* merge ip6_build_xmit from ip6_output */
-	if (opt && opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	if (flowlabel)
+		opt = fl6_merge_options(&opt_space, flowlabel, opt);
+	opt = ipv6_fixup_options(&opt_space, opt);
 
-	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
-		fl.oif = np->mcast_oif;
-
-	err = ip6_dst_lookup(sk, &dst, &fl);
+	fl6.flowi6_proto = proto;
+	err = rawv6_probe_proto_opt(&fl6, msg);
 	if (err)
 		goto out;
-	if (final_p)
-		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
-		goto out;
+	if (!ipv6_addr_any(daddr))
+		fl6.daddr = *daddr;
+	else
+		fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
+	if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
+		fl6.saddr = np->saddr;
 
-	if (hlimit < 0) {
-		if (ipv6_addr_is_multicast(&fl.fl6_dst))
-			hlimit = np->mcast_hops;
-		else
-			hlimit = np->hop_limit;
-		if (hlimit < 0)
-			hlimit = dst_metric(dst, RTAX_HOPLIMIT);
-		if (hlimit < 0)
-			hlimit = ipv6_get_hoplimit(dst->dev);
-	}
+	final_p = fl6_update_dst(&fl6, opt, &final);
+
+	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+		fl6.flowi6_oif = np->mcast_oif;
+	else if (!fl6.flowi6_oif)
+		fl6.flowi6_oif = np->ucast_oif;
+	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-	if (tclass < 0) {
-		tclass = np->cork.tclass;
-		if (tclass < 0)
-			tclass = 0;
+	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
+		goto out;
 	}
+	if (hlimit < 0)
+		hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+
+	if (tclass < 0)
+		tclass = np->tclass;
+
+	if (dontfrag < 0)
+		dontfrag = np->dontfrag;
 
 	if (msg->msg_flags&MSG_CONFIRM)
 		goto do_confirm;
 
 back_from_confirm:
-	if (inet->hdrincl) {
-		err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags);
-	} else {
+	if (inet->hdrincl)
+		err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl6, &dst, msg->msg_flags);
+	else {
 		lock_sock(sk);
 		err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
-			len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst,
-			msg->msg_flags);
+			len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info*)dst,
+			msg->msg_flags, dontfrag);
 
 		if (err)
 			ip6_flush_pending_frames(sk);
 		else if (!(msg->msg_flags & MSG_MORE))
-			err = rawv6_push_pending_frames(sk, &fl, rp);
+			err = rawv6_push_pending_frames(sk, &fl6, rp);
+		release_sock(sk);
 	}
 done:
-	ip6_dst_store(sk, dst,
-		      ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
-		      &np->daddr : NULL);
-
-	release_sock(sk);
-out:	
+	dst_release(dst);
+out:
 	fl6_sock_release(flowlabel);
 	return err<0?err:len;
 do_confirm:
@@ -836,7 +913,7 @@ do_confirm:
 	goto done;
 }
 
-static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, 
+static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
 			       char __user *optval, int optlen)
 {
 	switch (optname) {
@@ -848,12 +925,12 @@ static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
 		return 0;
 	default:
 		return -ENOPROTOOPT;
-	};
+	}
 
 	return 0;
 }
 
-static int rawv6_geticmpfilter(struct sock *sk, int level, int optname, 
+static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
 			       char __user *optval, int __user *optlen)
 {
 	int len;
@@ -873,87 +950,113 @@ static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
 		return 0;
 	default:
 		return -ENOPROTOOPT;
-	};
+	}
 
 	return 0;
 }
 
 
-static int rawv6_setsockopt(struct sock *sk, int level, int optname, 
-			    char __user *optval, int optlen)
+static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
+			    char __user *optval, unsigned int optlen)
 {
 	struct raw6_sock *rp = raw6_sk(sk);
 	int val;
 
-	switch(level) {
-		case SOL_RAW:
-			break;
-
-		case SOL_ICMPV6:
-			if (inet_sk(sk)->num != IPPROTO_ICMPV6)
-				return -EOPNOTSUPP;
-			return rawv6_seticmpfilter(sk, level, optname, optval,
-						   optlen);
-		case SOL_IPV6:
-			if (optname == IPV6_CHECKSUM)
-				break;
-		default:
-			return ipv6_setsockopt(sk, level, optname, optval,
-					       optlen);
-	};
-
-  	if (get_user(val, (int __user *)optval))
+	if (get_user(val, (int __user *)optval))
 		return -EFAULT;
 
 	switch (optname) {
-		case IPV6_CHECKSUM:
-			/* You may get strange result with a positive odd offset;
-			   RFC2292bis agrees with me. */
-			if (val > 0 && (val&1))
-				return(-EINVAL);
-			if (val < 0) {
-				rp->checksum = 0;
-			} else {
-				rp->checksum = 1;
-				rp->offset = val;
-			}
+	case IPV6_CHECKSUM:
+		if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 &&
+		    level == IPPROTO_IPV6) {
+			/*
+			 * RFC3542 tells that IPV6_CHECKSUM socket
+			 * option in the IPPROTO_IPV6 level is not
+			 * allowed on ICMPv6 sockets.
+			 * If you want to set it, use IPPROTO_RAW
+			 * level IPV6_CHECKSUM socket option
+			 * (Linux extension).
+			 */
+			return -EINVAL;
+		}
+
+		/* You may get strange result with a positive odd offset;
+		   RFC2292bis agrees with me. */
+		if (val > 0 && (val&1))
+			return -EINVAL;
+		if (val < 0) {
+			rp->checksum = 0;
+		} else {
+			rp->checksum = 1;
+			rp->offset = val;
+		}
+
+		return 0;
 
-			return 0;
+	default:
+		return -ENOPROTOOPT;
+	}
+}
+
+static int rawv6_setsockopt(struct sock *sk, int level, int optname,
+			  char __user *optval, unsigned int optlen)
+{
+	switch (level) {
+	case SOL_RAW:
+		break;
+
+	case SOL_ICMPV6:
+		if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+			return -EOPNOTSUPP;
+		return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
+	case SOL_IPV6:
+		if (optname == IPV6_CHECKSUM)
 			break;
+	default:
+		return ipv6_setsockopt(sk, level, optname, optval, optlen);
+	}
 
-		default:
-			return(-ENOPROTOOPT);
+	return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
+}
+
+#ifdef CONFIG_COMPAT
+static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
+				   char __user *optval, unsigned int optlen)
+{
+	switch (level) {
+	case SOL_RAW:
+		break;
+	case SOL_ICMPV6:
+		if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+			return -EOPNOTSUPP;
+		return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
+	case SOL_IPV6:
+		if (optname == IPV6_CHECKSUM)
+			break;
+	default:
+		return compat_ipv6_setsockopt(sk, level, optname,
+					      optval, optlen);
 	}
+	return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
 }
+#endif
 
-static int rawv6_getsockopt(struct sock *sk, int level, int optname, 
+static int do_rawv6_getsockopt(struct sock *sk, int level, int optname,
 			    char __user *optval, int __user *optlen)
 {
 	struct raw6_sock *rp = raw6_sk(sk);
 	int val, len;
 
-	switch(level) {
-		case SOL_RAW:
-			break;
-
-		case SOL_ICMPV6:
-			if (inet_sk(sk)->num != IPPROTO_ICMPV6)
-				return -EOPNOTSUPP;
-			return rawv6_geticmpfilter(sk, level, optname, optval,
-						   optlen);
-		case SOL_IPV6:
-			if (optname == IPV6_CHECKSUM)
-				break;
-		default:
-			return ipv6_getsockopt(sk, level, optname, optval,
-					       optlen);
-	};
-
 	if (get_user(len,optlen))
 		return -EFAULT;
 
 	switch (optname) {
 	case IPV6_CHECKSUM:
+		/*
+		 * We allow getsockopt() for IPPROTO_IPV6-level
+		 * IPV6_CHECKSUM socket option on ICMPv6 sockets
+		 * since RFC3542 is silent about it.
+		 */
 		if (rp->checksum == 0)
 			val = -1;
 		else
@@ -973,226 +1076,267 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
 	return 0;
 }
 
-static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
+static int rawv6_getsockopt(struct sock *sk, int level, int optname,
+			  char __user *optval, int __user *optlen)
 {
-	switch(cmd) {
-		case SIOCOUTQ:
-		{
-			int amount = atomic_read(&sk->sk_wmem_alloc);
-			return put_user(amount, (int __user *)arg);
-		}
-		case SIOCINQ:
-		{
-			struct sk_buff *skb;
-			int amount = 0;
-
-			spin_lock_bh(&sk->sk_receive_queue.lock);
-			skb = skb_peek(&sk->sk_receive_queue);
-			if (skb != NULL)
-				amount = skb->tail - skb->h.raw;
-			spin_unlock_bh(&sk->sk_receive_queue.lock);
-			return put_user(amount, (int __user *)arg);
-		}
+	switch (level) {
+	case SOL_RAW:
+		break;
 
-		default:
-			return -ENOIOCTLCMD;
+	case SOL_ICMPV6:
+		if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+			return -EOPNOTSUPP;
+		return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
+	case SOL_IPV6:
+		if (optname == IPV6_CHECKSUM)
+			break;
+	default:
+		return ipv6_getsockopt(sk, level, optname, optval, optlen);
 	}
-}
-
-static void rawv6_close(struct sock *sk, long timeout)
-{
-	if (inet_sk(sk)->num == IPPROTO_RAW)
-		ip6_ra_control(sk, -1, NULL);
 
-	sk_common_release(sk);
+	return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
 }
 
-static int rawv6_init_sk(struct sock *sk)
+#ifdef CONFIG_COMPAT
+static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
+				   char __user *optval, int __user *optlen)
 {
-	if (inet_sk(sk)->num == IPPROTO_ICMPV6) {
-		struct raw6_sock *rp = raw6_sk(sk);
-		rp->checksum = 1;
-		rp->offset   = 2;
+	switch (level) {
+	case SOL_RAW:
+		break;
+	case SOL_ICMPV6:
+		if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+			return -EOPNOTSUPP;
+		return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
+	case SOL_IPV6:
+		if (optname == IPV6_CHECKSUM)
+			break;
+	default:
+		return compat_ipv6_getsockopt(sk, level, optname,
+					      optval, optlen);
 	}
-	return(0);
+	return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
 }
+#endif
 
-struct proto rawv6_prot = {
-	.name =		"RAWv6",
-	.owner =	THIS_MODULE,
-	.close =	rawv6_close,
-	.connect =	ip6_datagram_connect,
-	.disconnect =	udp_disconnect,
-	.ioctl =	rawv6_ioctl,
-	.init =		rawv6_init_sk,
-	.destroy =	inet6_destroy_sock,
-	.setsockopt =	rawv6_setsockopt,
-	.getsockopt =	rawv6_getsockopt,
-	.sendmsg =	rawv6_sendmsg,
-	.recvmsg =	rawv6_recvmsg,
-	.bind =		rawv6_bind,
-	.backlog_rcv =	rawv6_rcv_skb,
-	.hash =		raw_v6_hash,
-	.unhash =	raw_v6_unhash,
-	.obj_size =	sizeof(struct raw6_sock),
-};
-
-#ifdef CONFIG_PROC_FS
-struct raw6_iter_state {
-	int bucket;
-};
-
-#define raw6_seq_private(seq) ((struct raw6_iter_state *)(seq)->private)
-
-static struct sock *raw6_get_first(struct seq_file *seq)
+static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
-	struct sock *sk;
-	struct hlist_node *node;
-	struct raw6_iter_state* state = raw6_seq_private(seq);
+	switch (cmd) {
+	case SIOCOUTQ: {
+		int amount = sk_wmem_alloc_get(sk);
 
-	for (state->bucket = 0; state->bucket < RAWV6_HTABLE_SIZE; ++state->bucket)
-		sk_for_each(sk, node, &raw_v6_htable[state->bucket])
-			if (sk->sk_family == PF_INET6)
-				goto out;
-	sk = NULL;
-out:
-	return sk;
-}
-
-static struct sock *raw6_get_next(struct seq_file *seq, struct sock *sk)
-{
-	struct raw6_iter_state* state = raw6_seq_private(seq);
+		return put_user(amount, (int __user *)arg);
+	}
+	case SIOCINQ: {
+		struct sk_buff *skb;
+		int amount = 0;
 
-	do {
-		sk = sk_next(sk);
-try_again:
-		;
-	} while (sk && sk->sk_family != PF_INET6);
+		spin_lock_bh(&sk->sk_receive_queue.lock);
+		skb = skb_peek(&sk->sk_receive_queue);
+		if (skb != NULL)
+			amount = skb_tail_pointer(skb) -
+				skb_transport_header(skb);
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+		return put_user(amount, (int __user *)arg);
+	}
 
-	if (!sk && ++state->bucket < RAWV6_HTABLE_SIZE) {
-		sk = sk_head(&raw_v6_htable[state->bucket]);
-		goto try_again;
+	default:
+#ifdef CONFIG_IPV6_MROUTE
+		return ip6mr_ioctl(sk, cmd, (void __user *)arg);
+#else
+		return -ENOIOCTLCMD;
+#endif
 	}
-	return sk;
 }
 
-static struct sock *raw6_get_idx(struct seq_file *seq, loff_t pos)
+#ifdef CONFIG_COMPAT
+static int compat_rawv6_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
 {
-	struct sock *sk = raw6_get_first(seq);
-	if (sk)
-		while (pos && (sk = raw6_get_next(seq, sk)) != NULL)
-			--pos;
-	return pos ? NULL : sk;
+	switch (cmd) {
+	case SIOCOUTQ:
+	case SIOCINQ:
+		return -ENOIOCTLCMD;
+	default:
+#ifdef CONFIG_IPV6_MROUTE
+		return ip6mr_compat_ioctl(sk, cmd, compat_ptr(arg));
+#else
+		return -ENOIOCTLCMD;
+#endif
+	}
 }
+#endif
 
-static void *raw6_seq_start(struct seq_file *seq, loff_t *pos)
+static void rawv6_close(struct sock *sk, long timeout)
 {
-	read_lock(&raw_v6_lock);
-	return *pos ? raw6_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+	if (inet_sk(sk)->inet_num == IPPROTO_RAW)
+		ip6_ra_control(sk, -1);
+	ip6mr_sk_done(sk);
+	sk_common_release(sk);
 }
 
-static void *raw6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+static void raw6_destroy(struct sock *sk)
 {
-	struct sock *sk;
+	lock_sock(sk);
+	ip6_flush_pending_frames(sk);
+	release_sock(sk);
 
-	if (v == SEQ_START_TOKEN)
-		sk = raw6_get_first(seq);
-	else
-		sk = raw6_get_next(seq, v);
-	++*pos;
-	return sk;
+	inet6_destroy_sock(sk);
 }
 
-static void raw6_seq_stop(struct seq_file *seq, void *v)
+static int rawv6_init_sk(struct sock *sk)
 {
-	read_unlock(&raw_v6_lock);
-}
+	struct raw6_sock *rp = raw6_sk(sk);
 
-static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
-{
-	struct ipv6_pinfo *np = inet6_sk(sp);
-	struct in6_addr *dest, *src;
-	__u16 destp, srcp;
-
-	dest  = &np->daddr;
-	src   = &np->rcv_saddr;
-	destp = 0;
-	srcp  = inet_sk(sp)->num;
-	seq_printf(seq,
-		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
-		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n",
-		   i,
-		   src->s6_addr32[0], src->s6_addr32[1],
-		   src->s6_addr32[2], src->s6_addr32[3], srcp,
-		   dest->s6_addr32[0], dest->s6_addr32[1],
-		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
-		   sp->sk_state, 
-		   atomic_read(&sp->sk_wmem_alloc),
-		   atomic_read(&sp->sk_rmem_alloc),
-		   0, 0L, 0,
-		   sock_i_uid(sp), 0,
-		   sock_i_ino(sp),
-		   atomic_read(&sp->sk_refcnt), sp);
+	switch (inet_sk(sk)->inet_num) {
+	case IPPROTO_ICMPV6:
+		rp->checksum = 1;
+		rp->offset   = 2;
+		break;
+	case IPPROTO_MH:
+		rp->checksum = 1;
+		rp->offset   = 4;
+		break;
+	default:
+		break;
+	}
+	return 0;
 }
 
+struct proto rawv6_prot = {
+	.name		   = "RAWv6",
+	.owner		   = THIS_MODULE,
+	.close		   = rawv6_close,
+	.destroy	   = raw6_destroy,
+	.connect	   = ip6_datagram_connect_v6_only,
+	.disconnect	   = udp_disconnect,
+	.ioctl		   = rawv6_ioctl,
+	.init		   = rawv6_init_sk,
+	.setsockopt	   = rawv6_setsockopt,
+	.getsockopt	   = rawv6_getsockopt,
+	.sendmsg	   = rawv6_sendmsg,
+	.recvmsg	   = rawv6_recvmsg,
+	.bind		   = rawv6_bind,
+	.backlog_rcv	   = rawv6_rcv_skb,
+	.hash		   = raw_hash_sk,
+	.unhash		   = raw_unhash_sk,
+	.obj_size	   = sizeof(struct raw6_sock),
+	.h.raw_hash	   = &raw_v6_hashinfo,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_rawv6_setsockopt,
+	.compat_getsockopt = compat_rawv6_getsockopt,
+	.compat_ioctl	   = compat_rawv6_ioctl,
+#endif
+};
+
+#ifdef CONFIG_PROC_FS
 static int raw6_seq_show(struct seq_file *seq, void *v)
 {
-	if (v == SEQ_START_TOKEN)
-		seq_printf(seq,
-			   "  sl  "
-			   "local_address                         "
-			   "remote_address                        "
-			   "st tx_queue rx_queue tr tm->when retrnsmt"
-			   "   uid  timeout inode\n");
-	else
-		raw6_sock_seq_show(seq, v, raw6_seq_private(seq)->bucket);
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
+	} else {
+		struct sock *sp = v;
+		__u16 srcp  = inet_sk(sp)->inet_num;
+		ip6_dgram_sock_seq_show(seq, v, srcp, 0,
+					raw_seq_private(seq)->bucket);
+	}
 	return 0;
 }
 
-static struct seq_operations raw6_seq_ops = {
-	.start =	raw6_seq_start,
-	.next =		raw6_seq_next,
-	.stop =		raw6_seq_stop,
+static const struct seq_operations raw6_seq_ops = {
+	.start =	raw_seq_start,
+	.next =		raw_seq_next,
+	.stop =		raw_seq_stop,
 	.show =		raw6_seq_show,
 };
 
 static int raw6_seq_open(struct inode *inode, struct file *file)
 {
-	struct seq_file *seq;
-	int rc = -ENOMEM;
-	struct raw6_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
-	if (!s)
-		goto out;
-	rc = seq_open(file, &raw6_seq_ops);
-	if (rc)
-		goto out_kfree;
-	seq = file->private_data;
-	seq->private = s;
-	memset(s, 0, sizeof(*s));
-out:
-	return rc;
-out_kfree:
-	kfree(s);
-	goto out;
+	return raw_seq_open(inode, file, &raw_v6_hashinfo, &raw6_seq_ops);
 }
 
-static struct file_operations raw6_seq_fops = {
+static const struct file_operations raw6_seq_fops = {
 	.owner =	THIS_MODULE,
 	.open =		raw6_seq_open,
 	.read =		seq_read,
 	.llseek =	seq_lseek,
-	.release =	seq_release_private,
+	.release =	seq_release_net,
 };
 
-int __init raw6_proc_init(void)
+static int __net_init raw6_init_net(struct net *net)
 {
-	if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops))
+	if (!proc_create("raw6", S_IRUGO, net->proc_net, &raw6_seq_fops))
 		return -ENOMEM;
+
 	return 0;
 }
 
+static void __net_exit raw6_exit_net(struct net *net)
+{
+	remove_proc_entry("raw6", net->proc_net);
+}
+
+static struct pernet_operations raw6_net_ops = {
+	.init = raw6_init_net,
+	.exit = raw6_exit_net,
+};
+
+int __init raw6_proc_init(void)
+{
+	return register_pernet_subsys(&raw6_net_ops);
+}
+
 void raw6_proc_exit(void)
 {
-	proc_net_remove("raw6");
+	unregister_pernet_subsys(&raw6_net_ops);
 }
 #endif	/* CONFIG_PROC_FS */
+
+/* Same as inet6_dgram_ops, sans udp_poll.  */
+static const struct proto_ops inet6_sockraw_ops = {
+	.family		   = PF_INET6,
+	.owner		   = THIS_MODULE,
+	.release	   = inet6_release,
+	.bind		   = inet6_bind,
+	.connect	   = inet_dgram_connect,	/* ok		*/
+	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
+	.accept		   = sock_no_accept,		/* a do nothing	*/
+	.getname	   = inet6_getname,
+	.poll		   = datagram_poll,		/* ok		*/
+	.ioctl		   = inet6_ioctl,		/* must change  */
+	.listen		   = sock_no_listen,		/* ok		*/
+	.shutdown	   = inet_shutdown,		/* ok		*/
+	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
+	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
+	.sendmsg	   = inet_sendmsg,		/* ok		*/
+	.recvmsg	   = sock_common_recvmsg,	/* ok		*/
+	.mmap		   = sock_no_mmap,
+	.sendpage	   = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_sock_common_setsockopt,
+	.compat_getsockopt = compat_sock_common_getsockopt,
+#endif
+};
+
+static struct inet_protosw rawv6_protosw = {
+	.type		= SOCK_RAW,
+	.protocol	= IPPROTO_IP,	/* wild card */
+	.prot		= &rawv6_prot,
+	.ops		= &inet6_sockraw_ops,
+	.flags		= INET_PROTOSW_REUSE,
+};
+
+int __init rawv6_init(void)
+{
+	int ret;
+
+	ret = inet6_register_protosw(&rawv6_protosw);
+	if (ret)
+		goto out;
+out:
+	return ret;
+}
+
+void rawv6_exit(void)
+{
+	inet6_unregister_protosw(&rawv6_protosw);
+}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e4fe9ee484d..cc85a9ba501 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -1,11 +1,9 @@
 /*
  *	IPv6 fragment reassembly
- *	Linux INET6 implementation 
+ *	Linux INET6 implementation
  *
  *	Authors:
- *	Pedro Roque		<roque@di.fc.ul.pt>	
- *
- *	$Id: reassembly.c,v 1.26 2001/03/07 22:00:57 davem Exp $
+ *	Pedro Roque		<roque@di.fc.ul.pt>
  *
  *	Based on: net/ipv4/ip_fragment.c
  *
@@ -15,8 +13,8 @@
  *      2 of the License, or (at your option) any later version.
  */
 
-/* 
- *	Fixes:	
+/*
+ *	Fixes:
  *	Andi Kleen	Make it work with multiple hosts.
  *			More RFC compliance.
  *
@@ -28,7 +26,9 @@
  *	YOSHIFUJI,H. @USAGI	Always remove fragment header to
  *				calculate ICV correctly.
  */
-#include <linux/config.h>
+
+#define pr_fmt(fmt) "IPv6: " fmt
+
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/string.h>
@@ -43,21 +43,22 @@
 #include <linux/icmpv6.h>
 #include <linux/random.h>
 #include <linux/jhash.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/export.h>
 
 #include <net/sock.h>
 #include <net/snmp.h>
 
 #include <net/ipv6.h>
+#include <net/ip6_route.h>
 #include <net/protocol.h>
 #include <net/transp_v6.h>
 #include <net/rawv6.h>
 #include <net/ndisc.h>
 #include <net/addrconf.h>
-
-int sysctl_ip6frag_high_thresh = 256*1024;
-int sysctl_ip6frag_low_thresh = 192*1024;
-
-int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT;
+#include <net/inet_frag.h>
+#include <net/inet_ecn.h>
 
 struct ip6frag_skb_cb
 {
@@ -67,391 +68,185 @@ struct ip6frag_skb_cb
 
 #define FRAG6_CB(skb)	((struct ip6frag_skb_cb*)((skb)->cb))
 
-
-/*
- *	Equivalent of ipv4 struct ipq
- */
-
-struct frag_queue
-{
-	struct frag_queue	*next;
-	struct list_head lru_list;		/* lru list member	*/
-
-	__u32			id;		/* fragment id		*/
-	struct in6_addr		saddr;
-	struct in6_addr		daddr;
-
-	spinlock_t		lock;
-	atomic_t		refcnt;
-	struct timer_list	timer;		/* expire timer		*/
-	struct sk_buff		*fragments;
-	int			len;
-	int			meat;
-	int			iif;
-	struct timeval		stamp;
-	unsigned int		csum;
-	__u8			last_in;	/* has first/last segment arrived? */
-#define COMPLETE		4
-#define FIRST_IN		2
-#define LAST_IN			1
-	__u16			nhoffset;
-	struct frag_queue	**pprev;
-};
-
-/* Hash table. */
-
-#define IP6Q_HASHSZ	64
-
-static struct frag_queue *ip6_frag_hash[IP6Q_HASHSZ];
-static DEFINE_RWLOCK(ip6_frag_lock);
-static u32 ip6_frag_hash_rnd;
-static LIST_HEAD(ip6_frag_lru_list);
-int ip6_frag_nqueues = 0;
-
-static __inline__ void __fq_unlink(struct frag_queue *fq)
-{
-	if(fq->next)
-		fq->next->pprev = fq->pprev;
-	*fq->pprev = fq->next;
-	list_del(&fq->lru_list);
-	ip6_frag_nqueues--;
-}
-
-static __inline__ void fq_unlink(struct frag_queue *fq)
+static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
 {
-	write_lock(&ip6_frag_lock);
-	__fq_unlink(fq);
-	write_unlock(&ip6_frag_lock);
+	return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
 }
 
-static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
-			       struct in6_addr *daddr)
-{
-	u32 a, b, c;
-
-	a = saddr->s6_addr32[0];
-	b = saddr->s6_addr32[1];
-	c = saddr->s6_addr32[2];
+static struct inet_frags ip6_frags;
 
-	a += JHASH_GOLDEN_RATIO;
-	b += JHASH_GOLDEN_RATIO;
-	c += ip6_frag_hash_rnd;
-	__jhash_mix(a, b, c);
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+			  struct net_device *dev);
 
-	a += saddr->s6_addr32[3];
-	b += daddr->s6_addr32[0];
-	c += daddr->s6_addr32[1];
-	__jhash_mix(a, b, c);
-
-	a += daddr->s6_addr32[2];
-	b += daddr->s6_addr32[3];
-	c += id;
-	__jhash_mix(a, b, c);
-
-	return c & (IP6Q_HASHSZ - 1);
-}
-
-static struct timer_list ip6_frag_secret_timer;
-int sysctl_ip6frag_secret_interval = 10 * 60 * HZ;
-
-static void ip6_frag_secret_rebuild(unsigned long dummy)
-{
-	unsigned long now = jiffies;
-	int i;
-
-	write_lock(&ip6_frag_lock);
-	get_random_bytes(&ip6_frag_hash_rnd, sizeof(u32));
-	for (i = 0; i < IP6Q_HASHSZ; i++) {
-		struct frag_queue *q;
-
-		q = ip6_frag_hash[i];
-		while (q) {
-			struct frag_queue *next = q->next;
-			unsigned int hval = ip6qhashfn(q->id,
-						       &q->saddr,
-						       &q->daddr);
-
-			if (hval != i) {
-				/* Unlink. */
-				if (q->next)
-					q->next->pprev = q->pprev;
-				*q->pprev = q->next;
-
-				/* Relink to new hash chain. */
-				if ((q->next = ip6_frag_hash[hval]) != NULL)
-					q->next->pprev = &q->next;
-				ip6_frag_hash[hval] = q;
-				q->pprev = &ip6_frag_hash[hval];
-			}
-
-			q = next;
-		}
-	}
-	write_unlock(&ip6_frag_lock);
-
-	mod_timer(&ip6_frag_secret_timer, now + sysctl_ip6frag_secret_interval);
-}
-
-atomic_t ip6_frag_mem = ATOMIC_INIT(0);
-
-/* Memory Tracking Functions. */
-static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
-{
-	if (work)
-		*work -= skb->truesize;
-	atomic_sub(skb->truesize, &ip6_frag_mem);
-	kfree_skb(skb);
-}
-
-static inline void frag_free_queue(struct frag_queue *fq, int *work)
+/*
+ * callers should be careful not to use the hash value outside the ipfrag_lock
+ * as doing so could race with ipfrag_hash_rnd being recalculated.
+ */
+static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
+				    const struct in6_addr *daddr)
 {
-	if (work)
-		*work -= sizeof(struct frag_queue);
-	atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem);
-	kfree(fq);
-}
+	u32 c;
 
-static inline struct frag_queue *frag_alloc_queue(void)
-{
-	struct frag_queue *fq = kmalloc(sizeof(struct frag_queue), GFP_ATOMIC);
+	net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
+	c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+			 (__force u32)id, ip6_frags.rnd);
 
-	if(!fq)
-		return NULL;
-	atomic_add(sizeof(struct frag_queue), &ip6_frag_mem);
-	return fq;
+	return c & (INETFRAGS_HASHSZ - 1);
 }
 
-/* Destruction primitives. */
-
-/* Complete destruction of fq. */
-static void ip6_frag_destroy(struct frag_queue *fq, int *work)
+static unsigned int ip6_hashfn(struct inet_frag_queue *q)
 {
-	struct sk_buff *fp;
-
-	BUG_TRAP(fq->last_in&COMPLETE);
-	BUG_TRAP(del_timer(&fq->timer) == 0);
-
-	/* Release all fragment data. */
-	fp = fq->fragments;
-	while (fp) {
-		struct sk_buff *xp = fp->next;
-
-		frag_kfree_skb(fp, work);
-		fp = xp;
-	}
-
-	frag_free_queue(fq, work);
-}
+	struct frag_queue *fq;
 
-static __inline__ void fq_put(struct frag_queue *fq, int *work)
-{
-	if (atomic_dec_and_test(&fq->refcnt))
-		ip6_frag_destroy(fq, work);
+	fq = container_of(q, struct frag_queue, q);
+	return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
 }
 
-/* Kill fq entry. It is not destroyed immediately,
- * because caller (and someone more) holds reference count.
- */
-static __inline__ void fq_kill(struct frag_queue *fq)
+bool ip6_frag_match(struct inet_frag_queue *q, void *a)
 {
-	if (del_timer(&fq->timer))
-		atomic_dec(&fq->refcnt);
+	struct frag_queue *fq;
+	struct ip6_create_arg *arg = a;
 
-	if (!(fq->last_in & COMPLETE)) {
-		fq_unlink(fq);
-		atomic_dec(&fq->refcnt);
-		fq->last_in |= COMPLETE;
-	}
+	fq = container_of(q, struct frag_queue, q);
+	return	fq->id == arg->id &&
+		fq->user == arg->user &&
+		ipv6_addr_equal(&fq->saddr, arg->src) &&
+		ipv6_addr_equal(&fq->daddr, arg->dst);
 }
+EXPORT_SYMBOL(ip6_frag_match);
 
-static void ip6_evictor(void)
+void ip6_frag_init(struct inet_frag_queue *q, void *a)
 {
-	struct frag_queue *fq;
-	struct list_head *tmp;
-	int work;
-
-	work = atomic_read(&ip6_frag_mem) - sysctl_ip6frag_low_thresh;
-	if (work <= 0)
-		return;
-
-	while(work > 0) {
-		read_lock(&ip6_frag_lock);
-		if (list_empty(&ip6_frag_lru_list)) {
-			read_unlock(&ip6_frag_lock);
-			return;
-		}
-		tmp = ip6_frag_lru_list.next;
-		fq = list_entry(tmp, struct frag_queue, lru_list);
-		atomic_inc(&fq->refcnt);
-		read_unlock(&ip6_frag_lock);
-
-		spin_lock(&fq->lock);
-		if (!(fq->last_in&COMPLETE))
-			fq_kill(fq);
-		spin_unlock(&fq->lock);
-
-		fq_put(fq, &work);
-		IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
-	}
+	struct frag_queue *fq = container_of(q, struct frag_queue, q);
+	struct ip6_create_arg *arg = a;
+
+	fq->id = arg->id;
+	fq->user = arg->user;
+	fq->saddr = *arg->src;
+	fq->daddr = *arg->dst;
+	fq->ecn = arg->ecn;
 }
+EXPORT_SYMBOL(ip6_frag_init);
 
-static void ip6_frag_expire(unsigned long data)
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
+			   struct inet_frags *frags)
 {
-	struct frag_queue *fq = (struct frag_queue *) data;
+	struct net_device *dev = NULL;
 
-	spin_lock(&fq->lock);
+	spin_lock(&fq->q.lock);
 
-	if (fq->last_in & COMPLETE)
+	if (fq->q.last_in & INET_FRAG_COMPLETE)
 		goto out;
 
-	fq_kill(fq);
+	inet_frag_kill(&fq->q, frags);
 
-	IP6_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
-	IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, fq->iif);
+	if (!dev)
+		goto out_rcu_unlock;
 
-	/* Send error only if the first segment arrived. */
-	if (fq->last_in&FIRST_IN && fq->fragments) {
-		struct net_device *dev = dev_get_by_index(fq->iif);
-
-		/*
-		   But use as source device on which LAST ARRIVED
-		   segment was received. And do not use fq->dev
-		   pointer directly, device might already disappeared.
-		 */
-		if (dev) {
-			fq->fragments->dev = dev;
-			icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0,
-				    dev);
-			dev_put(dev);
-		}
-	}
-out:
-	spin_unlock(&fq->lock);
-	fq_put(fq, NULL);
-}
-
-/* Creation primitives. */
+	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
 
+	/* Don't send error if the first segment did not arrive. */
+	if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments)
+		goto out_rcu_unlock;
 
-static struct frag_queue *ip6_frag_intern(unsigned int hash,
-					  struct frag_queue *fq_in)
-{
-	struct frag_queue *fq;
-
-	write_lock(&ip6_frag_lock);
-#ifdef CONFIG_SMP
-	for (fq = ip6_frag_hash[hash]; fq; fq = fq->next) {
-		if (fq->id == fq_in->id && 
-		    ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
-		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
-			atomic_inc(&fq->refcnt);
-			write_unlock(&ip6_frag_lock);
-			fq_in->last_in |= COMPLETE;
-			fq_put(fq_in, NULL);
-			return fq;
-		}
-	}
-#endif
-	fq = fq_in;
-
-	if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time))
-		atomic_inc(&fq->refcnt);
-
-	atomic_inc(&fq->refcnt);
-	if((fq->next = ip6_frag_hash[hash]) != NULL)
-		fq->next->pprev = &fq->next;
-	ip6_frag_hash[hash] = fq;
-	fq->pprev = &ip6_frag_hash[hash];
-	INIT_LIST_HEAD(&fq->lru_list);
-	list_add_tail(&fq->lru_list, &ip6_frag_lru_list);
-	ip6_frag_nqueues++;
-	write_unlock(&ip6_frag_lock);
-	return fq;
+	/*
+	   But use as source device on which LAST ARRIVED
+	   segment was received. And do not use fq->dev
+	   pointer directly, device might already disappeared.
+	 */
+	fq->q.fragments->dev = dev;
+	icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+out_rcu_unlock:
+	rcu_read_unlock();
+out:
+	spin_unlock(&fq->q.lock);
+	inet_frag_put(&fq->q, frags);
 }
+EXPORT_SYMBOL(ip6_expire_frag_queue);
 
-
-static struct frag_queue *
-ip6_frag_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst)
+static void ip6_frag_expire(unsigned long data)
 {
 	struct frag_queue *fq;
+	struct net *net;
 
-	if ((fq = frag_alloc_queue()) == NULL)
-		goto oom;
-
-	memset(fq, 0, sizeof(struct frag_queue));
+	fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+	net = container_of(fq->q.net, struct net, ipv6.frags);
 
-	fq->id = id;
-	ipv6_addr_copy(&fq->saddr, src);
-	ipv6_addr_copy(&fq->daddr, dst);
-
-	init_timer(&fq->timer);
-	fq->timer.function = ip6_frag_expire;
-	fq->timer.data = (long) fq;
-	spin_lock_init(&fq->lock);
-	atomic_set(&fq->refcnt, 1);
-
-	return ip6_frag_intern(hash, fq);
-
-oom:
-	IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
-	return NULL;
+	ip6_expire_frag_queue(net, fq, &ip6_frags);
 }
 
 static __inline__ struct frag_queue *
-fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst)
+fq_find(struct net *net, __be32 id, const struct in6_addr *src,
+	const struct in6_addr *dst, u8 ecn)
 {
-	struct frag_queue *fq;
-	unsigned int hash = ip6qhashfn(id, src, dst);
-
-	read_lock(&ip6_frag_lock);
-	for(fq = ip6_frag_hash[hash]; fq; fq = fq->next) {
-		if (fq->id == id && 
-		    ipv6_addr_equal(src, &fq->saddr) &&
-		    ipv6_addr_equal(dst, &fq->daddr)) {
-			atomic_inc(&fq->refcnt);
-			read_unlock(&ip6_frag_lock);
-			return fq;
-		}
+	struct inet_frag_queue *q;
+	struct ip6_create_arg arg;
+	unsigned int hash;
+
+	arg.id = id;
+	arg.user = IP6_DEFRAG_LOCAL_DELIVER;
+	arg.src = src;
+	arg.dst = dst;
+	arg.ecn = ecn;
+
+	read_lock(&ip6_frags.lock);
+	hash = inet6_hash_frag(id, src, dst);
+
+	q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
+	if (IS_ERR_OR_NULL(q)) {
+		inet_frag_maybe_warn_overflow(q, pr_fmt());
+		return NULL;
 	}
-	read_unlock(&ip6_frag_lock);
-
-	return ip6_frag_create(hash, id, src, dst);
+	return container_of(q, struct frag_queue, q);
 }
 
-
-static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, 
+static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			   struct frag_hdr *fhdr, int nhoff)
 {
 	struct sk_buff *prev, *next;
+	struct net_device *dev;
 	int offset, end;
+	struct net *net = dev_net(skb_dst(skb)->dev);
+	u8 ecn;
 
-	if (fq->last_in & COMPLETE)
+	if (fq->q.last_in & INET_FRAG_COMPLETE)
 		goto err;
 
 	offset = ntohs(fhdr->frag_off) & ~0x7;
-	end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
-			((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
+	end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
+			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
 
 	if ((unsigned int)end > IPV6_MAXPLEN) {
-		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
- 		icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off - skb->nh.raw);
- 		return;
+		IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+				 IPSTATS_MIB_INHDRERRORS);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  ((u8 *)&fhdr->frag_off -
+				   skb_network_header(skb)));
+		return -1;
 	}
 
- 	if (skb->ip_summed == CHECKSUM_HW)
- 		skb->csum = csum_sub(skb->csum,
- 				     csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0));
+	ecn = ip6_frag_ecn(ipv6_hdr(skb));
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		const unsigned char *nh = skb_network_header(skb);
+		skb->csum = csum_sub(skb->csum,
+				     csum_partial(nh, (u8 *)(fhdr + 1) - nh,
+						  0));
+	}
 
 	/* Is this the final fragment? */
 	if (!(fhdr->frag_off & htons(IP6_MF))) {
 		/* If we already have some bits beyond end
 		 * or have different end, the segment is corrupted.
 		 */
-		if (end < fq->len ||
-		    ((fq->last_in & LAST_IN) && end != fq->len))
+		if (end < fq->q.len ||
+		    ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len))
 			goto err;
-		fq->last_in |= LAST_IN;
-		fq->len = end;
+		fq->q.last_in |= INET_FRAG_LAST_IN;
+		fq->q.len = end;
 	} else {
 		/* Check if the fragment is rounded to 8 bytes.
 		 * Required by the RFC.
@@ -460,16 +255,17 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			/* RFC2460 says always send parameter problem in
 			 * this case. -DaveM
 			 */
-			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
-			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, 
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+					 IPSTATS_MIB_INHDRERRORS);
+			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 					  offsetof(struct ipv6hdr, payload_len));
-			return;
+			return -1;
 		}
-		if (end > fq->len) {
+		if (end > fq->q.len) {
 			/* Some bits beyond end -> corruption. */
-			if (fq->last_in & LAST_IN)
+			if (fq->q.last_in & INET_FRAG_LAST_IN)
 				goto err;
-			fq->len = end;
+			fq->q.len = end;
 		}
 	}
 
@@ -479,7 +275,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	/* Point into the IP datagram 'data' part. */
 	if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
 		goto err;
-	
+
 	if (pskb_trim_rcsum(skb, end - offset))
 		goto err;
 
@@ -487,97 +283,86 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	 * in the chain of fragments so far.  We must know where to put
 	 * this fragment, right?
 	 */
+	prev = fq->q.fragments_tail;
+	if (!prev || FRAG6_CB(prev)->offset < offset) {
+		next = NULL;
+		goto found;
+	}
 	prev = NULL;
-	for(next = fq->fragments; next != NULL; next = next->next) {
+	for(next = fq->q.fragments; next != NULL; next = next->next) {
 		if (FRAG6_CB(next)->offset >= offset)
 			break;	/* bingo! */
 		prev = next;
 	}
 
-	/* We found where to put this one.  Check for overlap with
-	 * preceding fragment, and, if needed, align things so that
-	 * any overlaps are eliminated.
+found:
+	/* RFC5722, Section 4, amended by Errata ID : 3089
+	 *                          When reassembling an IPv6 datagram, if
+	 *   one or more its constituent fragments is determined to be an
+	 *   overlapping fragment, the entire datagram (and any constituent
+	 *   fragments) MUST be silently discarded.
 	 */
-	if (prev) {
-		int i = (FRAG6_CB(prev)->offset + prev->len) - offset;
 
-		if (i > 0) {
-			offset += i;
-			if (end <= offset)
-				goto err;
-			if (!pskb_pull(skb, i))
-				goto err;
-			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
-				skb->ip_summed = CHECKSUM_NONE;
-		}
-	}
-
-	/* Look for overlap with succeeding segments.
-	 * If we can merge fragments, do it.
-	 */
-	while (next && FRAG6_CB(next)->offset < end) {
-		int i = end - FRAG6_CB(next)->offset; /* overlap is 'i' bytes */
-
-		if (i < next->len) {
-			/* Eat head of the next overlapped fragment
-			 * and leave the loop. The next ones cannot overlap.
-			 */
-			if (!pskb_pull(next, i))
-				goto err;
-			FRAG6_CB(next)->offset += i;	/* next fragment */
-			fq->meat -= i;
-			if (next->ip_summed != CHECKSUM_UNNECESSARY)
-				next->ip_summed = CHECKSUM_NONE;
-			break;
-		} else {
-			struct sk_buff *free_it = next;
-
-			/* Old fragment is completely overridden with
-			 * new one drop it.
-			 */
-			next = next->next;
+	/* Check for overlap with preceding fragment. */
+	if (prev &&
+	    (FRAG6_CB(prev)->offset + prev->len) > offset)
+		goto discard_fq;
 
-			if (prev)
-				prev->next = next;
-			else
-				fq->fragments = next;
-
-			fq->meat -= free_it->len;
-			frag_kfree_skb(free_it, NULL);
-		}
-	}
+	/* Look for overlap with succeeding segment. */
+	if (next && FRAG6_CB(next)->offset < end)
+		goto discard_fq;
 
 	FRAG6_CB(skb)->offset = offset;
 
 	/* Insert this fragment in the chain of fragments. */
 	skb->next = next;
+	if (!next)
+		fq->q.fragments_tail = skb;
 	if (prev)
 		prev->next = skb;
 	else
-		fq->fragments = skb;
+		fq->q.fragments = skb;
 
-	if (skb->dev)
-		fq->iif = skb->dev->ifindex;
-	skb->dev = NULL;
-	skb_get_timestamp(skb, &fq->stamp);
-	fq->meat += skb->len;
-	atomic_add(skb->truesize, &ip6_frag_mem);
+	dev = skb->dev;
+	if (dev) {
+		fq->iif = dev->ifindex;
+		skb->dev = NULL;
+	}
+	fq->q.stamp = skb->tstamp;
+	fq->q.meat += skb->len;
+	fq->ecn |= ecn;
+	add_frag_mem_limit(&fq->q, skb->truesize);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
 	 */
 	if (offset == 0) {
 		fq->nhoffset = nhoff;
-		fq->last_in |= FIRST_IN;
+		fq->q.last_in |= INET_FRAG_FIRST_IN;
+	}
+
+	if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+	    fq->q.meat == fq->q.len) {
+		int res;
+		unsigned long orefdst = skb->_skb_refdst;
+
+		skb->_skb_refdst = 0UL;
+		res = ip6_frag_reasm(fq, prev, dev);
+		skb->_skb_refdst = orefdst;
+		return res;
 	}
-	write_lock(&ip6_frag_lock);
-	list_move_tail(&fq->lru_list, &ip6_frag_lru_list);
-	write_unlock(&ip6_frag_lock);
-	return;
 
+	skb_dst_drop(skb);
+	inet_frag_lru_move(&fq->q);
+	return -1;
+
+discard_fq:
+	inet_frag_kill(&fq->q, &ip6_frags);
 err:
-	IP6_INC_STATS(IPSTATS_MIB_REASMFAILS);
+	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+		      IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);
+	return -1;
 }
 
 /*
@@ -589,32 +374,60 @@ err:
  *	queue is eligible for reassembly i.e. it is not COMPLETE,
  *	the last and the first frames arrived and all the bits are here.
  */
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
-			  unsigned int *nhoffp,
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 			  struct net_device *dev)
 {
-	struct sk_buff *fp, *head = fq->fragments;
+	struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
+	struct sk_buff *fp, *head = fq->q.fragments;
 	int    payload_len;
 	unsigned int nhoff;
+	int sum_truesize;
+	u8 ecn;
+
+	inet_frag_kill(&fq->q, &ip6_frags);
+
+	ecn = ip_frag_ecn_table[fq->ecn];
+	if (unlikely(ecn == 0xff))
+		goto out_fail;
+
+	/* Make the one we just received the head. */
+	if (prev) {
+		head = prev->next;
+		fp = skb_clone(head, GFP_ATOMIC);
 
-	fq_kill(fq);
+		if (!fp)
+			goto out_oom;
+
+		fp->next = head->next;
+		if (!fp->next)
+			fq->q.fragments_tail = fp;
+		prev->next = fp;
+
+		skb_morph(head, fq->q.fragments);
+		head->next = fq->q.fragments->next;
 
-	BUG_TRAP(head != NULL);
-	BUG_TRAP(FRAG6_CB(head)->offset == 0);
+		consume_skb(fq->q.fragments);
+		fq->q.fragments = head;
+	}
+
+	WARN_ON(head == NULL);
+	WARN_ON(FRAG6_CB(head)->offset != 0);
 
 	/* Unfragmented part is taken from the first segment. */
-	payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
+	payload_len = ((head->data - skb_network_header(head)) -
+		       sizeof(struct ipv6hdr) + fq->q.len -
+		       sizeof(struct frag_hdr));
 	if (payload_len > IPV6_MAXPLEN)
 		goto out_oversize;
 
 	/* Head of list must not be cloned. */
-	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(head, GFP_ATOMIC))
 		goto out_oom;
 
 	/* If the first fragment is fragmented itself, we split
 	 * it to two chunks: the first with data and paged part
 	 * and the second, holding only fragments. */
-	if (skb_shinfo(head)->frag_list) {
+	if (skb_has_frag_list(head)) {
 		struct sk_buff *clone;
 		int i, plen = 0;
 
@@ -623,146 +436,332 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 		clone->next = head->next;
 		head->next = clone;
 		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
-		skb_shinfo(head)->frag_list = NULL;
-		for (i=0; i<skb_shinfo(head)->nr_frags; i++)
-			plen += skb_shinfo(head)->frags[i].size;
+		skb_frag_list_init(head);
+		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
 		clone->len = clone->data_len = head->data_len - plen;
 		head->data_len -= clone->len;
 		head->len -= clone->len;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
-		atomic_add(clone->truesize, &ip6_frag_mem);
+		add_frag_mem_limit(&fq->q, clone->truesize);
 	}
 
 	/* We have to remove fragment header from datagram and to relocate
 	 * header in order to calculate ICV correctly. */
 	nhoff = fq->nhoffset;
-	head->nh.raw[nhoff] = head->h.raw[0];
-	memmove(head->head + sizeof(struct frag_hdr), head->head, 
+	skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
+	memmove(head->head + sizeof(struct frag_hdr), head->head,
 		(head->data - head->head) - sizeof(struct frag_hdr));
-	head->mac.raw += sizeof(struct frag_hdr);
-	head->nh.raw += sizeof(struct frag_hdr);
+	head->mac_header += sizeof(struct frag_hdr);
+	head->network_header += sizeof(struct frag_hdr);
+
+	skb_reset_transport_header(head);
+	skb_push(head, head->data - skb_network_header(head));
 
-	skb_shinfo(head)->frag_list = head->next;
-	head->h.raw = head->data;
-	skb_push(head, head->data - head->nh.raw);
-	atomic_sub(head->truesize, &ip6_frag_mem);
+	sum_truesize = head->truesize;
+	for (fp = head->next; fp;) {
+		bool headstolen;
+		int delta;
+		struct sk_buff *next = fp->next;
 
-	for (fp=head->next; fp; fp = fp->next) {
-		head->data_len += fp->len;
-		head->len += fp->len;
+		sum_truesize += fp->truesize;
 		if (head->ip_summed != fp->ip_summed)
 			head->ip_summed = CHECKSUM_NONE;
-		else if (head->ip_summed == CHECKSUM_HW)
+		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
-		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &ip6_frag_mem);
+
+		if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
+			kfree_skb_partial(fp, headstolen);
+		} else {
+			if (!skb_shinfo(head)->frag_list)
+				skb_shinfo(head)->frag_list = fp;
+			head->data_len += fp->len;
+			head->len += fp->len;
+			head->truesize += fp->truesize;
+		}
+		fp = next;
 	}
+	sub_frag_mem_limit(&fq->q, sum_truesize);
 
 	head->next = NULL;
 	head->dev = dev;
-	skb_set_timestamp(head, &fq->stamp);
-	head->nh.ipv6h->payload_len = htons(payload_len);
-
-	*skb_in = head;
+	head->tstamp = fq->q.stamp;
+	ipv6_hdr(head)->payload_len = htons(payload_len);
+	ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
+	IP6CB(head)->nhoff = nhoff;
+	IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
 
 	/* Yes, and fold redundant checksum back. 8) */
-	if (head->ip_summed == CHECKSUM_HW)
-		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
-
-	IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
-	fq->fragments = NULL;
-	*nhoffp = nhoff;
+	if (head->ip_summed == CHECKSUM_COMPLETE)
+		head->csum = csum_partial(skb_network_header(head),
+					  skb_network_header_len(head),
+					  head->csum);
+
+	rcu_read_lock();
+	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
+	rcu_read_unlock();
+	fq->q.fragments = NULL;
+	fq->q.fragments_tail = NULL;
 	return 1;
 
 out_oversize:
-	if (net_ratelimit())
-		printk(KERN_DEBUG "ip6_frag_reasm: payload len = %d\n", payload_len);
+	net_dbg_ratelimited("ip6_frag_reasm: payload len = %d\n", payload_len);
 	goto out_fail;
 out_oom:
-	if (net_ratelimit())
-		printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n");
+	net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
 out_fail:
-	IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+	rcu_read_lock();
+	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+	rcu_read_unlock();
 	return -1;
 }
 
-static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+static int ipv6_frag_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *skbp; 
-	struct net_device *dev = skb->dev;
 	struct frag_hdr *fhdr;
 	struct frag_queue *fq;
-	struct ipv6hdr *hdr;
+	const struct ipv6hdr *hdr = ipv6_hdr(skb);
+	struct net *net = dev_net(skb_dst(skb)->dev);
+	int evicted;
 
-	hdr = skb->nh.ipv6h;
+	if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
+		goto fail_hdr;
 
-	IP6_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
+	IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
 
 	/* Jumbo payload inhibits frag. header */
-	if (hdr->payload_len==0) {
-		IP6_INC_STATS(IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
-		return -1;
-	}
-	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+sizeof(struct frag_hdr))) {
-		IP6_INC_STATS(IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
-		return -1;
-	}
+	if (hdr->payload_len==0)
+		goto fail_hdr;
 
-	hdr = skb->nh.ipv6h;
-	fhdr = (struct frag_hdr *)skb->h.raw;
+	if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
+				 sizeof(struct frag_hdr))))
+		goto fail_hdr;
+
+	hdr = ipv6_hdr(skb);
+	fhdr = (struct frag_hdr *)skb_transport_header(skb);
 
 	if (!(fhdr->frag_off & htons(0xFFF9))) {
 		/* It is not a fragmented frame */
-		skb->h.raw += sizeof(struct frag_hdr);
-		IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
+		skb->transport_header += sizeof(struct frag_hdr);
+		IP6_INC_STATS_BH(net,
+				 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
 
-		*nhoffp = (u8*)fhdr - skb->nh.raw;
+		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
+		IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
 		return 1;
 	}
 
-	if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh)
-		ip6_evictor();
-
-	if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr)) != NULL) {
-		int ret = -1;
+	evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false);
+	if (evicted)
+		IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+				 IPSTATS_MIB_REASMFAILS, evicted);
 
-		spin_lock(&fq->lock);
+	fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
+		     ip6_frag_ecn(hdr));
+	if (fq != NULL) {
+		int ret;
 
-		ip6_frag_queue(fq, skb, fhdr, *nhoffp);
+		spin_lock(&fq->q.lock);
 
-		if (fq->last_in == (FIRST_IN|LAST_IN) &&
-		    fq->meat == fq->len)
-			ret = ip6_frag_reasm(fq, skbp, nhoffp, dev);
+		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
 
-		spin_unlock(&fq->lock);
-		fq_put(fq, NULL);
+		spin_unlock(&fq->q.lock);
+		inet_frag_put(&fq->q, &ip6_frags);
 		return ret;
 	}
 
-	IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+	IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);
 	return -1;
+
+fail_hdr:
+	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
+	icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
+	return -1;
 }
 
-static struct inet6_protocol frag_protocol =
+static const struct inet6_protocol frag_protocol =
 {
 	.handler	=	ipv6_frag_rcv,
 	.flags		=	INET6_PROTO_NOPOLICY,
 };
 
-void __init ipv6_frag_init(void)
+#ifdef CONFIG_SYSCTL
+static struct ctl_table ip6_frags_ns_ctl_table[] = {
+	{
+		.procname	= "ip6frag_high_thresh",
+		.data		= &init_net.ipv6.frags.high_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "ip6frag_low_thresh",
+		.data		= &init_net.ipv6.frags.low_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "ip6frag_time",
+		.data		= &init_net.ipv6.frags.timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{ }
+};
+
+static struct ctl_table ip6_frags_ctl_table[] = {
+	{
+		.procname	= "ip6frag_secret_interval",
+		.data		= &ip6_frags.secret_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{ }
+};
+
+static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
 {
-	if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0)
-		printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n");
+	struct ctl_table *table;
+	struct ctl_table_header *hdr;
+
+	table = ip6_frags_ns_ctl_table;
+	if (!net_eq(net, &init_net)) {
+		table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL);
+		if (table == NULL)
+			goto err_alloc;
+
+		table[0].data = &net->ipv6.frags.high_thresh;
+		table[1].data = &net->ipv6.frags.low_thresh;
+		table[2].data = &net->ipv6.frags.timeout;
+
+		/* Don't export sysctls to unprivileged users */
+		if (net->user_ns != &init_user_ns)
+			table[0].procname = NULL;
+	}
+
+	hdr = register_net_sysctl(net, "net/ipv6", table);
+	if (hdr == NULL)
+		goto err_reg;
+
+	net->ipv6.sysctl.frags_hdr = hdr;
+	return 0;
+
+err_reg:
+	if (!net_eq(net, &init_net))
+		kfree(table);
+err_alloc:
+	return -ENOMEM;
+}
+
+static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
+	if (!net_eq(net, &init_net))
+		kfree(table);
+}
+
+static struct ctl_table_header *ip6_ctl_header;
+
+static int ip6_frags_sysctl_register(void)
+{
+	ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6",
+			ip6_frags_ctl_table);
+	return ip6_ctl_header == NULL ? -ENOMEM : 0;
+}
+
+static void ip6_frags_sysctl_unregister(void)
+{
+	unregister_net_sysctl_table(ip6_ctl_header);
+}
+#else
+static inline int ip6_frags_ns_sysctl_register(struct net *net)
+{
+	return 0;
+}
+
+static inline void ip6_frags_ns_sysctl_unregister(struct net *net)
+{
+}
+
+static inline int ip6_frags_sysctl_register(void)
+{
+	return 0;
+}
+
+static inline void ip6_frags_sysctl_unregister(void)
+{
+}
+#endif
+
+static int __net_init ipv6_frags_init_net(struct net *net)
+{
+	net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+	net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+	net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
+
+	inet_frags_init_net(&net->ipv6.frags);
+
+	return ip6_frags_ns_sysctl_register(net);
+}
 
-	ip6_frag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
-				   (jiffies ^ (jiffies >> 6)));
+static void __net_exit ipv6_frags_exit_net(struct net *net)
+{
+	ip6_frags_ns_sysctl_unregister(net);
+	inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
+}
 
-	init_timer(&ip6_frag_secret_timer);
-	ip6_frag_secret_timer.function = ip6_frag_secret_rebuild;
-	ip6_frag_secret_timer.expires = jiffies + sysctl_ip6frag_secret_interval;
-	add_timer(&ip6_frag_secret_timer);
+static struct pernet_operations ip6_frags_ops = {
+	.init = ipv6_frags_init_net,
+	.exit = ipv6_frags_exit_net,
+};
+
+int __init ipv6_frag_init(void)
+{
+	int ret;
+
+	ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+	if (ret)
+		goto out;
+
+	ret = ip6_frags_sysctl_register();
+	if (ret)
+		goto err_sysctl;
+
+	ret = register_pernet_subsys(&ip6_frags_ops);
+	if (ret)
+		goto err_pernet;
+
+	ip6_frags.hashfn = ip6_hashfn;
+	ip6_frags.constructor = ip6_frag_init;
+	ip6_frags.destructor = NULL;
+	ip6_frags.skb_free = NULL;
+	ip6_frags.qsize = sizeof(struct frag_queue);
+	ip6_frags.match = ip6_frag_match;
+	ip6_frags.frag_expire = ip6_frag_expire;
+	ip6_frags.secret_interval = 10 * 60 * HZ;
+	inet_frags_init(&ip6_frags);
+out:
+	return ret;
+
+err_pernet:
+	ip6_frags_sysctl_unregister();
+err_sysctl:
+	inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+	goto out;
+}
+
+void ipv6_frag_exit(void)
+{
+	inet_frags_fini(&ip6_frags);
+	ip6_frags_sysctl_unregister();
+	unregister_pernet_subsys(&ip6_frags_ops);
+	inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 227e99ed510..f23fbd28a50 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3,9 +3,7 @@
  *	FIB front-end.
  *
  *	Authors:
- *	Pedro Roque		<roque@di.fc.ul.pt>	
- *
- *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
+ *	Pedro Roque		<roque@di.fc.ul.pt>
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -22,10 +20,15 @@
  *		routers in REACHABLE, STALE, DELAY or PROBE states).
  *		- always select the same router if it is (probably)
  *		reachable.  otherwise, round-robin the list.
+ *	Ville Nuorvala
+ *		Fixed routing subtrees.
  */
 
-#include <linux/config.h>
+#define pr_fmt(fmt) "IPv6: " fmt
+
+#include <linux/capability.h>
 #include <linux/errno.h>
+#include <linux/export.h>
 #include <linux/types.h>
 #include <linux/times.h>
 #include <linux/socket.h>
@@ -34,15 +37,14 @@
 #include <linux/route.h>
 #include <linux/netdevice.h>
 #include <linux/in6.h>
+#include <linux/mroute6.h>
 #include <linux/init.h>
-#include <linux/netlink.h>
 #include <linux/if_arp.h>
-
-#ifdef 	CONFIG_PROC_FS
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#endif
-
+#include <linux/nsproxy.h>
+#include <linux/slab.h>
+#include <net/net_namespace.h>
 #include <net/snmp.h>
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
@@ -53,6 +55,9 @@
 #include <linux/rtnetlink.h>
 #include <net/dst.h>
 #include <net/xfrm.h>
+#include <net/netevent.h>
+#include <net/netlink.h>
+#include <net/nexthop.h>
 
 #include <asm/uaccess.h>
 
@@ -60,98 +65,282 @@
 #include <linux/sysctl.h>
 #endif
 
-/* Set to 3 to get tracing. */
-#define RT6_DEBUG 2
-
-#if RT6_DEBUG >= 3
-#define RDBG(x) printk x
-#define RT6_TRACE(x...) printk(KERN_DEBUG x)
-#else
-#define RDBG(x)
-#define RT6_TRACE(x...) do { ; } while (0)
-#endif
-
-
-static int ip6_rt_max_size = 4096;
-static int ip6_rt_gc_min_interval = HZ / 2;
-static int ip6_rt_gc_timeout = 60*HZ;
-int ip6_rt_gc_interval = 30*HZ;
-static int ip6_rt_gc_elasticity = 9;
-static int ip6_rt_mtu_expires = 10*60*HZ;
-static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
+enum rt6_nud_state {
+	RT6_NUD_FAIL_HARD = -3,
+	RT6_NUD_FAIL_PROBE = -2,
+	RT6_NUD_FAIL_DO_RR = -1,
+	RT6_NUD_SUCCEED = 1
+};
 
-static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
+static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
+				    const struct in6_addr *dest);
 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
+static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
+static unsigned int	 ip6_mtu(const struct dst_entry *dst);
 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
 static void		ip6_dst_destroy(struct dst_entry *);
 static void		ip6_dst_ifdown(struct dst_entry *,
 				       struct net_device *dev, int how);
-static int		 ip6_dst_gc(void);
+static int		 ip6_dst_gc(struct dst_ops *ops);
 
 static int		ip6_pkt_discard(struct sk_buff *skb);
-static int		ip6_pkt_discard_out(struct sk_buff *skb);
+static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
+static int		ip6_pkt_prohibit(struct sk_buff *skb);
+static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
 static void		ip6_link_failure(struct sk_buff *skb);
-static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
+static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+					   struct sk_buff *skb, u32 mtu);
+static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
+					struct sk_buff *skb);
+static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
+
+#ifdef CONFIG_IPV6_ROUTE_INFO
+static struct rt6_info *rt6_add_route_info(struct net *net,
+					   const struct in6_addr *prefix, int prefixlen,
+					   const struct in6_addr *gwaddr, int ifindex,
+					   unsigned int pref);
+static struct rt6_info *rt6_get_route_info(struct net *net,
+					   const struct in6_addr *prefix, int prefixlen,
+					   const struct in6_addr *gwaddr, int ifindex);
+#endif
+
+static void rt6_bind_peer(struct rt6_info *rt, int create)
+{
+	struct inet_peer_base *base;
+	struct inet_peer *peer;
 
-static struct dst_ops ip6_dst_ops = {
+	base = inetpeer_base_ptr(rt->_rt6i_peer);
+	if (!base)
+		return;
+
+	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
+	if (peer) {
+		if (!rt6_set_peer(rt, peer))
+			inet_putpeer(peer);
+	}
+}
+
+static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
+{
+	if (rt6_has_peer(rt))
+		return rt6_peer_ptr(rt);
+
+	rt6_bind_peer(rt, create);
+	return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
+}
+
+static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
+{
+	return __rt6_get_peer(rt, 1);
+}
+
+static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+	struct rt6_info *rt = (struct rt6_info *) dst;
+	struct inet_peer *peer;
+	u32 *p = NULL;
+
+	if (!(rt->dst.flags & DST_HOST))
+		return NULL;
+
+	peer = rt6_get_peer_create(rt);
+	if (peer) {
+		u32 *old_p = __DST_METRICS_PTR(old);
+		unsigned long prev, new;
+
+		p = peer->metrics;
+		if (inet_metrics_new(peer) ||
+		    (old & DST_METRICS_FORCE_OVERWRITE))
+			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+		new = (unsigned long) p;
+		prev = cmpxchg(&dst->_metrics, old, new);
+
+		if (prev != old) {
+			p = __DST_METRICS_PTR(prev);
+			if (prev & DST_METRICS_READ_ONLY)
+				p = NULL;
+		}
+	}
+	return p;
+}
+
+static inline const void *choose_neigh_daddr(struct rt6_info *rt,
+					     struct sk_buff *skb,
+					     const void *daddr)
+{
+	struct in6_addr *p = &rt->rt6i_gateway;
+
+	if (!ipv6_addr_any(p))
+		return (const void *) p;
+	else if (skb)
+		return &ipv6_hdr(skb)->daddr;
+	return daddr;
+}
+
+static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
+					  struct sk_buff *skb,
+					  const void *daddr)
+{
+	struct rt6_info *rt = (struct rt6_info *) dst;
+	struct neighbour *n;
+
+	daddr = choose_neigh_daddr(rt, skb, daddr);
+	n = __ipv6_neigh_lookup(dst->dev, daddr);
+	if (n)
+		return n;
+	return neigh_create(&nd_tbl, daddr, dst->dev);
+}
+
+static struct dst_ops ip6_dst_ops_template = {
 	.family			=	AF_INET6,
-	.protocol		=	__constant_htons(ETH_P_IPV6),
+	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 	.gc			=	ip6_dst_gc,
 	.gc_thresh		=	1024,
 	.check			=	ip6_dst_check,
+	.default_advmss		=	ip6_default_advmss,
+	.mtu			=	ip6_mtu,
+	.cow_metrics		=	ipv6_cow_metrics,
 	.destroy		=	ip6_dst_destroy,
 	.ifdown			=	ip6_dst_ifdown,
 	.negative_advice	=	ip6_negative_advice,
 	.link_failure		=	ip6_link_failure,
 	.update_pmtu		=	ip6_rt_update_pmtu,
-	.entry_size		=	sizeof(struct rt6_info),
+	.redirect		=	rt6_do_redirect,
+	.local_out		=	__ip6_local_out,
+	.neigh_lookup		=	ip6_neigh_lookup,
 };
 
-struct rt6_info ip6_null_entry = {
-	.u = {
-		.dst = {
-			.__refcnt	= ATOMIC_INIT(1),
-			.__use		= 1,
-			.dev		= &loopback_dev,
-			.obsolete	= -1,
-			.error		= -ENETUNREACH,
-			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
-			.input		= ip6_pkt_discard,
-			.output		= ip6_pkt_discard_out,
-			.ops		= &ip6_dst_ops,
-			.path		= (struct dst_entry*)&ip6_null_entry,
-		}
+static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
+{
+	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+	return mtu ? : dst->dev->mtu;
+}
+
+static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+					 struct sk_buff *skb, u32 mtu)
+{
+}
+
+static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
+				      struct sk_buff *skb)
+{
+}
+
+static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
+					 unsigned long old)
+{
+	return NULL;
+}
+
+static struct dst_ops ip6_dst_blackhole_ops = {
+	.family			=	AF_INET6,
+	.protocol		=	cpu_to_be16(ETH_P_IPV6),
+	.destroy		=	ip6_dst_destroy,
+	.check			=	ip6_dst_check,
+	.mtu			=	ip6_blackhole_mtu,
+	.default_advmss		=	ip6_default_advmss,
+	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
+	.redirect		=	ip6_rt_blackhole_redirect,
+	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
+	.neigh_lookup		=	ip6_neigh_lookup,
+};
+
+static const u32 ip6_template_metrics[RTAX_MAX] = {
+	[RTAX_HOPLIMIT - 1] = 0,
+};
+
+static const struct rt6_info ip6_null_entry_template = {
+	.dst = {
+		.__refcnt	= ATOMIC_INIT(1),
+		.__use		= 1,
+		.obsolete	= DST_OBSOLETE_FORCE_CHK,
+		.error		= -ENETUNREACH,
+		.input		= ip6_pkt_discard,
+		.output		= ip6_pkt_discard_out,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_protocol  = RTPROT_KERNEL,
 	.rt6i_metric	= ~(u32) 0,
 	.rt6i_ref	= ATOMIC_INIT(1),
 };
 
-struct fib6_node ip6_routing_table = {
-	.leaf		= &ip6_null_entry,
-	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
-};
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 
-/* Protects all the ip6 fib */
+static const struct rt6_info ip6_prohibit_entry_template = {
+	.dst = {
+		.__refcnt	= ATOMIC_INIT(1),
+		.__use		= 1,
+		.obsolete	= DST_OBSOLETE_FORCE_CHK,
+		.error		= -EACCES,
+		.input		= ip6_pkt_prohibit,
+		.output		= ip6_pkt_prohibit_out,
+	},
+	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_protocol  = RTPROT_KERNEL,
+	.rt6i_metric	= ~(u32) 0,
+	.rt6i_ref	= ATOMIC_INIT(1),
+};
 
-DEFINE_RWLOCK(rt6_lock);
+static const struct rt6_info ip6_blk_hole_entry_template = {
+	.dst = {
+		.__refcnt	= ATOMIC_INIT(1),
+		.__use		= 1,
+		.obsolete	= DST_OBSOLETE_FORCE_CHK,
+		.error		= -EINVAL,
+		.input		= dst_discard,
+		.output		= dst_discard_sk,
+	},
+	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_protocol  = RTPROT_KERNEL,
+	.rt6i_metric	= ~(u32) 0,
+	.rt6i_ref	= ATOMIC_INIT(1),
+};
 
+#endif
 
 /* allocate dst with ip6_dst_ops */
-static __inline__ struct rt6_info *ip6_dst_alloc(void)
+static inline struct rt6_info *ip6_dst_alloc(struct net *net,
+					     struct net_device *dev,
+					     int flags,
+					     struct fib6_table *table)
 {
-	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
+	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
+					0, DST_OBSOLETE_FORCE_CHK, flags);
+
+	if (rt) {
+		struct dst_entry *dst = &rt->dst;
+
+		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
+		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
+		rt->rt6i_genid = rt_genid_ipv6(net);
+		INIT_LIST_HEAD(&rt->rt6i_siblings);
+	}
+	return rt;
 }
 
 static void ip6_dst_destroy(struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
 	struct inet6_dev *idev = rt->rt6i_idev;
+	struct dst_entry *from = dst->from;
+
+	if (!(rt->dst.flags & DST_HOST))
+		dst_destroy_metrics_generic(dst);
 
-	if (idev != NULL) {
+	if (idev) {
 		rt->rt6i_idev = NULL;
 		in6_dev_put(idev);
-	}	
+	}
+
+	dst->from = NULL;
+	dst_release(from);
+
+	if (rt6_has_peer(rt)) {
+		struct inet_peer *peer = rt6_peer_ptr(rt);
+		inet_putpeer(peer);
+	}
 }
 
 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -159,425 +348,742 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
 	struct inet6_dev *idev = rt->rt6i_idev;
-
-	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
-		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
-		if (loopback_idev != NULL) {
-			rt->rt6i_idev = loopback_idev;
-			in6_dev_put(idev);
+	struct net_device *loopback_dev =
+		dev_net(dev)->loopback_dev;
+
+	if (dev != loopback_dev) {
+		if (idev && idev->dev == dev) {
+			struct inet6_dev *loopback_idev =
+				in6_dev_get(loopback_dev);
+			if (loopback_idev) {
+				rt->rt6i_idev = loopback_idev;
+				in6_dev_put(idev);
+			}
 		}
 	}
 }
 
-static __inline__ int rt6_check_expired(const struct rt6_info *rt)
+static bool rt6_check_expired(const struct rt6_info *rt)
 {
-	return (rt->rt6i_flags & RTF_EXPIRES &&
-		time_after(jiffies, rt->rt6i_expires));
+	if (rt->rt6i_flags & RTF_EXPIRES) {
+		if (time_after(jiffies, rt->dst.expires))
+			return true;
+	} else if (rt->dst.from) {
+		return rt6_check_expired((struct rt6_info *) rt->dst.from);
+	}
+	return false;
+}
+
+/* Multipath route selection:
+ *   Hash based function using packet header and flowlabel.
+ * Adapted from fib_info_hashfn()
+ */
+static int rt6_info_hash_nhsfn(unsigned int candidate_count,
+			       const struct flowi6 *fl6)
+{
+	unsigned int val = fl6->flowi6_proto;
+
+	val ^= ipv6_addr_hash(&fl6->daddr);
+	val ^= ipv6_addr_hash(&fl6->saddr);
+
+	/* Work only if this not encapsulated */
+	switch (fl6->flowi6_proto) {
+	case IPPROTO_UDP:
+	case IPPROTO_TCP:
+	case IPPROTO_SCTP:
+		val ^= (__force u16)fl6->fl6_sport;
+		val ^= (__force u16)fl6->fl6_dport;
+		break;
+
+	case IPPROTO_ICMPV6:
+		val ^= (__force u16)fl6->fl6_icmp_type;
+		val ^= (__force u16)fl6->fl6_icmp_code;
+		break;
+	}
+	/* RFC6438 recommands to use flowlabel */
+	val ^= (__force u32)fl6->flowlabel;
+
+	/* Perhaps, we need to tune, this function? */
+	val = val ^ (val >> 7) ^ (val >> 12);
+	return val % candidate_count;
+}
+
+static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+					     struct flowi6 *fl6, int oif,
+					     int strict)
+{
+	struct rt6_info *sibling, *next_sibling;
+	int route_choosen;
+
+	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
+	/* Don't change the route, if route_choosen == 0
+	 * (siblings does not include ourself)
+	 */
+	if (route_choosen)
+		list_for_each_entry_safe(sibling, next_sibling,
+				&match->rt6i_siblings, rt6i_siblings) {
+			route_choosen--;
+			if (route_choosen == 0) {
+				if (rt6_score_route(sibling, oif, strict) < 0)
+					break;
+				match = sibling;
+				break;
+			}
+		}
+	return match;
 }
 
 /*
- *	Route lookup. Any rt6_lock is implied.
+ *	Route lookup. Any table->tb6_lock is implied.
  */
 
-static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
+static inline struct rt6_info *rt6_device_match(struct net *net,
+						    struct rt6_info *rt,
+						    const struct in6_addr *saddr,
 						    int oif,
-						    int strict)
+						    int flags)
 {
 	struct rt6_info *local = NULL;
 	struct rt6_info *sprt;
 
-	if (oif) {
-		for (sprt = rt; sprt; sprt = sprt->u.next) {
-			struct net_device *dev = sprt->rt6i_dev;
+	if (!oif && ipv6_addr_any(saddr))
+		goto out;
+
+	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
+		struct net_device *dev = sprt->dst.dev;
+
+		if (oif) {
 			if (dev->ifindex == oif)
 				return sprt;
 			if (dev->flags & IFF_LOOPBACK) {
-				if (sprt->rt6i_idev == NULL ||
+				if (!sprt->rt6i_idev ||
 				    sprt->rt6i_idev->dev->ifindex != oif) {
-					if (strict && oif)
+					if (flags & RT6_LOOKUP_F_IFACE && oif)
 						continue;
-					if (local && (!oif || 
+					if (local && (!oif ||
 						      local->rt6i_idev->dev->ifindex == oif))
 						continue;
 				}
 				local = sprt;
 			}
+		} else {
+			if (ipv6_chk_addr(net, saddr, dev,
+					  flags & RT6_LOOKUP_F_IFACE))
+				return sprt;
 		}
+	}
 
+	if (oif) {
 		if (local)
 			return local;
 
-		if (strict)
-			return &ip6_null_entry;
+		if (flags & RT6_LOOKUP_F_IFACE)
+			return net->ipv6.ip6_null_entry;
 	}
+out:
 	return rt;
 }
 
+#ifdef CONFIG_IPV6_ROUTER_PREF
+struct __rt6_probe_work {
+	struct work_struct work;
+	struct in6_addr target;
+	struct net_device *dev;
+};
+
+static void rt6_probe_deferred(struct work_struct *w)
+{
+	struct in6_addr mcaddr;
+	struct __rt6_probe_work *work =
+		container_of(w, struct __rt6_probe_work, work);
+
+	addrconf_addr_solict_mult(&work->target, &mcaddr);
+	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
+	dev_put(work->dev);
+	kfree(w);
+}
+
+static void rt6_probe(struct rt6_info *rt)
+{
+	struct neighbour *neigh;
+	/*
+	 * Okay, this does not seem to be appropriate
+	 * for now, however, we need to check if it
+	 * is really so; aka Router Reachability Probing.
+	 *
+	 * Router Reachability Probe MUST be rate-limited
+	 * to no more than one per minute.
+	 */
+	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
+		return;
+	rcu_read_lock_bh();
+	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+	if (neigh) {
+		write_lock(&neigh->lock);
+		if (neigh->nud_state & NUD_VALID)
+			goto out;
+	}
+
+	if (!neigh ||
+	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
+		struct __rt6_probe_work *work;
+
+		work = kmalloc(sizeof(*work), GFP_ATOMIC);
+
+		if (neigh && work)
+			__neigh_set_probe_once(neigh);
+
+		if (neigh)
+			write_unlock(&neigh->lock);
+
+		if (work) {
+			INIT_WORK(&work->work, rt6_probe_deferred);
+			work->target = rt->rt6i_gateway;
+			dev_hold(rt->dst.dev);
+			work->dev = rt->dst.dev;
+			schedule_work(&work->work);
+		}
+	} else {
+out:
+		write_unlock(&neigh->lock);
+	}
+	rcu_read_unlock_bh();
+}
+#else
+static inline void rt6_probe(struct rt6_info *rt)
+{
+}
+#endif
+
 /*
- *	pointer to the last default router chosen. BH is disabled locally.
+ * Default Router Selection (RFC 2461 6.3.6)
  */
-static struct rt6_info *rt6_dflt_pointer;
-static DEFINE_SPINLOCK(rt6_dflt_lock);
+static inline int rt6_check_dev(struct rt6_info *rt, int oif)
+{
+	struct net_device *dev = rt->dst.dev;
+	if (!oif || dev->ifindex == oif)
+		return 2;
+	if ((dev->flags & IFF_LOOPBACK) &&
+	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
+		return 1;
+	return 0;
+}
 
-void rt6_reset_dflt_pointer(struct rt6_info *rt)
+static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
 {
-	spin_lock_bh(&rt6_dflt_lock);
-	if (rt == NULL || rt == rt6_dflt_pointer) {
-		RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
-		rt6_dflt_pointer = NULL;
+	struct neighbour *neigh;
+	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
+
+	if (rt->rt6i_flags & RTF_NONEXTHOP ||
+	    !(rt->rt6i_flags & RTF_GATEWAY))
+		return RT6_NUD_SUCCEED;
+
+	rcu_read_lock_bh();
+	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+	if (neigh) {
+		read_lock(&neigh->lock);
+		if (neigh->nud_state & NUD_VALID)
+			ret = RT6_NUD_SUCCEED;
+#ifdef CONFIG_IPV6_ROUTER_PREF
+		else if (!(neigh->nud_state & NUD_FAILED))
+			ret = RT6_NUD_SUCCEED;
+		else
+			ret = RT6_NUD_FAIL_PROBE;
+#endif
+		read_unlock(&neigh->lock);
+	} else {
+		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
+		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
 	}
-	spin_unlock_bh(&rt6_dflt_lock);
+	rcu_read_unlock_bh();
+
+	return ret;
 }
 
-/* Default Router Selection (RFC 2461 6.3.6) */
-static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
+static int rt6_score_route(struct rt6_info *rt, int oif,
+			   int strict)
 {
-	struct rt6_info *match = NULL;
-	struct rt6_info *sprt;
-	int mpri = 0;
+	int m;
 
-	for (sprt = rt; sprt; sprt = sprt->u.next) {
-		struct neighbour *neigh;
-		int m = 0;
+	m = rt6_check_dev(rt, oif);
+	if (!m && (strict & RT6_LOOKUP_F_IFACE))
+		return RT6_NUD_FAIL_HARD;
+#ifdef CONFIG_IPV6_ROUTER_PREF
+	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
+#endif
+	if (strict & RT6_LOOKUP_F_REACHABLE) {
+		int n = rt6_check_neigh(rt);
+		if (n < 0)
+			return n;
+	}
+	return m;
+}
 
-		if (!oif ||
-		    (sprt->rt6i_dev &&
-		     sprt->rt6i_dev->ifindex == oif))
-			m += 8;
+static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
+				   int *mpri, struct rt6_info *match,
+				   bool *do_rr)
+{
+	int m;
+	bool match_do_rr = false;
 
-		if (rt6_check_expired(sprt))
-			continue;
+	if (rt6_check_expired(rt))
+		goto out;
 
-		if (sprt == rt6_dflt_pointer)
-			m += 4;
+	m = rt6_score_route(rt, oif, strict);
+	if (m == RT6_NUD_FAIL_DO_RR) {
+		match_do_rr = true;
+		m = 0; /* lowest valid score */
+	} else if (m == RT6_NUD_FAIL_HARD) {
+		goto out;
+	}
 
-		if ((neigh = sprt->rt6i_nexthop) != NULL) {
-			read_lock_bh(&neigh->lock);
-			switch (neigh->nud_state) {
-			case NUD_REACHABLE:
-				m += 3;
-				break;
+	if (strict & RT6_LOOKUP_F_REACHABLE)
+		rt6_probe(rt);
 
-			case NUD_STALE:
-			case NUD_DELAY:
-			case NUD_PROBE:
-				m += 2;
-				break;
+	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
+	if (m > *mpri) {
+		*do_rr = match_do_rr;
+		*mpri = m;
+		match = rt;
+	}
+out:
+	return match;
+}
 
-			case NUD_NOARP:
-			case NUD_PERMANENT:
-				m += 1;
-				break;
+static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
+				     struct rt6_info *rr_head,
+				     u32 metric, int oif, int strict,
+				     bool *do_rr)
+{
+	struct rt6_info *rt, *match;
+	int mpri = -1;
 
-			case NUD_INCOMPLETE:
-			default:
-				read_unlock_bh(&neigh->lock);
-				continue;
-			}
-			read_unlock_bh(&neigh->lock);
-		} else {
-			continue;
-		}
+	match = NULL;
+	for (rt = rr_head; rt && rt->rt6i_metric == metric;
+	     rt = rt->dst.rt6_next)
+		match = find_match(rt, oif, strict, &mpri, match, do_rr);
+	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
+	     rt = rt->dst.rt6_next)
+		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 
-		if (m > mpri || m >= 12) {
-			match = sprt;
-			mpri = m;
-			if (m >= 12) {
-				/* we choose the last default router if it
-				 * is in (probably) reachable state.
-				 * If route changed, we should do pmtu
-				 * discovery. --yoshfuji
-				 */
-				break;
-			}
-		}
-	}
+	return match;
+}
 
-	spin_lock(&rt6_dflt_lock);
-	if (!match) {
-		/*
-		 *	No default routers are known to be reachable.
-		 *	SHOULD round robin
-		 */
-		if (rt6_dflt_pointer) {
-			for (sprt = rt6_dflt_pointer->u.next;
-			     sprt; sprt = sprt->u.next) {
-				if (sprt->u.dst.obsolete <= 0 &&
-				    sprt->u.dst.error == 0 &&
-				    !rt6_check_expired(sprt)) {
-					match = sprt;
-					break;
-				}
-			}
-			for (sprt = rt;
-			     !match && sprt;
-			     sprt = sprt->u.next) {
-				if (sprt->u.dst.obsolete <= 0 &&
-				    sprt->u.dst.error == 0 &&
-				    !rt6_check_expired(sprt)) {
-					match = sprt;
-					break;
-				}
-				if (sprt == rt6_dflt_pointer)
-					break;
-			}
-		}
+static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
+{
+	struct rt6_info *match, *rt0;
+	struct net *net;
+	bool do_rr = false;
+
+	rt0 = fn->rr_ptr;
+	if (!rt0)
+		fn->rr_ptr = rt0 = fn->leaf;
+
+	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
+			     &do_rr);
+
+	if (do_rr) {
+		struct rt6_info *next = rt0->dst.rt6_next;
+
+		/* no entries matched; do round-robin */
+		if (!next || next->rt6i_metric != rt0->rt6i_metric)
+			next = fn->leaf;
+
+		if (next != rt0)
+			fn->rr_ptr = next;
 	}
 
-	if (match) {
-		if (rt6_dflt_pointer != match)
-			RT6_TRACE("changed default router: %p->%p\n",
-				  rt6_dflt_pointer, match);
-		rt6_dflt_pointer = match;
+	net = dev_net(rt0->dst.dev);
+	return match ? match : net->ipv6.ip6_null_entry;
+}
+
+#ifdef CONFIG_IPV6_ROUTE_INFO
+int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
+		  const struct in6_addr *gwaddr)
+{
+	struct net *net = dev_net(dev);
+	struct route_info *rinfo = (struct route_info *) opt;
+	struct in6_addr prefix_buf, *prefix;
+	unsigned int pref;
+	unsigned long lifetime;
+	struct rt6_info *rt;
+
+	if (len < sizeof(struct route_info)) {
+		return -EINVAL;
 	}
-	spin_unlock(&rt6_dflt_lock);
 
-	if (!match) {
-		/*
-		 * Last Resort: if no default routers found, 
-		 * use addrconf default route.
-		 * We don't record this route.
-		 */
-		for (sprt = ip6_routing_table.leaf;
-		     sprt; sprt = sprt->u.next) {
-			if (!rt6_check_expired(sprt) &&
-			    (sprt->rt6i_flags & RTF_DEFAULT) &&
-			    (!oif ||
-			     (sprt->rt6i_dev &&
-			      sprt->rt6i_dev->ifindex == oif))) {
-				match = sprt;
-				break;
-			}
+	/* Sanity check for prefix_len and length */
+	if (rinfo->length > 3) {
+		return -EINVAL;
+	} else if (rinfo->prefix_len > 128) {
+		return -EINVAL;
+	} else if (rinfo->prefix_len > 64) {
+		if (rinfo->length < 2) {
+			return -EINVAL;
 		}
-		if (!match) {
-			/* no default route.  give up. */
-			match = &ip6_null_entry;
+	} else if (rinfo->prefix_len > 0) {
+		if (rinfo->length < 1) {
+			return -EINVAL;
 		}
 	}
 
-	return match;
+	pref = rinfo->route_pref;
+	if (pref == ICMPV6_ROUTER_PREF_INVALID)
+		return -EINVAL;
+
+	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
+
+	if (rinfo->length == 3)
+		prefix = (struct in6_addr *)rinfo->prefix;
+	else {
+		/* this function is safe */
+		ipv6_addr_prefix(&prefix_buf,
+				 (struct in6_addr *)rinfo->prefix,
+				 rinfo->prefix_len);
+		prefix = &prefix_buf;
+	}
+
+	if (rinfo->prefix_len == 0)
+		rt = rt6_get_dflt_router(gwaddr, dev);
+	else
+		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
+					gwaddr, dev->ifindex);
+
+	if (rt && !lifetime) {
+		ip6_del_rt(rt);
+		rt = NULL;
+	}
+
+	if (!rt && lifetime)
+		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
+					pref);
+	else if (rt)
+		rt->rt6i_flags = RTF_ROUTEINFO |
+				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
+
+	if (rt) {
+		if (!addrconf_finite_timeout(lifetime))
+			rt6_clean_expires(rt);
+		else
+			rt6_set_expires(rt, jiffies + HZ * lifetime);
+
+		ip6_rt_put(rt);
+	}
+	return 0;
 }
+#endif
 
-struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
-			    int oif, int strict)
+#define BACKTRACK(__net, saddr)			\
+do { \
+	if (rt == __net->ipv6.ip6_null_entry) {	\
+		struct fib6_node *pn; \
+		while (1) { \
+			if (fn->fn_flags & RTN_TL_ROOT) \
+				goto out; \
+			pn = fn->parent; \
+			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
+				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
+			else \
+				fn = pn; \
+			if (fn->fn_flags & RTN_RTINFO) \
+				goto restart; \
+		} \
+	} \
+} while (0)
+
+static struct rt6_info *ip6_pol_route_lookup(struct net *net,
+					     struct fib6_table *table,
+					     struct flowi6 *fl6, int flags)
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt;
 
-	read_lock_bh(&rt6_lock);
-	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
-	rt = rt6_device_match(fn->leaf, oif, strict);
-	dst_hold(&rt->u.dst);
-	rt->u.dst.__use++;
-	read_unlock_bh(&rt6_lock);
-
-	rt->u.dst.lastuse = jiffies;
-	if (rt->u.dst.error == 0)
-		return rt;
-	dst_release(&rt->u.dst);
+	read_lock_bh(&table->tb6_lock);
+	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+restart:
+	rt = fn->leaf;
+	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
+	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
+		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
+	BACKTRACK(net, &fl6->saddr);
+out:
+	dst_use(&rt->dst, jiffies);
+	read_unlock_bh(&table->tb6_lock);
+	return rt;
+
+}
+
+struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
+				    int flags)
+{
+	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
+}
+EXPORT_SYMBOL_GPL(ip6_route_lookup);
+
+struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
+			    const struct in6_addr *saddr, int oif, int strict)
+{
+	struct flowi6 fl6 = {
+		.flowi6_oif = oif,
+		.daddr = *daddr,
+	};
+	struct dst_entry *dst;
+	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
+
+	if (saddr) {
+		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
+		flags |= RT6_LOOKUP_F_HAS_SADDR;
+	}
+
+	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
+	if (dst->error == 0)
+		return (struct rt6_info *) dst;
+
+	dst_release(dst);
+
 	return NULL;
 }
 
-/* ip6_ins_rt is called with FREE rt6_lock.
+EXPORT_SYMBOL(rt6_lookup);
+
+/* ip6_ins_rt is called with FREE table->tb6_lock.
    It takes new route entry, the addition fails by any reason the
    route is freed. In any case, if caller does not hold it, it may
    be destroyed.
  */
 
-int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
-		void *_rtattr, struct netlink_skb_parms *req)
+static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
+			struct nlattr *mx, int mx_len)
 {
 	int err;
+	struct fib6_table *table;
 
-	write_lock_bh(&rt6_lock);
-	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
-	write_unlock_bh(&rt6_lock);
+	table = rt->rt6i_table;
+	write_lock_bh(&table->tb6_lock);
+	err = fib6_add(&table->tb6_root, rt, info, mx, mx_len);
+	write_unlock_bh(&table->tb6_lock);
 
 	return err;
 }
 
-/* No rt6_lock! If COW failed, the function returns dead route entry
-   with dst->error set to errno value.
- */
+int ip6_ins_rt(struct rt6_info *rt)
+{
+	struct nl_info info = {
+		.nl_net = dev_net(rt->dst.dev),
+	};
+	return __ip6_ins_rt(rt, &info, NULL, 0);
+}
 
-static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
-				struct in6_addr *saddr, struct netlink_skb_parms *req)
+static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
+				      const struct in6_addr *daddr,
+				      const struct in6_addr *saddr)
 {
-	int err;
 	struct rt6_info *rt;
 
 	/*
 	 *	Clone the route.
 	 */
 
-	rt = ip6_rt_copy(ort);
+	rt = ip6_rt_copy(ort, daddr);
 
 	if (rt) {
-		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
+		if (ort->rt6i_dst.plen != 128 &&
+		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
+			rt->rt6i_flags |= RTF_ANYCAST;
 
-		if (!(rt->rt6i_flags&RTF_GATEWAY))
-			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
-
-		rt->rt6i_dst.plen = 128;
 		rt->rt6i_flags |= RTF_CACHE;
-		rt->u.dst.flags |= DST_HOST;
 
 #ifdef CONFIG_IPV6_SUBTREES
 		if (rt->rt6i_src.plen && saddr) {
-			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
+			rt->rt6i_src.addr = *saddr;
 			rt->rt6i_src.plen = 128;
 		}
 #endif
-
-		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
-
-		dst_hold(&rt->u.dst);
-
-		err = ip6_ins_rt(rt, NULL, NULL, req);
-		if (err == 0)
-			return rt;
-
-		rt->u.dst.error = err;
-
-		return rt;
 	}
-	dst_hold(&ip6_null_entry.u.dst);
-	return &ip6_null_entry;
-}
 
-#define BACKTRACK() \
-if (rt == &ip6_null_entry && strict) { \
-       while ((fn = fn->parent) != NULL) { \
-		if (fn->fn_flags & RTN_ROOT) { \
-			dst_hold(&rt->u.dst); \
-			goto out; \
-		} \
-		if (fn->fn_flags & RTN_RTINFO) \
-			goto restart; \
-	} \
+	return rt;
 }
 
+static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
+					const struct in6_addr *daddr)
+{
+	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 
-void ip6_route_input(struct sk_buff *skb)
+	if (rt)
+		rt->rt6i_flags |= RTF_CACHE;
+	return rt;
+}
+
+static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
+				      struct flowi6 *fl6, int flags)
 {
 	struct fib6_node *fn;
-	struct rt6_info *rt;
-	int strict;
+	struct rt6_info *rt, *nrt;
+	int strict = 0;
 	int attempts = 3;
+	int err;
+	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 
-	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
+	strict |= flags & RT6_LOOKUP_F_IFACE;
 
 relookup:
-	read_lock_bh(&rt6_lock);
+	read_lock_bh(&table->tb6_lock);
 
-	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
-			 &skb->nh.ipv6h->saddr);
+restart_2:
+	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 
 restart:
-	rt = fn->leaf;
-
-	if ((rt->rt6i_flags & RTF_CACHE)) {
-		rt = rt6_device_match(rt, skb->dev->ifindex, strict);
-		BACKTRACK();
-		dst_hold(&rt->u.dst);
+	rt = rt6_select(fn, oif, strict | reachable);
+	if (rt->rt6i_nsiblings)
+		rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
+	BACKTRACK(net, &fl6->saddr);
+	if (rt == net->ipv6.ip6_null_entry ||
+	    rt->rt6i_flags & RTF_CACHE)
 		goto out;
-	}
 
-	rt = rt6_device_match(rt, skb->dev->ifindex, strict);
-	BACKTRACK();
+	dst_hold(&rt->dst);
+	read_unlock_bh(&table->tb6_lock);
 
-	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
-		struct rt6_info *nrt;
-		dst_hold(&rt->u.dst);
-		read_unlock_bh(&rt6_lock);
-
-		nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
-			      &skb->nh.ipv6h->saddr,
-			      &NETLINK_CB(skb));
+	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
+		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
+	else if (!(rt->dst.flags & DST_HOST))
+		nrt = rt6_alloc_clone(rt, &fl6->daddr);
+	else
+		goto out2;
 
-		dst_release(&rt->u.dst);
-		rt = nrt;
+	ip6_rt_put(rt);
+	rt = nrt ? : net->ipv6.ip6_null_entry;
 
-		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
+	dst_hold(&rt->dst);
+	if (nrt) {
+		err = ip6_ins_rt(nrt);
+		if (!err)
 			goto out2;
-
-		/* Race condition! In the gap, when rt6_lock was
-		   released someone could insert this route.  Relookup.
-		*/
-		dst_release(&rt->u.dst);
-		goto relookup;
 	}
-	dst_hold(&rt->u.dst);
+
+	if (--attempts <= 0)
+		goto out2;
+
+	/*
+	 * Race condition! In the gap, when table->tb6_lock was
+	 * released someone could insert this route.  Relookup.
+	 */
+	ip6_rt_put(rt);
+	goto relookup;
 
 out:
-	read_unlock_bh(&rt6_lock);
+	if (reachable) {
+		reachable = 0;
+		goto restart_2;
+	}
+	dst_hold(&rt->dst);
+	read_unlock_bh(&table->tb6_lock);
 out2:
-	rt->u.dst.lastuse = jiffies;
-	rt->u.dst.__use++;
-	skb->dst = (struct dst_entry *) rt;
+	rt->dst.lastuse = jiffies;
+	rt->dst.__use++;
+
+	return rt;
 }
 
-struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
+static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
+					    struct flowi6 *fl6, int flags)
 {
-	struct fib6_node *fn;
-	struct rt6_info *rt;
-	int strict;
-	int attempts = 3;
+	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
+}
 
-	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
+static struct dst_entry *ip6_route_input_lookup(struct net *net,
+						struct net_device *dev,
+						struct flowi6 *fl6, int flags)
+{
+	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
+		flags |= RT6_LOOKUP_F_IFACE;
 
-relookup:
-	read_lock_bh(&rt6_lock);
+	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
+}
 
-	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
+void ip6_route_input(struct sk_buff *skb)
+{
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct net *net = dev_net(skb->dev);
+	int flags = RT6_LOOKUP_F_HAS_SADDR;
+	struct flowi6 fl6 = {
+		.flowi6_iif = skb->dev->ifindex,
+		.daddr = iph->daddr,
+		.saddr = iph->saddr,
+		.flowlabel = ip6_flowinfo(iph),
+		.flowi6_mark = skb->mark,
+		.flowi6_proto = iph->nexthdr,
+	};
 
-restart:
-	rt = fn->leaf;
+	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
+}
 
-	if ((rt->rt6i_flags & RTF_CACHE)) {
-		rt = rt6_device_match(rt, fl->oif, strict);
-		BACKTRACK();
-		dst_hold(&rt->u.dst);
-		goto out;
-	}
-	if (rt->rt6i_flags & RTF_DEFAULT) {
-		if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
-			rt = rt6_best_dflt(rt, fl->oif);
-	} else {
-		rt = rt6_device_match(rt, fl->oif, strict);
-		BACKTRACK();
-	}
+static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
+					     struct flowi6 *fl6, int flags)
+{
+	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
+}
 
-	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
-		struct rt6_info *nrt;
-		dst_hold(&rt->u.dst);
-		read_unlock_bh(&rt6_lock);
+struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
+				    struct flowi6 *fl6)
+{
+	int flags = 0;
 
-		nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
+	fl6->flowi6_iif = LOOPBACK_IFINDEX;
 
-		dst_release(&rt->u.dst);
-		rt = nrt;
+	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
+		flags |= RT6_LOOKUP_F_IFACE;
 
-		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
-			goto out2;
+	if (!ipv6_addr_any(&fl6->saddr))
+		flags |= RT6_LOOKUP_F_HAS_SADDR;
+	else if (sk)
+		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
+
+	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
+}
+
+EXPORT_SYMBOL(ip6_route_output);
+
+struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
+{
+	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
+	struct dst_entry *new = NULL;
+
+	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
+	if (rt) {
+		new = &rt->dst;
 
-		/* Race condition! In the gap, when rt6_lock was
-		   released someone could insert this route.  Relookup.
-		*/
-		dst_release(&rt->u.dst);
-		goto relookup;
+		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
+		rt6_init_peer(rt, net->ipv6.peers);
+
+		new->__use = 1;
+		new->input = dst_discard;
+		new->output = dst_discard_sk;
+
+		if (dst_metrics_read_only(&ort->dst))
+			new->_metrics = ort->dst._metrics;
+		else
+			dst_copy_metrics(new, &ort->dst);
+		rt->rt6i_idev = ort->rt6i_idev;
+		if (rt->rt6i_idev)
+			in6_dev_hold(rt->rt6i_idev);
+
+		rt->rt6i_gateway = ort->rt6i_gateway;
+		rt->rt6i_flags = ort->rt6i_flags;
+		rt->rt6i_metric = 0;
+
+		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
+#ifdef CONFIG_IPV6_SUBTREES
+		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
+#endif
+
+		dst_free(new);
 	}
-	dst_hold(&rt->u.dst);
 
-out:
-	read_unlock_bh(&rt6_lock);
-out2:
-	rt->u.dst.lastuse = jiffies;
-	rt->u.dst.__use++;
-	return &rt->u.dst;
+	dst_release(dst_orig);
+	return new ? new : ERR_PTR(-ENOMEM);
 }
 
-
 /*
  *	Destination cache support functions
  */
@@ -588,10 +1094,20 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 
 	rt = (struct rt6_info *) dst;
 
-	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
-		return dst;
+	/* All IPV6 dsts are created with ->obsolete set to the value
+	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
+	 * into this function always.
+	 */
+	if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
+		return NULL;
 
-	return NULL;
+	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
+		return NULL;
+
+	if (rt6_check_expired(rt))
+		return NULL;
+
+	return dst;
 }
 
 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
@@ -599,59 +1115,218 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 	struct rt6_info *rt = (struct rt6_info *) dst;
 
 	if (rt) {
-		if (rt->rt6i_flags & RTF_CACHE)
-			ip6_del_rt(rt, NULL, NULL, NULL);
-		else
+		if (rt->rt6i_flags & RTF_CACHE) {
+			if (rt6_check_expired(rt)) {
+				ip6_del_rt(rt);
+				dst = NULL;
+			}
+		} else {
 			dst_release(dst);
+			dst = NULL;
+		}
 	}
-	return NULL;
+	return dst;
 }
 
 static void ip6_link_failure(struct sk_buff *skb)
 {
 	struct rt6_info *rt;
 
-	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
 
-	rt = (struct rt6_info *) skb->dst;
+	rt = (struct rt6_info *) skb_dst(skb);
 	if (rt) {
-		if (rt->rt6i_flags&RTF_CACHE) {
-			dst_set_expires(&rt->u.dst, 0);
-			rt->rt6i_flags |= RTF_EXPIRES;
-		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
+		if (rt->rt6i_flags & RTF_CACHE) {
+			dst_hold(&rt->dst);
+			if (ip6_del_rt(rt))
+				dst_free(&rt->dst);
+		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
 			rt->rt6i_node->fn_sernum = -1;
+		}
 	}
 }
 
-static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			       struct sk_buff *skb, u32 mtu)
 {
 	struct rt6_info *rt6 = (struct rt6_info*)dst;
 
+	dst_confirm(dst);
 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
+		struct net *net = dev_net(dst->dev);
+
 		rt6->rt6i_flags |= RTF_MODIFIED;
 		if (mtu < IPV6_MIN_MTU) {
+			u32 features = dst_metric(dst, RTAX_FEATURES);
 			mtu = IPV6_MIN_MTU;
-			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+			features |= RTAX_FEATURE_ALLFRAG;
+			dst_metric_set(dst, RTAX_FEATURES, features);
 		}
-		dst->metrics[RTAX_MTU-1] = mtu;
+		dst_metric_set(dst, RTAX_MTU, mtu);
+		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
 	}
 }
 
-/* Protected by rt6_lock.  */
-static struct dst_entry *ndisc_dst_gc_list;
-static int ipv6_get_mtu(struct net_device *dev);
+void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
+		     int oif, u32 mark)
+{
+	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+	struct dst_entry *dst;
+	struct flowi6 fl6;
+
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_oif = oif;
+	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
+	fl6.daddr = iph->daddr;
+	fl6.saddr = iph->saddr;
+	fl6.flowlabel = ip6_flowinfo(iph);
+
+	dst = ip6_route_output(net, NULL, &fl6);
+	if (!dst->error)
+		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
+	dst_release(dst);
+}
+EXPORT_SYMBOL_GPL(ip6_update_pmtu);
+
+void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
+{
+	ip6_update_pmtu(skb, sock_net(sk), mtu,
+			sk->sk_bound_dev_if, sk->sk_mark);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
+
+/* Handle redirects */
+struct ip6rd_flowi {
+	struct flowi6 fl6;
+	struct in6_addr gateway;
+};
+
+static struct rt6_info *__ip6_route_redirect(struct net *net,
+					     struct fib6_table *table,
+					     struct flowi6 *fl6,
+					     int flags)
+{
+	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
+	struct rt6_info *rt;
+	struct fib6_node *fn;
+
+	/* Get the "current" route for this destination and
+	 * check if the redirect has come from approriate router.
+	 *
+	 * RFC 4861 specifies that redirects should only be
+	 * accepted if they come from the nexthop to the target.
+	 * Due to the way the routes are chosen, this notion
+	 * is a bit fuzzy and one might need to check all possible
+	 * routes.
+	 */
+
+	read_lock_bh(&table->tb6_lock);
+	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+restart:
+	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+		if (rt6_check_expired(rt))
+			continue;
+		if (rt->dst.error)
+			break;
+		if (!(rt->rt6i_flags & RTF_GATEWAY))
+			continue;
+		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
+			continue;
+		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
+			continue;
+		break;
+	}
+
+	if (!rt)
+		rt = net->ipv6.ip6_null_entry;
+	else if (rt->dst.error) {
+		rt = net->ipv6.ip6_null_entry;
+		goto out;
+	}
+	BACKTRACK(net, &fl6->saddr);
+out:
+	dst_hold(&rt->dst);
+
+	read_unlock_bh(&table->tb6_lock);
+
+	return rt;
+};
+
+static struct dst_entry *ip6_route_redirect(struct net *net,
+					const struct flowi6 *fl6,
+					const struct in6_addr *gateway)
+{
+	int flags = RT6_LOOKUP_F_HAS_SADDR;
+	struct ip6rd_flowi rdfl;
+
+	rdfl.fl6 = *fl6;
+	rdfl.gateway = *gateway;
+
+	return fib6_rule_lookup(net, &rdfl.fl6,
+				flags, __ip6_route_redirect);
+}
+
+void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
+{
+	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+	struct dst_entry *dst;
+	struct flowi6 fl6;
+
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_iif = LOOPBACK_IFINDEX;
+	fl6.flowi6_oif = oif;
+	fl6.flowi6_mark = mark;
+	fl6.daddr = iph->daddr;
+	fl6.saddr = iph->saddr;
+	fl6.flowlabel = ip6_flowinfo(iph);
+
+	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
+	rt6_do_redirect(dst, NULL, skb);
+	dst_release(dst);
+}
+EXPORT_SYMBOL_GPL(ip6_redirect);
 
-static inline unsigned int ipv6_advmss(unsigned int mtu)
+void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
+			    u32 mark)
 {
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
+	struct dst_entry *dst;
+	struct flowi6 fl6;
+
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_iif = LOOPBACK_IFINDEX;
+	fl6.flowi6_oif = oif;
+	fl6.flowi6_mark = mark;
+	fl6.daddr = msg->dest;
+	fl6.saddr = iph->daddr;
+
+	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
+	rt6_do_redirect(dst, NULL, skb);
+	dst_release(dst);
+}
+
+void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
+{
+	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_redirect);
+
+static unsigned int ip6_default_advmss(const struct dst_entry *dst)
+{
+	struct net_device *dev = dst->dev;
+	unsigned int mtu = dst_mtu(dst);
+	struct net *net = dev_net(dev);
+
 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
 
-	if (mtu < ip6_rt_min_advmss)
-		mtu = ip6_rt_min_advmss;
+	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
+		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
 
 	/*
-	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
-	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
-	 * IPV6_MAXPLEN is also valid and means: "any MSS, 
+	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
+	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
+	 * IPV6_MAXPLEN is also valid and means: "any MSS,
 	 * rely only on pmtu discovery"
 	 */
 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
@@ -659,158 +1334,159 @@ static inline unsigned int ipv6_advmss(unsigned int mtu)
 	return mtu;
 }
 
-struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
-				  struct neighbour *neigh,
-				  struct in6_addr *addr,
-				  int (*output)(struct sk_buff *))
+static unsigned int ip6_mtu(const struct dst_entry *dst)
 {
+	struct inet6_dev *idev;
+	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+	if (mtu)
+		goto out;
+
+	mtu = IPV6_MIN_MTU;
+
+	rcu_read_lock();
+	idev = __in6_dev_get(dst->dev);
+	if (idev)
+		mtu = idev->cnf.mtu6;
+	rcu_read_unlock();
+
+out:
+	return min_t(unsigned int, mtu, IP6_MAX_MTU);
+}
+
+static struct dst_entry *icmp6_dst_gc_list;
+static DEFINE_SPINLOCK(icmp6_dst_lock);
+
+struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
+				  struct flowi6 *fl6)
+{
+	struct dst_entry *dst;
 	struct rt6_info *rt;
 	struct inet6_dev *idev = in6_dev_get(dev);
+	struct net *net = dev_net(dev);
 
-	if (unlikely(idev == NULL))
-		return NULL;
+	if (unlikely(!idev))
+		return ERR_PTR(-ENODEV);
 
-	rt = ip6_dst_alloc();
-	if (unlikely(rt == NULL)) {
+	rt = ip6_dst_alloc(net, dev, 0, NULL);
+	if (unlikely(!rt)) {
 		in6_dev_put(idev);
+		dst = ERR_PTR(-ENOMEM);
 		goto out;
 	}
 
-	dev_hold(dev);
-	if (neigh)
-		neigh_hold(neigh);
-	else
-		neigh = ndisc_get_neigh(dev, addr);
-
-	rt->rt6i_dev	  = dev;
-	rt->rt6i_idev     = idev;
-	rt->rt6i_nexthop  = neigh;
-	atomic_set(&rt->u.dst.__refcnt, 1);
-	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
-	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
-	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
-	rt->u.dst.output  = output;
-
-#if 0	/* there's no chance to use these for ndisc */
-	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
-				? DST_HOST 
-				: 0;
-	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
+	rt->dst.flags |= DST_HOST;
+	rt->dst.output  = ip6_output;
+	atomic_set(&rt->dst.__refcnt, 1);
+	rt->rt6i_gateway  = fl6->daddr;
+	rt->rt6i_dst.addr = fl6->daddr;
 	rt->rt6i_dst.plen = 128;
-#endif
+	rt->rt6i_idev     = idev;
+	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
+
+	spin_lock_bh(&icmp6_dst_lock);
+	rt->dst.next = icmp6_dst_gc_list;
+	icmp6_dst_gc_list = &rt->dst;
+	spin_unlock_bh(&icmp6_dst_lock);
 
-	write_lock_bh(&rt6_lock);
-	rt->u.dst.next = ndisc_dst_gc_list;
-	ndisc_dst_gc_list = &rt->u.dst;
-	write_unlock_bh(&rt6_lock);
+	fib6_force_start_gc(net);
 
-	fib6_force_start_gc();
+	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
 
 out:
-	return (struct dst_entry *)rt;
+	return dst;
 }
 
-int ndisc_dst_gc(int *more)
+int icmp6_dst_gc(void)
 {
-	struct dst_entry *dst, *next, **pprev;
-	int freed;
+	struct dst_entry *dst, **pprev;
+	int more = 0;
+
+	spin_lock_bh(&icmp6_dst_lock);
+	pprev = &icmp6_dst_gc_list;
 
-	next = NULL;
-	pprev = &ndisc_dst_gc_list;
-	freed = 0;
 	while ((dst = *pprev) != NULL) {
 		if (!atomic_read(&dst->__refcnt)) {
 			*pprev = dst->next;
 			dst_free(dst);
-			freed++;
 		} else {
 			pprev = &dst->next;
-			(*more)++;
+			++more;
 		}
 	}
 
-	return freed;
-}
-
-static int ip6_dst_gc(void)
-{
-	static unsigned expire = 30*HZ;
-	static unsigned long last_gc;
-	unsigned long now = jiffies;
-
-	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
-	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
-		goto out;
-
-	expire++;
-	fib6_run_gc(expire);
-	last_gc = now;
-	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
-		expire = ip6_rt_gc_timeout>>1;
+	spin_unlock_bh(&icmp6_dst_lock);
 
-out:
-	expire -= expire>>ip6_rt_gc_elasticity;
-	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
+	return more;
 }
 
-/* Clean host part of a prefix. Not necessary in radix tree,
-   but results in cleaner routing tables.
-
-   Remove it only when all the things will work!
- */
-
-static int ipv6_get_mtu(struct net_device *dev)
+static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
+			    void *arg)
 {
-	int mtu = IPV6_MIN_MTU;
-	struct inet6_dev *idev;
+	struct dst_entry *dst, **pprev;
 
-	idev = in6_dev_get(dev);
-	if (idev) {
-		mtu = idev->cnf.mtu6;
-		in6_dev_put(idev);
+	spin_lock_bh(&icmp6_dst_lock);
+	pprev = &icmp6_dst_gc_list;
+	while ((dst = *pprev) != NULL) {
+		struct rt6_info *rt = (struct rt6_info *) dst;
+		if (func(rt, arg)) {
+			*pprev = dst->next;
+			dst_free(dst);
+		} else {
+			pprev = &dst->next;
+		}
 	}
-	return mtu;
+	spin_unlock_bh(&icmp6_dst_lock);
 }
 
-int ipv6_get_hoplimit(struct net_device *dev)
+static int ip6_dst_gc(struct dst_ops *ops)
 {
-	int hoplimit = ipv6_devconf.hop_limit;
-	struct inet6_dev *idev;
+	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
+	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
+	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
+	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
+	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
+	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
+	int entries;
+
+	entries = dst_entries_get_fast(ops);
+	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
+	    entries <= rt_max_size)
+		goto out;
 
-	idev = in6_dev_get(dev);
-	if (idev) {
-		hoplimit = idev->cnf.hop_limit;
-		in6_dev_put(idev);
-	}
-	return hoplimit;
+	net->ipv6.ip6_rt_gc_expire++;
+	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
+	entries = dst_entries_get_slow(ops);
+	if (entries < ops->gc_thresh)
+		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
+out:
+	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
+	return entries > rt_max_size;
 }
 
 /*
  *
  */
 
-int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
-		void *_rtattr, struct netlink_skb_parms *req)
+int ip6_route_add(struct fib6_config *cfg)
 {
 	int err;
-	struct rtmsg *r;
-	struct rtattr **rta;
+	struct net *net = cfg->fc_nlinfo.nl_net;
 	struct rt6_info *rt = NULL;
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev = NULL;
+	struct fib6_table *table;
 	int addr_type;
 
-	rta = (struct rtattr **) _rtattr;
-
-	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
+	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
 		return -EINVAL;
 #ifndef CONFIG_IPV6_SUBTREES
-	if (rtmsg->rtmsg_src_len)
+	if (cfg->fc_src_len)
 		return -EINVAL;
 #endif
-	if (rtmsg->rtmsg_ifindex) {
+	if (cfg->fc_ifindex) {
 		err = -ENODEV;
-		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
+		dev = dev_get_by_index(net, cfg->fc_ifindex);
 		if (!dev)
 			goto out;
 		idev = in6_dev_get(dev);
@@ -818,59 +1494,80 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 			goto out;
 	}
 
-	if (rtmsg->rtmsg_metric == 0)
-		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
+	if (cfg->fc_metric == 0)
+		cfg->fc_metric = IP6_RT_PRIO_USER;
+
+	err = -ENOBUFS;
+	if (cfg->fc_nlinfo.nlh &&
+	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
+		table = fib6_get_table(net, cfg->fc_table);
+		if (!table) {
+			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
+			table = fib6_new_table(net, cfg->fc_table);
+		}
+	} else {
+		table = fib6_new_table(net, cfg->fc_table);
+	}
+
+	if (!table)
+		goto out;
 
-	rt = ip6_dst_alloc();
+	rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
 
-	if (rt == NULL) {
+	if (!rt) {
 		err = -ENOMEM;
 		goto out;
 	}
 
-	rt->u.dst.obsolete = -1;
-	rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
-	if (nlh && (r = NLMSG_DATA(nlh))) {
-		rt->rt6i_protocol = r->rtm_protocol;
-	} else {
-		rt->rt6i_protocol = RTPROT_BOOT;
-	}
+	if (cfg->fc_flags & RTF_EXPIRES)
+		rt6_set_expires(rt, jiffies +
+				clock_t_to_jiffies(cfg->fc_expires));
+	else
+		rt6_clean_expires(rt);
 
-	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
+	if (cfg->fc_protocol == RTPROT_UNSPEC)
+		cfg->fc_protocol = RTPROT_BOOT;
+	rt->rt6i_protocol = cfg->fc_protocol;
+
+	addr_type = ipv6_addr_type(&cfg->fc_dst);
 
 	if (addr_type & IPV6_ADDR_MULTICAST)
-		rt->u.dst.input = ip6_mc_input;
+		rt->dst.input = ip6_mc_input;
+	else if (cfg->fc_flags & RTF_LOCAL)
+		rt->dst.input = ip6_input;
 	else
-		rt->u.dst.input = ip6_forward;
+		rt->dst.input = ip6_forward;
 
-	rt->u.dst.output = ip6_output;
+	rt->dst.output = ip6_output;
 
-	ipv6_addr_prefix(&rt->rt6i_dst.addr, 
-			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
-	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
-	if (rt->rt6i_dst.plen == 128)
-	       rt->u.dst.flags = DST_HOST;
+	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
+	rt->rt6i_dst.plen = cfg->fc_dst_len;
+	if (rt->rt6i_dst.plen == 128) {
+		rt->dst.flags |= DST_HOST;
+		dst_metrics_set_force_overwrite(&rt->dst);
+	}
 
 #ifdef CONFIG_IPV6_SUBTREES
-	ipv6_addr_prefix(&rt->rt6i_src.addr, 
-			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
-	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
+	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
+	rt->rt6i_src.plen = cfg->fc_src_len;
 #endif
 
-	rt->rt6i_metric = rtmsg->rtmsg_metric;
+	rt->rt6i_metric = cfg->fc_metric;
 
 	/* We cannot add true routes via loopback here,
 	   they would result in kernel looping; promote them to reject routes
 	 */
-	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
-	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
+	if ((cfg->fc_flags & RTF_REJECT) ||
+	    (dev && (dev->flags & IFF_LOOPBACK) &&
+	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
+	     !(cfg->fc_flags & RTF_LOCAL))) {
 		/* hold loopback dev/idev if we haven't done so. */
-		if (dev != &loopback_dev) {
+		if (dev != net->loopback_dev) {
 			if (dev) {
 				dev_put(dev);
 				in6_dev_put(idev);
 			}
-			dev = &loopback_dev;
+			dev = net->loopback_dev;
 			dev_hold(dev);
 			idev = in6_dev_get(dev);
 			if (!idev) {
@@ -878,19 +1575,35 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 				goto out;
 			}
 		}
-		rt->u.dst.output = ip6_pkt_discard_out;
-		rt->u.dst.input = ip6_pkt_discard;
-		rt->u.dst.error = -ENETUNREACH;
 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
+		switch (cfg->fc_type) {
+		case RTN_BLACKHOLE:
+			rt->dst.error = -EINVAL;
+			rt->dst.output = dst_discard_sk;
+			rt->dst.input = dst_discard;
+			break;
+		case RTN_PROHIBIT:
+			rt->dst.error = -EACCES;
+			rt->dst.output = ip6_pkt_prohibit_out;
+			rt->dst.input = ip6_pkt_prohibit;
+			break;
+		case RTN_THROW:
+		default:
+			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
+					: -ENETUNREACH;
+			rt->dst.output = ip6_pkt_discard_out;
+			rt->dst.input = ip6_pkt_discard;
+			break;
+		}
 		goto install_route;
 	}
 
-	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
-		struct in6_addr *gw_addr;
+	if (cfg->fc_flags & RTF_GATEWAY) {
+		const struct in6_addr *gw_addr;
 		int gwa_type;
 
-		gw_addr = &rtmsg->rtmsg_gateway;
-		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
+		gw_addr = &cfg->fc_gateway;
+		rt->rt6i_gateway = *gw_addr;
 		gwa_type = ipv6_addr_type(gw_addr);
 
 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
@@ -904,80 +1617,61 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 			   some exceptions. --ANK
 			 */
 			err = -EINVAL;
-			if (!(gwa_type&IPV6_ADDR_UNICAST))
+			if (!(gwa_type & IPV6_ADDR_UNICAST))
 				goto out;
 
-			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
+			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
 
 			err = -EHOSTUNREACH;
-			if (grt == NULL)
+			if (!grt)
 				goto out;
 			if (dev) {
-				if (dev != grt->rt6i_dev) {
-					dst_release(&grt->u.dst);
+				if (dev != grt->dst.dev) {
+					ip6_rt_put(grt);
 					goto out;
 				}
 			} else {
-				dev = grt->rt6i_dev;
+				dev = grt->dst.dev;
 				idev = grt->rt6i_idev;
 				dev_hold(dev);
 				in6_dev_hold(grt->rt6i_idev);
 			}
-			if (!(grt->rt6i_flags&RTF_GATEWAY))
+			if (!(grt->rt6i_flags & RTF_GATEWAY))
 				err = 0;
-			dst_release(&grt->u.dst);
+			ip6_rt_put(grt);
 
 			if (err)
 				goto out;
 		}
 		err = -EINVAL;
-		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
+		if (!dev || (dev->flags & IFF_LOOPBACK))
 			goto out;
 	}
 
 	err = -ENODEV;
-	if (dev == NULL)
+	if (!dev)
 		goto out;
 
-	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
-		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
-		if (IS_ERR(rt->rt6i_nexthop)) {
-			err = PTR_ERR(rt->rt6i_nexthop);
-			rt->rt6i_nexthop = NULL;
+	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
+		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
+			err = -EINVAL;
 			goto out;
 		}
-	}
+		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
+		rt->rt6i_prefsrc.plen = 128;
+	} else
+		rt->rt6i_prefsrc.plen = 0;
 
-	rt->rt6i_flags = rtmsg->rtmsg_flags;
+	rt->rt6i_flags = cfg->fc_flags;
 
 install_route:
-	if (rta && rta[RTA_METRICS-1]) {
-		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
-		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
-
-		while (RTA_OK(attr, attrlen)) {
-			unsigned flavor = attr->rta_type;
-			if (flavor) {
-				if (flavor > RTAX_MAX) {
-					err = -EINVAL;
-					goto out;
-				}
-				rt->u.dst.metrics[flavor-1] =
-					*(u32 *)RTA_DATA(attr);
-			}
-			attr = RTA_NEXT(attr, attrlen);
-		}
-	}
-
-	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
-		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
-	if (!rt->u.dst.metrics[RTAX_MTU-1])
-		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
-	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
-	rt->u.dst.dev = dev;
+	rt->dst.dev = dev;
 	rt->rt6i_idev = idev;
-	return ip6_ins_rt(rt, nlh, _rtattr, req);
+	rt->rt6i_table = table;
+
+	cfg->fc_nlinfo.nl_net = dev_net(dev);
+
+	return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len);
 
 out:
 	if (dev)
@@ -985,377 +1679,414 @@ out:
 	if (idev)
 		in6_dev_put(idev);
 	if (rt)
-		dst_free((struct dst_entry *) rt);
+		dst_free(&rt->dst);
 	return err;
 }
 
-int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 {
 	int err;
+	struct fib6_table *table;
+	struct net *net = dev_net(rt->dst.dev);
 
-	write_lock_bh(&rt6_lock);
-
-	rt6_reset_dflt_pointer(NULL);
-
-	err = fib6_del(rt, nlh, _rtattr, req);
-	dst_release(&rt->u.dst);
+	if (rt == net->ipv6.ip6_null_entry) {
+		err = -ENOENT;
+		goto out;
+	}
 
-	write_unlock_bh(&rt6_lock);
+	table = rt->rt6i_table;
+	write_lock_bh(&table->tb6_lock);
+	err = fib6_del(rt, info);
+	write_unlock_bh(&table->tb6_lock);
 
+out:
+	ip6_rt_put(rt);
 	return err;
 }
 
-static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+int ip6_del_rt(struct rt6_info *rt)
+{
+	struct nl_info info = {
+		.nl_net = dev_net(rt->dst.dev),
+	};
+	return __ip6_del_rt(rt, &info);
+}
+
+static int ip6_route_del(struct fib6_config *cfg)
 {
+	struct fib6_table *table;
 	struct fib6_node *fn;
 	struct rt6_info *rt;
 	int err = -ESRCH;
 
-	read_lock_bh(&rt6_lock);
+	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
+	if (!table)
+		return err;
+
+	read_lock_bh(&table->tb6_lock);
+
+	fn = fib6_locate(&table->tb6_root,
+			 &cfg->fc_dst, cfg->fc_dst_len,
+			 &cfg->fc_src, cfg->fc_src_len);
 
-	fn = fib6_locate(&ip6_routing_table,
-			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
-			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
-	
 	if (fn) {
-		for (rt = fn->leaf; rt; rt = rt->u.next) {
-			if (rtmsg->rtmsg_ifindex &&
-			    (rt->rt6i_dev == NULL ||
-			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
+		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+			if (cfg->fc_ifindex &&
+			    (!rt->dst.dev ||
+			     rt->dst.dev->ifindex != cfg->fc_ifindex))
 				continue;
-			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
-			    !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
+			if (cfg->fc_flags & RTF_GATEWAY &&
+			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
 				continue;
-			if (rtmsg->rtmsg_metric &&
-			    rtmsg->rtmsg_metric != rt->rt6i_metric)
+			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
 				continue;
-			dst_hold(&rt->u.dst);
-			read_unlock_bh(&rt6_lock);
+			dst_hold(&rt->dst);
+			read_unlock_bh(&table->tb6_lock);
 
-			return ip6_del_rt(rt, nlh, _rtattr, req);
+			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
 		}
 	}
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 
 	return err;
 }
 
-/*
- *	Handle redirects
- */
-void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
-		  struct neighbour *neigh, u8 *lladdr, int on_link)
+static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
 {
-	struct rt6_info *rt, *nrt;
+	struct net *net = dev_net(skb->dev);
+	struct netevent_redirect netevent;
+	struct rt6_info *rt, *nrt = NULL;
+	struct ndisc_options ndopts;
+	struct inet6_dev *in6_dev;
+	struct neighbour *neigh;
+	struct rd_msg *msg;
+	int optlen, on_link;
+	u8 *lladdr;
+
+	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
+	optlen -= sizeof(*msg);
+
+	if (optlen < 0) {
+		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
+		return;
+	}
 
-	/* Locate old route to this destination. */
-	rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
+	msg = (struct rd_msg *)icmp6_hdr(skb);
 
-	if (rt == NULL)
+	if (ipv6_addr_is_multicast(&msg->dest)) {
+		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
 		return;
+	}
 
-	if (neigh->dev != rt->rt6i_dev)
-		goto out;
+	on_link = 0;
+	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
+		on_link = 1;
+	} else if (ipv6_addr_type(&msg->target) !=
+		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
+		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
+		return;
+	}
 
-	/*
-	 * Current route is on-link; redirect is always invalid.
-	 * 
-	 * Seems, previous statement is not true. It could
-	 * be node, which looks for us as on-link (f.e. proxy ndisc)
-	 * But then router serving it might decide, that we should
-	 * know truth 8)8) --ANK (980726).
-	 */
-	if (!(rt->rt6i_flags&RTF_GATEWAY))
-		goto out;
+	in6_dev = __in6_dev_get(skb->dev);
+	if (!in6_dev)
+		return;
+	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
+		return;
 
-	/*
-	 *	RFC 2461 specifies that redirects should only be
-	 *	accepted if they come from the nexthop to the target.
-	 *	Due to the way default routers are chosen, this notion
-	 *	is a bit fuzzy and one might need to check all default
-	 *	routers.
+	/* RFC2461 8.1:
+	 *	The IP source address of the Redirect MUST be the same as the current
+	 *	first-hop router for the specified ICMP Destination Address.
 	 */
-	if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
-		if (rt->rt6i_flags & RTF_DEFAULT) {
-			struct rt6_info *rt1;
-
-			read_lock(&rt6_lock);
-			for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
-				if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
-					dst_hold(&rt1->u.dst);
-					dst_release(&rt->u.dst);
-					read_unlock(&rt6_lock);
-					rt = rt1;
-					goto source_ok;
-				}
-			}
-			read_unlock(&rt6_lock);
+
+	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
+		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
+		return;
+	}
+
+	lladdr = NULL;
+	if (ndopts.nd_opts_tgt_lladdr) {
+		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
+					     skb->dev);
+		if (!lladdr) {
+			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
+			return;
 		}
-		if (net_ratelimit())
-			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
-			       "for redirect target\n");
-		goto out;
 	}
 
-source_ok:
+	rt = (struct rt6_info *) dst;
+	if (rt == net->ipv6.ip6_null_entry) {
+		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
+		return;
+	}
+
+	/* Redirect received -> path was valid.
+	 * Look, redirects are sent only in response to data packets,
+	 * so that this nexthop apparently is reachable. --ANK
+	 */
+	dst_confirm(&rt->dst);
+
+	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
+	if (!neigh)
+		return;
 
 	/*
 	 *	We have finally decided to accept it.
 	 */
 
-	neigh_update(neigh, lladdr, NUD_STALE, 
+	neigh_update(neigh, lladdr, NUD_STALE,
 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
 		     NEIGH_UPDATE_F_OVERRIDE|
 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
 				     NEIGH_UPDATE_F_ISROUTER))
 		     );
 
-	/*
-	 * Redirect received -> path was valid.
-	 * Look, redirects are sent only in response to data packets,
-	 * so that this nexthop apparently is reachable. --ANK
-	 */
-	dst_confirm(&rt->u.dst);
-
-	/* Duplicate redirect: silently ignore. */
-	if (neigh == rt->u.dst.neighbour)
-		goto out;
-
-	nrt = ip6_rt_copy(rt);
-	if (nrt == NULL)
+	nrt = ip6_rt_copy(rt, &msg->dest);
+	if (!nrt)
 		goto out;
 
 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
 	if (on_link)
 		nrt->rt6i_flags &= ~RTF_GATEWAY;
 
-	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
-	nrt->rt6i_dst.plen = 128;
-	nrt->u.dst.flags |= DST_HOST;
-
-	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
-	nrt->rt6i_nexthop = neigh_clone(neigh);
-	/* Reset pmtu, it may be better */
-	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
-	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
+	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
 
-	if (ip6_ins_rt(nrt, NULL, NULL, NULL))
+	if (ip6_ins_rt(nrt))
 		goto out;
 
-	if (rt->rt6i_flags&RTF_CACHE) {
-		ip6_del_rt(rt, NULL, NULL, NULL);
-		return;
-	}
+	netevent.old = &rt->dst;
+	netevent.new = &nrt->dst;
+	netevent.daddr = &msg->dest;
+	netevent.neigh = neigh;
+	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
-out:
-        dst_release(&rt->u.dst);
-	return;
-}
-
-/*
- *	Handle ICMP "packet too big" messages
- *	i.e. Path MTU discovery
- */
-
-void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
-			struct net_device *dev, u32 pmtu)
-{
-	struct rt6_info *rt, *nrt;
-	int allfrag = 0;
-
-	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
-	if (rt == NULL)
-		return;
-
-	if (pmtu >= dst_mtu(&rt->u.dst))
-		goto out;
-
-	if (pmtu < IPV6_MIN_MTU) {
-		/*
-		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
-		 * MTU (1280) and a fragment header should always be included
-		 * after a node receiving Too Big message reporting PMTU is
-		 * less than the IPv6 Minimum Link MTU.
-		 */
-		pmtu = IPV6_MIN_MTU;
-		allfrag = 1;
-	}
-
-	/* New mtu received -> path was valid.
-	   They are sent only in response to data packets,
-	   so that this nexthop apparently is reachable. --ANK
-	 */
-	dst_confirm(&rt->u.dst);
-
-	/* Host route. If it is static, it would be better
-	   not to override it, but add new one, so that
-	   when cache entry will expire old pmtu
-	   would return automatically.
-	 */
 	if (rt->rt6i_flags & RTF_CACHE) {
-		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
-		if (allfrag)
-			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
-		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
-		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
-		goto out;
-	}
-
-	/* Network route.
-	   Two cases are possible:
-	   1. It is connected route. Action: COW
-	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
-	 */
-	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
-		nrt = rt6_cow(rt, daddr, saddr, NULL);
-		if (!nrt->u.dst.error) {
-			nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
-			if (allfrag)
-				nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
-			/* According to RFC 1981, detecting PMTU increase shouldn't be
-			   happened within 5 mins, the recommended timer is 10 mins.
-			   Here this route expiration time is set to ip6_rt_mtu_expires
-			   which is 10 mins. After 10 mins the decreased pmtu is expired
-			   and detecting PMTU increase will be automatically happened.
-			 */
-			dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
-			nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
-		}
-		dst_release(&nrt->u.dst);
-	} else {
-		nrt = ip6_rt_copy(rt);
-		if (nrt == NULL)
-			goto out;
-		ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
-		nrt->rt6i_dst.plen = 128;
-		nrt->u.dst.flags |= DST_HOST;
-		nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
-		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
-		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
-		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
-		if (allfrag)
-			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
-		ip6_ins_rt(nrt, NULL, NULL, NULL);
+		rt = (struct rt6_info *) dst_clone(&rt->dst);
+		ip6_del_rt(rt);
 	}
 
 out:
-	dst_release(&rt->u.dst);
+	neigh_release(neigh);
 }
 
 /*
  *	Misc support functions
  */
 
-static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
+static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
+				    const struct in6_addr *dest)
 {
-	struct rt6_info *rt = ip6_dst_alloc();
+	struct net *net = dev_net(ort->dst.dev);
+	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
+					    ort->rt6i_table);
 
 	if (rt) {
-		rt->u.dst.input = ort->u.dst.input;
-		rt->u.dst.output = ort->u.dst.output;
+		rt->dst.input = ort->dst.input;
+		rt->dst.output = ort->dst.output;
+		rt->dst.flags |= DST_HOST;
 
-		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
-		rt->u.dst.dev = ort->u.dst.dev;
-		if (rt->u.dst.dev)
-			dev_hold(rt->u.dst.dev);
+		rt->rt6i_dst.addr = *dest;
+		rt->rt6i_dst.plen = 128;
+		dst_copy_metrics(&rt->dst, &ort->dst);
+		rt->dst.error = ort->dst.error;
 		rt->rt6i_idev = ort->rt6i_idev;
 		if (rt->rt6i_idev)
 			in6_dev_hold(rt->rt6i_idev);
-		rt->u.dst.lastuse = jiffies;
-		rt->rt6i_expires = 0;
+		rt->dst.lastuse = jiffies;
 
-		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
-		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
+		if (ort->rt6i_flags & RTF_GATEWAY)
+			rt->rt6i_gateway = ort->rt6i_gateway;
+		else
+			rt->rt6i_gateway = *dest;
+		rt->rt6i_flags = ort->rt6i_flags;
+		rt6_set_from(rt, ort);
 		rt->rt6i_metric = 0;
 
-		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
 #ifdef CONFIG_IPV6_SUBTREES
 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 #endif
+		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
+		rt->rt6i_table = ort->rt6i_table;
 	}
 	return rt;
 }
 
-struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
-{	
-	struct rt6_info *rt;
+#ifdef CONFIG_IPV6_ROUTE_INFO
+static struct rt6_info *rt6_get_route_info(struct net *net,
+					   const struct in6_addr *prefix, int prefixlen,
+					   const struct in6_addr *gwaddr, int ifindex)
+{
 	struct fib6_node *fn;
+	struct rt6_info *rt = NULL;
+	struct fib6_table *table;
+
+	table = fib6_get_table(net, RT6_TABLE_INFO);
+	if (!table)
+		return NULL;
+
+	read_lock_bh(&table->tb6_lock);
+	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
+	if (!fn)
+		goto out;
+
+	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+		if (rt->dst.dev->ifindex != ifindex)
+			continue;
+		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
+			continue;
+		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
+			continue;
+		dst_hold(&rt->dst);
+		break;
+	}
+out:
+	read_unlock_bh(&table->tb6_lock);
+	return rt;
+}
+
+static struct rt6_info *rt6_add_route_info(struct net *net,
+					   const struct in6_addr *prefix, int prefixlen,
+					   const struct in6_addr *gwaddr, int ifindex,
+					   unsigned int pref)
+{
+	struct fib6_config cfg = {
+		.fc_table	= RT6_TABLE_INFO,
+		.fc_metric	= IP6_RT_PRIO_USER,
+		.fc_ifindex	= ifindex,
+		.fc_dst_len	= prefixlen,
+		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
+				  RTF_UP | RTF_PREF(pref),
+		.fc_nlinfo.portid = 0,
+		.fc_nlinfo.nlh = NULL,
+		.fc_nlinfo.nl_net = net,
+	};
 
-	fn = &ip6_routing_table;
+	cfg.fc_dst = *prefix;
+	cfg.fc_gateway = *gwaddr;
 
-	write_lock_bh(&rt6_lock);
-	for (rt = fn->leaf; rt; rt=rt->u.next) {
-		if (dev == rt->rt6i_dev &&
+	/* We should treat it as a default route if prefix length is 0. */
+	if (!prefixlen)
+		cfg.fc_flags |= RTF_DEFAULT;
+
+	ip6_route_add(&cfg);
+
+	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
+}
+#endif
+
+struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
+{
+	struct rt6_info *rt;
+	struct fib6_table *table;
+
+	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
+	if (!table)
+		return NULL;
+
+	read_lock_bh(&table->tb6_lock);
+	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
+		if (dev == rt->dst.dev &&
+		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
 			break;
 	}
 	if (rt)
-		dst_hold(&rt->u.dst);
-	write_unlock_bh(&rt6_lock);
+		dst_hold(&rt->dst);
+	read_unlock_bh(&table->tb6_lock);
 	return rt;
 }
 
-struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
-				     struct net_device *dev)
+struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
+				     struct net_device *dev,
+				     unsigned int pref)
 {
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg = {
+		.fc_table	= RT6_TABLE_DFLT,
+		.fc_metric	= IP6_RT_PRIO_USER,
+		.fc_ifindex	= dev->ifindex,
+		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
+				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
+		.fc_nlinfo.portid = 0,
+		.fc_nlinfo.nlh = NULL,
+		.fc_nlinfo.nl_net = dev_net(dev),
+	};
 
-	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
-	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
-	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
-	rtmsg.rtmsg_metric = 1024;
-	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
+	cfg.fc_gateway = *gwaddr;
 
-	rtmsg.rtmsg_ifindex = dev->ifindex;
+	ip6_route_add(&cfg);
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
 	return rt6_get_dflt_router(gwaddr, dev);
 }
 
-void rt6_purge_dflt_routers(void)
+void rt6_purge_dflt_routers(struct net *net)
 {
 	struct rt6_info *rt;
+	struct fib6_table *table;
+
+	/* NOTE: Keep consistent with rt6_get_dflt_router */
+	table = fib6_get_table(net, RT6_TABLE_DFLT);
+	if (!table)
+		return;
 
 restart:
-	read_lock_bh(&rt6_lock);
-	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
-		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
-			dst_hold(&rt->u.dst);
+	read_lock_bh(&table->tb6_lock);
+	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
+		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
+		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
+			dst_hold(&rt->dst);
+			read_unlock_bh(&table->tb6_lock);
+			ip6_del_rt(rt);
+			goto restart;
+		}
+	}
+	read_unlock_bh(&table->tb6_lock);
+}
 
-			rt6_reset_dflt_pointer(NULL);
+static void rtmsg_to_fib6_config(struct net *net,
+				 struct in6_rtmsg *rtmsg,
+				 struct fib6_config *cfg)
+{
+	memset(cfg, 0, sizeof(*cfg));
 
-			read_unlock_bh(&rt6_lock);
+	cfg->fc_table = RT6_TABLE_MAIN;
+	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
+	cfg->fc_metric = rtmsg->rtmsg_metric;
+	cfg->fc_expires = rtmsg->rtmsg_info;
+	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
+	cfg->fc_src_len = rtmsg->rtmsg_src_len;
+	cfg->fc_flags = rtmsg->rtmsg_flags;
 
-			ip6_del_rt(rt, NULL, NULL, NULL);
+	cfg->fc_nlinfo.nl_net = net;
 
-			goto restart;
-		}
-	}
-	read_unlock_bh(&rt6_lock);
+	cfg->fc_dst = rtmsg->rtmsg_dst;
+	cfg->fc_src = rtmsg->rtmsg_src;
+	cfg->fc_gateway = rtmsg->rtmsg_gateway;
 }
 
-int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
+int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 {
+	struct fib6_config cfg;
 	struct in6_rtmsg rtmsg;
 	int err;
 
 	switch(cmd) {
 	case SIOCADDRT:		/* Add a route */
 	case SIOCDELRT:		/* Delete a route */
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
 		err = copy_from_user(&rtmsg, arg,
 				     sizeof(struct in6_rtmsg));
 		if (err)
 			return -EFAULT;
-			
+
+		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
+
 		rtnl_lock();
 		switch (cmd) {
 		case SIOCADDRT:
-			err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
+			err = ip6_route_add(&cfg);
 			break;
 		case SIOCDELRT:
-			err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
+			err = ip6_route_del(&cfg);
 			break;
 		default:
 			err = -EINVAL;
@@ -1363,7 +2094,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
 		rtnl_unlock();
 
 		return err;
-	};
+	}
 
 	return -EINVAL;
 }
@@ -1372,18 +2103,49 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
  *	Drop the packet on the floor
  */
 
-static int ip6_pkt_discard(struct sk_buff *skb)
+static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
 {
-	IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
-	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
+	int type;
+	struct dst_entry *dst = skb_dst(skb);
+	switch (ipstats_mib_noroutes) {
+	case IPSTATS_MIB_INNOROUTES:
+		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
+		if (type == IPV6_ADDR_ANY) {
+			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
+				      IPSTATS_MIB_INADDRERRORS);
+			break;
+		}
+		/* FALLTHROUGH */
+	case IPSTATS_MIB_OUTNOROUTES:
+		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
+			      ipstats_mib_noroutes);
+		break;
+	}
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
 	kfree_skb(skb);
 	return 0;
 }
 
-static int ip6_pkt_discard_out(struct sk_buff *skb)
+static int ip6_pkt_discard(struct sk_buff *skb)
+{
+	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
+}
+
+static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
 {
-	skb->dev = skb->dst->dev;
-	return ip6_pkt_discard(skb);
+	skb->dev = skb_dst(skb)->dev;
+	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
+}
+
+static int ip6_pkt_prohibit(struct sk_buff *skb)
+{
+	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
+}
+
+static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
+{
+	skb->dev = skb_dst(skb)->dev;
+	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
 }
 
 /*
@@ -1392,64 +2154,138 @@ static int ip6_pkt_discard_out(struct sk_buff *skb)
 
 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 				    const struct in6_addr *addr,
-				    int anycast)
+				    bool anycast)
 {
-	struct rt6_info *rt = ip6_dst_alloc();
-
-	if (rt == NULL)
+	struct net *net = dev_net(idev->dev);
+	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
+					    DST_NOCOUNT, NULL);
+	if (!rt)
 		return ERR_PTR(-ENOMEM);
 
-	dev_hold(&loopback_dev);
 	in6_dev_hold(idev);
 
-	rt->u.dst.flags = DST_HOST;
-	rt->u.dst.input = ip6_input;
-	rt->u.dst.output = ip6_output;
-	rt->rt6i_dev = &loopback_dev;
+	rt->dst.flags |= DST_HOST;
+	rt->dst.input = ip6_input;
+	rt->dst.output = ip6_output;
 	rt->rt6i_idev = idev;
-	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
-	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
-	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
-	rt->u.dst.obsolete = -1;
 
 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
-	if (!anycast)
+	if (anycast)
+		rt->rt6i_flags |= RTF_ANYCAST;
+	else
 		rt->rt6i_flags |= RTF_LOCAL;
-	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
-	if (rt->rt6i_nexthop == NULL) {
-		dst_free((struct dst_entry *) rt);
-		return ERR_PTR(-ENOMEM);
-	}
 
-	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
+	rt->rt6i_gateway  = *addr;
+	rt->rt6i_dst.addr = *addr;
 	rt->rt6i_dst.plen = 128;
+	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
 
-	atomic_set(&rt->u.dst.__refcnt, 1);
+	atomic_set(&rt->dst.__refcnt, 1);
 
 	return rt;
 }
 
-static int fib6_ifdown(struct rt6_info *rt, void *arg)
+int ip6_route_get_saddr(struct net *net,
+			struct rt6_info *rt,
+			const struct in6_addr *daddr,
+			unsigned int prefs,
+			struct in6_addr *saddr)
+{
+	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
+	int err = 0;
+	if (rt->rt6i_prefsrc.plen)
+		*saddr = rt->rt6i_prefsrc.addr;
+	else
+		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
+					 daddr, prefs, saddr);
+	return err;
+}
+
+/* remove deleted ip from prefsrc entries */
+struct arg_dev_net_ip {
+	struct net_device *dev;
+	struct net *net;
+	struct in6_addr *addr;
+};
+
+static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
+{
+	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
+	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
+	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
+
+	if (((void *)rt->dst.dev == dev || !dev) &&
+	    rt != net->ipv6.ip6_null_entry &&
+	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
+		/* remove prefsrc entry */
+		rt->rt6i_prefsrc.plen = 0;
+	}
+	return 0;
+}
+
+void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
+{
+	struct net *net = dev_net(ifp->idev->dev);
+	struct arg_dev_net_ip adni = {
+		.dev = ifp->idev->dev,
+		.net = net,
+		.addr = &ifp->addr,
+	};
+	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
+}
+
+#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
+#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
+
+/* Remove routers and update dst entries when gateway turn into host. */
+static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
 {
-	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
-	    rt != &ip6_null_entry) {
-		RT6_TRACE("deleted by ifdown %p\n", rt);
+	struct in6_addr *gateway = (struct in6_addr *)arg;
+
+	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
+	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
+	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
 		return -1;
 	}
 	return 0;
 }
 
-void rt6_ifdown(struct net_device *dev)
+void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
+{
+	fib6_clean_all(net, fib6_clean_tohost, gateway);
+}
+
+struct arg_dev_net {
+	struct net_device *dev;
+	struct net *net;
+};
+
+static int fib6_ifdown(struct rt6_info *rt, void *arg)
 {
-	write_lock_bh(&rt6_lock);
-	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
-	write_unlock_bh(&rt6_lock);
+	const struct arg_dev_net *adn = arg;
+	const struct net_device *dev = adn->dev;
+
+	if ((rt->dst.dev == dev || !dev) &&
+	    rt != adn->net->ipv6.ip6_null_entry)
+		return -1;
+
+	return 0;
 }
 
-struct rt6_mtu_change_arg
+void rt6_ifdown(struct net *net, struct net_device *dev)
 {
+	struct arg_dev_net adn = {
+		.dev = dev,
+		.net = net,
+	};
+
+	fib6_clean_all(net, fib6_ifdown, &adn);
+	icmp6_clean_all(fib6_ifdown, &adn);
+}
+
+struct rt6_mtu_change_arg {
 	struct net_device *dev;
-	unsigned mtu;
+	unsigned int mtu;
 };
 
 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
@@ -1464,7 +2300,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 	*/
 
 	idev = __in6_dev_get(arg->dev);
-	if (idev == NULL)
+	if (!idev)
 		return 0;
 
 	/* For administrative MTU increase, there is no way to discover
@@ -1481,102 +2317,230 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
 	   PMTU discouvery.
 	 */
-	if (rt->rt6i_dev == arg->dev &&
-	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
-            (dst_mtu(&rt->u.dst) > arg->mtu ||
-             (dst_mtu(&rt->u.dst) < arg->mtu &&
-	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
-		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
-	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
+	if (rt->dst.dev == arg->dev &&
+	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
+	    (dst_mtu(&rt->dst) >= arg->mtu ||
+	     (dst_mtu(&rt->dst) < arg->mtu &&
+	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
+		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
+	}
 	return 0;
 }
 
-void rt6_mtu_change(struct net_device *dev, unsigned mtu)
+void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
 {
-	struct rt6_mtu_change_arg arg;
+	struct rt6_mtu_change_arg arg = {
+		.dev = dev,
+		.mtu = mtu,
+	};
 
-	arg.dev = dev;
-	arg.mtu = mtu;
-	read_lock_bh(&rt6_lock);
-	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
-	read_unlock_bh(&rt6_lock);
+	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
 }
 
-static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
-			      struct in6_rtmsg *rtmsg)
+static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
+	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
+	[RTA_OIF]               = { .type = NLA_U32 },
+	[RTA_IIF]		= { .type = NLA_U32 },
+	[RTA_PRIORITY]          = { .type = NLA_U32 },
+	[RTA_METRICS]           = { .type = NLA_NESTED },
+	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
+};
+
+static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
+			      struct fib6_config *cfg)
 {
-	memset(rtmsg, 0, sizeof(*rtmsg));
+	struct rtmsg *rtm;
+	struct nlattr *tb[RTA_MAX+1];
+	int err;
 
-	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
-	rtmsg->rtmsg_src_len = r->rtm_src_len;
-	rtmsg->rtmsg_flags = RTF_UP;
-	if (r->rtm_type == RTN_UNREACHABLE)
-		rtmsg->rtmsg_flags |= RTF_REJECT;
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+	if (err < 0)
+		goto errout;
+
+	err = -EINVAL;
+	rtm = nlmsg_data(nlh);
+	memset(cfg, 0, sizeof(*cfg));
+
+	cfg->fc_table = rtm->rtm_table;
+	cfg->fc_dst_len = rtm->rtm_dst_len;
+	cfg->fc_src_len = rtm->rtm_src_len;
+	cfg->fc_flags = RTF_UP;
+	cfg->fc_protocol = rtm->rtm_protocol;
+	cfg->fc_type = rtm->rtm_type;
+
+	if (rtm->rtm_type == RTN_UNREACHABLE ||
+	    rtm->rtm_type == RTN_BLACKHOLE ||
+	    rtm->rtm_type == RTN_PROHIBIT ||
+	    rtm->rtm_type == RTN_THROW)
+		cfg->fc_flags |= RTF_REJECT;
+
+	if (rtm->rtm_type == RTN_LOCAL)
+		cfg->fc_flags |= RTF_LOCAL;
+
+	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
+	cfg->fc_nlinfo.nlh = nlh;
+	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
+
+	if (tb[RTA_GATEWAY]) {
+		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
+		cfg->fc_flags |= RTF_GATEWAY;
+	}
 
-	if (rta[RTA_GATEWAY-1]) {
-		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
-		rtmsg->rtmsg_flags |= RTF_GATEWAY;
+	if (tb[RTA_DST]) {
+		int plen = (rtm->rtm_dst_len + 7) >> 3;
+
+		if (nla_len(tb[RTA_DST]) < plen)
+			goto errout;
+
+		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
 	}
-	if (rta[RTA_DST-1]) {
-		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
+
+	if (tb[RTA_SRC]) {
+		int plen = (rtm->rtm_src_len + 7) >> 3;
+
+		if (nla_len(tb[RTA_SRC]) < plen)
+			goto errout;
+
+		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
 	}
-	if (rta[RTA_SRC-1]) {
-		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
+
+	if (tb[RTA_PREFSRC])
+		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
+
+	if (tb[RTA_OIF])
+		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
+
+	if (tb[RTA_PRIORITY])
+		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
+
+	if (tb[RTA_METRICS]) {
+		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
+		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
 	}
-	if (rta[RTA_OIF-1]) {
-		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
+
+	if (tb[RTA_TABLE])
+		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
+
+	if (tb[RTA_MULTIPATH]) {
+		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
+		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
 	}
-	if (rta[RTA_PRIORITY-1]) {
-		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
+
+	err = 0;
+errout:
+	return err;
+}
+
+static int ip6_route_multipath(struct fib6_config *cfg, int add)
+{
+	struct fib6_config r_cfg;
+	struct rtnexthop *rtnh;
+	int remaining;
+	int attrlen;
+	int err = 0, last_err = 0;
+
+beginning:
+	rtnh = (struct rtnexthop *)cfg->fc_mp;
+	remaining = cfg->fc_mp_len;
+
+	/* Parse a Multipath Entry */
+	while (rtnh_ok(rtnh, remaining)) {
+		memcpy(&r_cfg, cfg, sizeof(*cfg));
+		if (rtnh->rtnh_ifindex)
+			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
+
+		attrlen = rtnh_attrlen(rtnh);
+		if (attrlen > 0) {
+			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+			if (nla) {
+				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
+				r_cfg.fc_flags |= RTF_GATEWAY;
+			}
+		}
+		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
+		if (err) {
+			last_err = err;
+			/* If we are trying to remove a route, do not stop the
+			 * loop when ip6_route_del() fails (because next hop is
+			 * already gone), we should try to remove all next hops.
+			 */
+			if (add) {
+				/* If add fails, we should try to delete all
+				 * next hops that have been already added.
+				 */
+				add = 0;
+				goto beginning;
+			}
+		}
+		/* Because each route is added like a single route we remove
+		 * this flag after the first nexthop (if there is a collision,
+		 * we have already fail to add the first nexthop:
+		 * fib6_add_rt2node() has reject it).
+		 */
+		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
+		rtnh = rtnh_next(rtnh, &remaining);
 	}
-	return 0;
+
+	return last_err;
 }
 
-int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
 {
-	struct rtmsg *r = NLMSG_DATA(nlh);
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg;
+	int err;
 
-	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
-		return -EINVAL;
-	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
+	err = rtm_to_fib6_config(skb, nlh, &cfg);
+	if (err < 0)
+		return err;
+
+	if (cfg.fc_mp)
+		return ip6_route_multipath(&cfg, 0);
+	else
+		return ip6_route_del(&cfg);
 }
 
-int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
 {
-	struct rtmsg *r = NLMSG_DATA(nlh);
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg;
+	int err;
 
-	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
-		return -EINVAL;
-	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
+	err = rtm_to_fib6_config(skb, nlh, &cfg);
+	if (err < 0)
+		return err;
+
+	if (cfg.fc_mp)
+		return ip6_route_multipath(&cfg, 1);
+	else
+		return ip6_route_add(&cfg);
 }
 
-struct rt6_rtnl_dump_arg
+static inline size_t rt6_nlmsg_size(void)
 {
-	struct sk_buff *skb;
-	struct netlink_callback *cb;
-};
+	return NLMSG_ALIGN(sizeof(struct rtmsg))
+	       + nla_total_size(16) /* RTA_SRC */
+	       + nla_total_size(16) /* RTA_DST */
+	       + nla_total_size(16) /* RTA_GATEWAY */
+	       + nla_total_size(16) /* RTA_PREFSRC */
+	       + nla_total_size(4) /* RTA_TABLE */
+	       + nla_total_size(4) /* RTA_IIF */
+	       + nla_total_size(4) /* RTA_OIF */
+	       + nla_total_size(4) /* RTA_PRIORITY */
+	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
+	       + nla_total_size(sizeof(struct rta_cacheinfo));
+}
 
-static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
+static int rt6_fill_node(struct net *net,
+			 struct sk_buff *skb, struct rt6_info *rt,
 			 struct in6_addr *dst, struct in6_addr *src,
-			 int iif, int type, u32 pid, u32 seq,
-			 int prefix, unsigned int flags)
+			 int iif, int type, u32 portid, u32 seq,
+			 int prefix, int nowait, unsigned int flags)
 {
 	struct rtmsg *rtm;
-	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
-	struct rta_cacheinfo ci;
+	struct nlmsghdr *nlh;
+	long expires;
+	u32 table;
 
 	if (prefix) {	/* user wants prefix routes only */
 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -1585,281 +2549,289 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 		}
 	}
 
-	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
-	rtm = NLMSG_DATA(nlh);
+	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	rtm = nlmsg_data(nlh);
 	rtm->rtm_family = AF_INET6;
 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
 	rtm->rtm_src_len = rt->rt6i_src.plen;
 	rtm->rtm_tos = 0;
-	rtm->rtm_table = RT_TABLE_MAIN;
-	if (rt->rt6i_flags&RTF_REJECT)
-		rtm->rtm_type = RTN_UNREACHABLE;
-	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
+	if (rt->rt6i_table)
+		table = rt->rt6i_table->tb6_id;
+	else
+		table = RT6_TABLE_UNSPEC;
+	rtm->rtm_table = table;
+	if (nla_put_u32(skb, RTA_TABLE, table))
+		goto nla_put_failure;
+	if (rt->rt6i_flags & RTF_REJECT) {
+		switch (rt->dst.error) {
+		case -EINVAL:
+			rtm->rtm_type = RTN_BLACKHOLE;
+			break;
+		case -EACCES:
+			rtm->rtm_type = RTN_PROHIBIT;
+			break;
+		case -EAGAIN:
+			rtm->rtm_type = RTN_THROW;
+			break;
+		default:
+			rtm->rtm_type = RTN_UNREACHABLE;
+			break;
+		}
+	}
+	else if (rt->rt6i_flags & RTF_LOCAL)
+		rtm->rtm_type = RTN_LOCAL;
+	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
 		rtm->rtm_type = RTN_LOCAL;
 	else
 		rtm->rtm_type = RTN_UNICAST;
 	rtm->rtm_flags = 0;
 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
 	rtm->rtm_protocol = rt->rt6i_protocol;
-	if (rt->rt6i_flags&RTF_DYNAMIC)
+	if (rt->rt6i_flags & RTF_DYNAMIC)
 		rtm->rtm_protocol = RTPROT_REDIRECT;
-	else if (rt->rt6i_flags & RTF_ADDRCONF)
-		rtm->rtm_protocol = RTPROT_KERNEL;
-	else if (rt->rt6i_flags&RTF_DEFAULT)
-		rtm->rtm_protocol = RTPROT_RA;
+	else if (rt->rt6i_flags & RTF_ADDRCONF) {
+		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
+			rtm->rtm_protocol = RTPROT_RA;
+		else
+			rtm->rtm_protocol = RTPROT_KERNEL;
+	}
 
-	if (rt->rt6i_flags&RTF_CACHE)
+	if (rt->rt6i_flags & RTF_CACHE)
 		rtm->rtm_flags |= RTM_F_CLONED;
 
 	if (dst) {
-		RTA_PUT(skb, RTA_DST, 16, dst);
-	        rtm->rtm_dst_len = 128;
+		if (nla_put(skb, RTA_DST, 16, dst))
+			goto nla_put_failure;
+		rtm->rtm_dst_len = 128;
 	} else if (rtm->rtm_dst_len)
-		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
+		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
+			goto nla_put_failure;
 #ifdef CONFIG_IPV6_SUBTREES
 	if (src) {
-		RTA_PUT(skb, RTA_SRC, 16, src);
-	        rtm->rtm_src_len = 128;
-	} else if (rtm->rtm_src_len)
-		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
+		if (nla_put(skb, RTA_SRC, 16, src))
+			goto nla_put_failure;
+		rtm->rtm_src_len = 128;
+	} else if (rtm->rtm_src_len &&
+		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
+		goto nla_put_failure;
 #endif
-	if (iif)
-		RTA_PUT(skb, RTA_IIF, 4, &iif);
-	else if (dst) {
+	if (iif) {
+#ifdef CONFIG_IPV6_MROUTE
+		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
+			int err = ip6mr_get_route(net, skb, rtm, nowait);
+			if (err <= 0) {
+				if (!nowait) {
+					if (err == 0)
+						return 0;
+					goto nla_put_failure;
+				} else {
+					if (err == -EMSGSIZE)
+						goto nla_put_failure;
+				}
+			}
+		} else
+#endif
+			if (nla_put_u32(skb, RTA_IIF, iif))
+				goto nla_put_failure;
+	} else if (dst) {
 		struct in6_addr saddr_buf;
-		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
-			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
-	}
-	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
-		goto rtattr_failure;
-	if (rt->u.dst.neighbour)
-		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
-	if (rt->u.dst.dev)
-		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
-	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
-	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
-	if (rt->rt6i_expires)
-		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
-	else
-		ci.rta_expires = 0;
-	ci.rta_used = rt->u.dst.__use;
-	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
-	ci.rta_error = rt->u.dst.error;
-	ci.rta_id = 0;
-	ci.rta_ts = 0;
-	ci.rta_tsage = 0;
-	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
+		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
+			goto nla_put_failure;
+	}
+
+	if (rt->rt6i_prefsrc.plen) {
+		struct in6_addr saddr_buf;
+		saddr_buf = rt->rt6i_prefsrc.addr;
+		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
+			goto nla_put_failure;
+	}
+
+	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
+		goto nla_put_failure;
+
+	if (rt->rt6i_flags & RTF_GATEWAY) {
+		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
+			goto nla_put_failure;
+	}
+
+	if (rt->dst.dev &&
+	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
+		goto nla_put_failure;
+	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
+		goto nla_put_failure;
+
+	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
 }
 
-static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
+int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 {
 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
 	int prefix;
 
-	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
-		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
+	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
+		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
 	} else
 		prefix = 0;
 
-	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
-		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
-		     prefix, NLM_F_MULTI);
+	return rt6_fill_node(arg->net,
+		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
+		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
+		     prefix, 0, NLM_F_MULTI);
 }
 
-static int fib6_dump_node(struct fib6_walker_t *w)
+static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
 {
-	int res;
+	struct net *net = sock_net(in_skb->sk);
+	struct nlattr *tb[RTA_MAX+1];
 	struct rt6_info *rt;
+	struct sk_buff *skb;
+	struct rtmsg *rtm;
+	struct flowi6 fl6;
+	int err, iif = 0, oif = 0;
 
-	for (rt = w->leaf; rt; rt = rt->u.next) {
-		res = rt6_dump_route(rt, w->args);
-		if (res < 0) {
-			/* Frame is full, suspend walking */
-			w->leaf = rt;
-			return 1;
-		}
-		BUG_TRAP(res!=0);
-	}
-	w->leaf = NULL;
-	return 0;
-}
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+	if (err < 0)
+		goto errout;
 
-static void fib6_dump_end(struct netlink_callback *cb)
-{
-	struct fib6_walker_t *w = (void*)cb->args[0];
+	err = -EINVAL;
+	memset(&fl6, 0, sizeof(fl6));
+
+	if (tb[RTA_SRC]) {
+		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
+			goto errout;
 
-	if (w) {
-		cb->args[0] = 0;
-		fib6_walker_unlink(w);
-		kfree(w);
+		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
 	}
-	if (cb->args[1]) {
-		cb->done = (void*)cb->args[1];
-		cb->args[1] = 0;
+
+	if (tb[RTA_DST]) {
+		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
+			goto errout;
+
+		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
 	}
-}
 
-static int fib6_dump_done(struct netlink_callback *cb)
-{
-	fib6_dump_end(cb);
-	return cb->done(cb);
-}
+	if (tb[RTA_IIF])
+		iif = nla_get_u32(tb[RTA_IIF]);
 
-int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct rt6_rtnl_dump_arg arg;
-	struct fib6_walker_t *w;
-	int res;
+	if (tb[RTA_OIF])
+		oif = nla_get_u32(tb[RTA_OIF]);
 
-	arg.skb = skb;
-	arg.cb = cb;
+	if (tb[RTA_MARK])
+		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
 
-	w = (void*)cb->args[0];
-	if (w == NULL) {
-		/* New dump:
-		 * 
-		 * 1. hook callback destructor.
-		 */
-		cb->args[1] = (long)cb->done;
-		cb->done = fib6_dump_done;
+	if (iif) {
+		struct net_device *dev;
+		int flags = 0;
 
-		/*
-		 * 2. allocate and initialize walker.
-		 */
-		w = kmalloc(sizeof(*w), GFP_ATOMIC);
-		if (w == NULL)
-			return -ENOMEM;
-		RT6_TRACE("dump<%p", w);
-		memset(w, 0, sizeof(*w));
-		w->root = &ip6_routing_table;
-		w->func = fib6_dump_node;
-		w->args = &arg;
-		cb->args[0] = (long)w;
-		read_lock_bh(&rt6_lock);
-		res = fib6_walk(w);
-		read_unlock_bh(&rt6_lock);
+		dev = __dev_get_by_index(net, iif);
+		if (!dev) {
+			err = -ENODEV;
+			goto errout;
+		}
+
+		fl6.flowi6_iif = iif;
+
+		if (!ipv6_addr_any(&fl6.saddr))
+			flags |= RT6_LOOKUP_F_HAS_SADDR;
+
+		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
+							       flags);
 	} else {
-		w->args = &arg;
-		read_lock_bh(&rt6_lock);
-		res = fib6_walk_continue(w);
-		read_unlock_bh(&rt6_lock);
-	}
-#if RT6_DEBUG >= 3
-	if (res <= 0 && skb->len == 0)
-		RT6_TRACE("%p>dump end\n", w);
-#endif
-	res = res < 0 ? res : skb->len;
-	/* res < 0 is an error. (really, impossible)
-	   res == 0 means that dump is complete, but skb still can contain data.
-	   res > 0 dump is not complete, but frame is full.
-	 */
-	/* Destroy walker, if dump of this table is complete. */
-	if (res <= 0)
-		fib6_dump_end(cb);
-	return res;
-}
+		fl6.flowi6_oif = oif;
 
-int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
-{
-	struct rtattr **rta = arg;
-	int iif = 0;
-	int err = -ENOBUFS;
-	struct sk_buff *skb;
-	struct flowi fl;
-	struct rt6_info *rt;
+		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
+	}
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (skb == NULL)
-		goto out;
+	if (!skb) {
+		ip6_rt_put(rt);
+		err = -ENOBUFS;
+		goto errout;
+	}
 
 	/* Reserve room for dummy headers, this skb can pass
 	   through good chunk of routing engine.
 	 */
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
 
-	memset(&fl, 0, sizeof(fl));
-	if (rta[RTA_SRC-1])
-		ipv6_addr_copy(&fl.fl6_src,
-			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
-	if (rta[RTA_DST-1])
-		ipv6_addr_copy(&fl.fl6_dst,
-			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
-
-	if (rta[RTA_IIF-1])
-		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
+	skb_dst_set(skb, &rt->dst);
 
-	if (iif) {
-		struct net_device *dev;
-		dev = __dev_get_by_index(iif);
-		if (!dev) {
-			err = -ENODEV;
-			goto out_free;
-		}
+	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
+			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
+			    nlh->nlmsg_seq, 0, 0, 0);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto errout;
 	}
 
-	fl.oif = 0;
-	if (rta[RTA_OIF-1])
-		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
+	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+errout:
+	return err;
+}
+
+void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
+{
+	struct sk_buff *skb;
+	struct net *net = info->nl_net;
+	u32 seq;
+	int err;
 
-	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
+	err = -ENOBUFS;
+	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
 
-	skb->dst = &rt->u.dst;
+	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
+	if (!skb)
+		goto errout;
 
-	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-	err = rt6_fill_node(skb, rt, 
-			    &fl.fl6_dst, &fl.fl6_src,
-			    iif,
-			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
-			    nlh->nlmsg_seq, 0, 0);
+	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
+				event, info->portid, seq, 0, 0, 0);
 	if (err < 0) {
-		err = -EMSGSIZE;
-		goto out_free;
+		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
 	}
-
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
-out:
-	return err;
-out_free:
-	kfree_skb(skb);
-	goto out;	
+	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
+		    info->nlh, gfp_any());
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
 }
 
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
-			struct netlink_skb_parms *req)
+static int ip6_route_dev_notify(struct notifier_block *this,
+				unsigned long event, void *ptr)
 {
-	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
-	u32 pid = current->pid;
-	u32 seq = 0;
-
-	if (req)
-		pid = req->pid;
-	if (nlh)
-		seq = nlh->nlmsg_seq;
-	
-	skb = alloc_skb(size, gfp_any());
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
-		return;
-	}
-	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
-		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
-		return;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net *net = dev_net(dev);
+
+	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
+		net->ipv6.ip6_null_entry->dst.dev = dev;
+		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
+		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
+		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
+		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
+#endif
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
+
+	return NOTIFY_OK;
 }
 
 /*
@@ -1868,267 +2840,397 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
 
 #ifdef CONFIG_PROC_FS
 
-#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
-
-struct rt6_proc_arg
-{
-	char *buffer;
-	int offset;
-	int length;
-	int skip;
-	int len;
+static const struct file_operations ipv6_route_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ipv6_route_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_net,
 };
 
-static int rt6_info_route(struct rt6_info *rt, void *p_arg)
-{
-	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
-	int i;
-
-	if (arg->skip < arg->offset / RT6_INFO_LEN) {
-		arg->skip++;
-		return 0;
-	}
-
-	if (arg->len >= arg->length)
-		return 0;
-
-	for (i=0; i<16; i++) {
-		sprintf(arg->buffer + arg->len, "%02x",
-			rt->rt6i_dst.addr.s6_addr[i]);
-		arg->len += 2;
-	}
-	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
-			    rt->rt6i_dst.plen);
-
-#ifdef CONFIG_IPV6_SUBTREES
-	for (i=0; i<16; i++) {
-		sprintf(arg->buffer + arg->len, "%02x",
-			rt->rt6i_src.addr.s6_addr[i]);
-		arg->len += 2;
-	}
-	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
-			    rt->rt6i_src.plen);
-#else
-	sprintf(arg->buffer + arg->len,
-		"00000000000000000000000000000000 00 ");
-	arg->len += 36;
-#endif
-
-	if (rt->rt6i_nexthop) {
-		for (i=0; i<16; i++) {
-			sprintf(arg->buffer + arg->len, "%02x",
-				rt->rt6i_nexthop->primary_key[i]);
-			arg->len += 2;
-		}
-	} else {
-		sprintf(arg->buffer + arg->len,
-			"00000000000000000000000000000000");
-		arg->len += 32;
-	}
-	arg->len += sprintf(arg->buffer + arg->len,
-			    " %08x %08x %08x %08x %8s\n",
-			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
-			    rt->u.dst.__use, rt->rt6i_flags, 
-			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
-	return 0;
-}
-
-static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
-{
-	struct rt6_proc_arg arg;
-	arg.buffer = buffer;
-	arg.offset = offset;
-	arg.length = length;
-	arg.skip = 0;
-	arg.len = 0;
-
-	read_lock_bh(&rt6_lock);
-	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
-	read_unlock_bh(&rt6_lock);
-
-	*start = buffer;
-	if (offset)
-		*start += offset % RT6_INFO_LEN;
-
-	arg.len -= offset % RT6_INFO_LEN;
-
-	if (arg.len > length)
-		arg.len = length;
-	if (arg.len < 0)
-		arg.len = 0;
-
-	return arg.len;
-}
-
 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
 {
+	struct net *net = (struct net *)seq->private;
 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
-		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
-		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
-		      rt6_stats.fib_rt_cache,
-		      atomic_read(&ip6_dst_ops.entries),
-		      rt6_stats.fib_discarded_routes);
+		   net->ipv6.rt6_stats->fib_nodes,
+		   net->ipv6.rt6_stats->fib_route_nodes,
+		   net->ipv6.rt6_stats->fib_rt_alloc,
+		   net->ipv6.rt6_stats->fib_rt_entries,
+		   net->ipv6.rt6_stats->fib_rt_cache,
+		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
+		   net->ipv6.rt6_stats->fib_discarded_routes);
 
 	return 0;
 }
 
 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, rt6_stats_seq_show, NULL);
+	return single_open_net(inode, file, rt6_stats_seq_show);
 }
 
-static struct file_operations rt6_stats_seq_fops = {
+static const struct file_operations rt6_stats_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = rt6_stats_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = single_release,
+	.release = single_release_net,
 };
 #endif	/* CONFIG_PROC_FS */
 
 #ifdef CONFIG_SYSCTL
 
-static int flush_delay;
-
 static
-int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
+int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
 			      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	if (write) {
-		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
-		return 0;
-	} else
+	struct net *net;
+	int delay;
+	if (!write)
 		return -EINVAL;
+
+	net = (struct net *)ctl->extra1;
+	delay = net->ipv6.sysctl.flush_delay;
+	proc_dointvec(ctl, write, buffer, lenp, ppos);
+	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
+	return 0;
 }
 
-ctl_table ipv6_route_table[] = {
-        {
-		.ctl_name	=	NET_IPV6_ROUTE_FLUSH, 
+struct ctl_table ipv6_route_table_template[] = {
+	{
 		.procname	=	"flush",
-         	.data		=	&flush_delay,
+		.data		=	&init_net.ipv6.sysctl.flush_delay,
 		.maxlen		=	sizeof(int),
 		.mode		=	0200,
-         	.proc_handler	=	&ipv6_sysctl_rtcache_flush
+		.proc_handler	=	ipv6_sysctl_rtcache_flush
 	},
 	{
-		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
 		.procname	=	"gc_thresh",
-         	.data		=	&ip6_dst_ops.gc_thresh,
+		.data		=	&ip6_dst_ops_template.gc_thresh,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-         	.proc_handler	=	&proc_dointvec,
+		.proc_handler	=	proc_dointvec,
 	},
 	{
-		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
 		.procname	=	"max_size",
-         	.data		=	&ip6_rt_max_size,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-         	.proc_handler	=	&proc_dointvec,
+		.proc_handler	=	proc_dointvec,
 	},
 	{
-		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
 		.procname	=	"gc_min_interval",
-         	.data		=	&ip6_rt_gc_min_interval,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-         	.proc_handler	=	&proc_dointvec_jiffies,
-		.strategy	=	&sysctl_jiffies,
+		.proc_handler	=	proc_dointvec_jiffies,
 	},
 	{
-		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
 		.procname	=	"gc_timeout",
-         	.data		=	&ip6_rt_gc_timeout,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-         	.proc_handler	=	&proc_dointvec_jiffies,
-		.strategy	=	&sysctl_jiffies,
+		.proc_handler	=	proc_dointvec_jiffies,
 	},
 	{
-		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
 		.procname	=	"gc_interval",
-         	.data		=	&ip6_rt_gc_interval,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-         	.proc_handler	=	&proc_dointvec_jiffies,
-		.strategy	=	&sysctl_jiffies,
+		.proc_handler	=	proc_dointvec_jiffies,
 	},
 	{
-		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
 		.procname	=	"gc_elasticity",
-         	.data		=	&ip6_rt_gc_elasticity,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-         	.proc_handler	=	&proc_dointvec_jiffies,
-		.strategy	=	&sysctl_jiffies,
+		.proc_handler	=	proc_dointvec,
 	},
 	{
-		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
 		.procname	=	"mtu_expires",
-         	.data		=	&ip6_rt_mtu_expires,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-         	.proc_handler	=	&proc_dointvec_jiffies,
-		.strategy	=	&sysctl_jiffies,
+		.proc_handler	=	proc_dointvec_jiffies,
 	},
 	{
-		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
 		.procname	=	"min_adv_mss",
-         	.data		=	&ip6_rt_min_advmss,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-         	.proc_handler	=	&proc_dointvec_jiffies,
-		.strategy	=	&sysctl_jiffies,
+		.proc_handler	=	proc_dointvec,
 	},
 	{
-		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
 		.procname	=	"gc_min_interval_ms",
-         	.data		=	&ip6_rt_gc_min_interval,
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-         	.proc_handler	=	&proc_dointvec_ms_jiffies,
-		.strategy	=	&sysctl_ms_jiffies,
+		.proc_handler	=	proc_dointvec_ms_jiffies,
 	},
-	{ .ctl_name = 0 }
+	{ }
 };
 
+struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = kmemdup(ipv6_route_table_template,
+			sizeof(ipv6_route_table_template),
+			GFP_KERNEL);
+
+	if (table) {
+		table[0].data = &net->ipv6.sysctl.flush_delay;
+		table[0].extra1 = net;
+		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
+		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
+		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
+		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
+		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
+		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
+		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
+		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
+		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
+
+		/* Don't export sysctls to unprivileged users */
+		if (net->user_ns != &init_user_ns)
+			table[0].procname = NULL;
+	}
+
+	return table;
+}
 #endif
 
-void __init ip6_route_init(void)
+static int __net_init ip6_route_net_init(struct net *net)
 {
-	struct proc_dir_entry *p;
+	int ret = -ENOMEM;
+
+	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
+	       sizeof(net->ipv6.ip6_dst_ops));
+
+	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
+		goto out_ip6_dst_ops;
+
+	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
+					   sizeof(*net->ipv6.ip6_null_entry),
+					   GFP_KERNEL);
+	if (!net->ipv6.ip6_null_entry)
+		goto out_ip6_dst_entries;
+	net->ipv6.ip6_null_entry->dst.path =
+		(struct dst_entry *)net->ipv6.ip6_null_entry;
+	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
+			 ip6_template_metrics, true);
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
+					       sizeof(*net->ipv6.ip6_prohibit_entry),
+					       GFP_KERNEL);
+	if (!net->ipv6.ip6_prohibit_entry)
+		goto out_ip6_null_entry;
+	net->ipv6.ip6_prohibit_entry->dst.path =
+		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
+	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
+			 ip6_template_metrics, true);
+
+	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
+					       sizeof(*net->ipv6.ip6_blk_hole_entry),
+					       GFP_KERNEL);
+	if (!net->ipv6.ip6_blk_hole_entry)
+		goto out_ip6_prohibit_entry;
+	net->ipv6.ip6_blk_hole_entry->dst.path =
+		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
+	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
+			 ip6_template_metrics, true);
+#endif
 
-	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
-						     sizeof(struct rt6_info),
-						     0, SLAB_HWCACHE_ALIGN,
-						     NULL, NULL);
-	if (!ip6_dst_ops.kmem_cachep)
-		panic("cannot create ip6_dst_cache");
+	net->ipv6.sysctl.flush_delay = 0;
+	net->ipv6.sysctl.ip6_rt_max_size = 4096;
+	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
+	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
+	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
+	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
+	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
+	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
 
-	fib6_init();
-#ifdef 	CONFIG_PROC_FS
-	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
-	if (p)
-		p->owner = THIS_MODULE;
+	net->ipv6.ip6_rt_gc_expire = 30*HZ;
+
+	ret = 0;
+out:
+	return ret;
 
-	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+out_ip6_prohibit_entry:
+	kfree(net->ipv6.ip6_prohibit_entry);
+out_ip6_null_entry:
+	kfree(net->ipv6.ip6_null_entry);
 #endif
-#ifdef CONFIG_XFRM
-	xfrm6_init();
+out_ip6_dst_entries:
+	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
+out_ip6_dst_ops:
+	goto out;
+}
+
+static void __net_exit ip6_route_net_exit(struct net *net)
+{
+	kfree(net->ipv6.ip6_null_entry);
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	kfree(net->ipv6.ip6_prohibit_entry);
+	kfree(net->ipv6.ip6_blk_hole_entry);
 #endif
+	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
 }
 
-void ip6_route_cleanup(void)
+static int __net_init ip6_route_net_init_late(struct net *net)
 {
 #ifdef CONFIG_PROC_FS
-	proc_net_remove("ipv6_route");
-	proc_net_remove("rt6_stats");
+	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
+	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
 #endif
-#ifdef CONFIG_XFRM
-	xfrm6_fini();
+	return 0;
+}
+
+static void __net_exit ip6_route_net_exit_late(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("ipv6_route", net->proc_net);
+	remove_proc_entry("rt6_stats", net->proc_net);
 #endif
-	rt6_ifdown(NULL);
+}
+
+static struct pernet_operations ip6_route_net_ops = {
+	.init = ip6_route_net_init,
+	.exit = ip6_route_net_exit,
+};
+
+static int __net_init ipv6_inetpeer_init(struct net *net)
+{
+	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
+
+	if (!bp)
+		return -ENOMEM;
+	inet_peer_base_init(bp);
+	net->ipv6.peers = bp;
+	return 0;
+}
+
+static void __net_exit ipv6_inetpeer_exit(struct net *net)
+{
+	struct inet_peer_base *bp = net->ipv6.peers;
+
+	net->ipv6.peers = NULL;
+	inetpeer_invalidate_tree(bp);
+	kfree(bp);
+}
+
+static struct pernet_operations ipv6_inetpeer_ops = {
+	.init	=	ipv6_inetpeer_init,
+	.exit	=	ipv6_inetpeer_exit,
+};
+
+static struct pernet_operations ip6_route_net_late_ops = {
+	.init = ip6_route_net_init_late,
+	.exit = ip6_route_net_exit_late,
+};
+
+static struct notifier_block ip6_route_dev_notifier = {
+	.notifier_call = ip6_route_dev_notify,
+	.priority = 0,
+};
+
+int __init ip6_route_init(void)
+{
+	int ret;
+
+	ret = -ENOMEM;
+	ip6_dst_ops_template.kmem_cachep =
+		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
+				  SLAB_HWCACHE_ALIGN, NULL);
+	if (!ip6_dst_ops_template.kmem_cachep)
+		goto out;
+
+	ret = dst_entries_init(&ip6_dst_blackhole_ops);
+	if (ret)
+		goto out_kmem_cache;
+
+	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
+	if (ret)
+		goto out_dst_entries;
+
+	ret = register_pernet_subsys(&ip6_route_net_ops);
+	if (ret)
+		goto out_register_inetpeer;
+
+	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
+
+	/* Registering of the loopback is done before this portion of code,
+	 * the loopback reference in rt6_info will not be taken, do it
+	 * manually for init_net */
+	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+  #endif
+	ret = fib6_init();
+	if (ret)
+		goto out_register_subsys;
+
+	ret = xfrm6_init();
+	if (ret)
+		goto out_fib6_init;
+
+	ret = fib6_rules_init();
+	if (ret)
+		goto xfrm6_init;
+
+	ret = register_pernet_subsys(&ip6_route_net_late_ops);
+	if (ret)
+		goto fib6_rules_init;
+
+	ret = -ENOBUFS;
+	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
+	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
+	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
+		goto out_register_late_subsys;
+
+	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
+	if (ret)
+		goto out_register_late_subsys;
+
+out:
+	return ret;
+
+out_register_late_subsys:
+	unregister_pernet_subsys(&ip6_route_net_late_ops);
+fib6_rules_init:
+	fib6_rules_cleanup();
+xfrm6_init:
+	xfrm6_fini();
+out_fib6_init:
+	fib6_gc_cleanup();
+out_register_subsys:
+	unregister_pernet_subsys(&ip6_route_net_ops);
+out_register_inetpeer:
+	unregister_pernet_subsys(&ipv6_inetpeer_ops);
+out_dst_entries:
+	dst_entries_destroy(&ip6_dst_blackhole_ops);
+out_kmem_cache:
+	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
+	goto out;
+}
+
+void ip6_route_cleanup(void)
+{
+	unregister_netdevice_notifier(&ip6_route_dev_notifier);
+	unregister_pernet_subsys(&ip6_route_net_late_ops);
+	fib6_rules_cleanup();
+	xfrm6_fini();
 	fib6_gc_cleanup();
-	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
+	unregister_pernet_subsys(&ipv6_inetpeer_ops);
+	unregister_pernet_subsys(&ip6_route_net_ops);
+	dst_entries_destroy(&ip6_dst_blackhole_ops);
+	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
 }
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index c3123c9e1a8..4f408176dc6 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -3,11 +3,9 @@
  *	Linux INET6 implementation
  *
  *	Authors:
- *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *	Pedro Roque		<roque@di.fc.ul.pt>
  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
  *
- *	$Id: sit.c,v 1.53 2001/09/25 05:09:53 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
  *      as published by the Free Software Foundation; either version
@@ -16,23 +14,27 @@
  *	Changes:
  * Roger Venning <r.venning@telstra.com>:	6to4 support
  * Nate Thompson <nate@thebog.net>:		6to4 support
+ * Fred Templin <fred.l.templin@boeing.com>:	isatap support
  */
 
-#include <linux/config.h>
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
-#include <linux/sched.h>
 #include <linux/net.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/icmp.h>
+#include <linux/slab.h>
 #include <asm/uaccess.h>
 #include <linux/init.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/if_ether.h>
 
 #include <net/sock.h>
 #include <net/snmp.h>
@@ -47,10 +49,12 @@
 #include <net/ip.h>
 #include <net/udp.h>
 #include <net/icmp.h>
-#include <net/ipip.h>
+#include <net/ip_tunnels.h>
 #include <net/inet_ecn.h>
 #include <net/xfrm.h>
 #include <net/dsfield.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
 
 /*
    This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
@@ -59,51 +63,72 @@
  */
 
 #define HASH_SIZE  16
-#define HASH(addr) ((addr^(addr>>4))&0xF)
+#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
+
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
-static int ipip6_fb_tunnel_init(struct net_device *dev);
 static int ipip6_tunnel_init(struct net_device *dev);
 static void ipip6_tunnel_setup(struct net_device *dev);
+static void ipip6_dev_free(struct net_device *dev);
+static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
+		      __be32 *v4dst);
+static struct rtnl_link_ops sit_link_ops __read_mostly;
+
+static int sit_net_id __read_mostly;
+struct sit_net {
+	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
+	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
+	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
+	struct ip_tunnel __rcu *tunnels_wc[1];
+	struct ip_tunnel __rcu **tunnels[4];
+
+	struct net_device *fb_tunnel_dev;
+};
 
-static struct net_device *ipip6_fb_tunnel_dev;
-
-static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
-static struct ip_tunnel *tunnels_r[HASH_SIZE];
-static struct ip_tunnel *tunnels_l[HASH_SIZE];
-static struct ip_tunnel *tunnels_wc[1];
-static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
-
-static DEFINE_RWLOCK(ipip6_lock);
-
-static struct ip_tunnel * ipip6_tunnel_lookup(u32 remote, u32 local)
+/*
+ * Must be invoked with rcu_read_lock
+ */
+static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
+		struct net_device *dev, __be32 remote, __be32 local)
 {
-	unsigned h0 = HASH(remote);
-	unsigned h1 = HASH(local);
+	unsigned int h0 = HASH(remote);
+	unsigned int h1 = HASH(local);
 	struct ip_tunnel *t;
+	struct sit_net *sitn = net_generic(net, sit_net_id);
 
-	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
+	for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
 		if (local == t->parms.iph.saddr &&
-		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
+		    remote == t->parms.iph.daddr &&
+		    (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+		    (t->dev->flags & IFF_UP))
 			return t;
 	}
-	for (t = tunnels_r[h0]; t; t = t->next) {
-		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
+	for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
+		if (remote == t->parms.iph.daddr &&
+		    (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+		    (t->dev->flags & IFF_UP))
 			return t;
 	}
-	for (t = tunnels_l[h1]; t; t = t->next) {
-		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
+	for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
+		if (local == t->parms.iph.saddr &&
+		    (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+		    (t->dev->flags & IFF_UP))
 			return t;
 	}
-	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
+	t = rcu_dereference(sitn->tunnels_wc[0]);
+	if ((t != NULL) && (t->dev->flags & IFF_UP))
 		return t;
 	return NULL;
 }
 
-static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t)
+static struct ip_tunnel __rcu **__ipip6_bucket(struct sit_net *sitn,
+		struct ip_tunnel_parm *parms)
 {
-	u32 remote = t->parms.iph.daddr;
-	u32 local = t->parms.iph.saddr;
-	unsigned h = 0;
+	__be32 remote = parms->iph.daddr;
+	__be32 local = parms->iph.saddr;
+	unsigned int h = 0;
 	int prio = 0;
 
 	if (remote) {
@@ -114,134 +139,400 @@ static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t)
 		prio |= 1;
 		h ^= HASH(local);
 	}
-	return &tunnels[prio][h];
+	return &sitn->tunnels[prio][h];
 }
 
-static void ipip6_tunnel_unlink(struct ip_tunnel *t)
+static inline struct ip_tunnel __rcu **ipip6_bucket(struct sit_net *sitn,
+		struct ip_tunnel *t)
 {
-	struct ip_tunnel **tp;
+	return __ipip6_bucket(sitn, &t->parms);
+}
 
-	for (tp = ipip6_bucket(t); *tp; tp = &(*tp)->next) {
-		if (t == *tp) {
-			write_lock_bh(&ipip6_lock);
-			*tp = t->next;
-			write_unlock_bh(&ipip6_lock);
+static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
+{
+	struct ip_tunnel __rcu **tp;
+	struct ip_tunnel *iter;
+
+	for (tp = ipip6_bucket(sitn, t);
+	     (iter = rtnl_dereference(*tp)) != NULL;
+	     tp = &iter->next) {
+		if (t == iter) {
+			rcu_assign_pointer(*tp, t->next);
 			break;
 		}
 	}
 }
 
-static void ipip6_tunnel_link(struct ip_tunnel *t)
+static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
+{
+	struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t);
+
+	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
+	rcu_assign_pointer(*tp, t);
+}
+
+static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
+{
+#ifdef CONFIG_IPV6_SIT_6RD
+	struct ip_tunnel *t = netdev_priv(dev);
+
+	if (t->dev == sitn->fb_tunnel_dev) {
+		ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
+		t->ip6rd.relay_prefix = 0;
+		t->ip6rd.prefixlen = 16;
+		t->ip6rd.relay_prefixlen = 0;
+	} else {
+		struct ip_tunnel *t0 = netdev_priv(sitn->fb_tunnel_dev);
+		memcpy(&t->ip6rd, &t0->ip6rd, sizeof(t->ip6rd));
+	}
+#endif
+}
+
+static int ipip6_tunnel_create(struct net_device *dev)
 {
-	struct ip_tunnel **tp = ipip6_bucket(t);
+	struct ip_tunnel *t = netdev_priv(dev);
+	struct net *net = dev_net(dev);
+	struct sit_net *sitn = net_generic(net, sit_net_id);
+	int err;
+
+	err = ipip6_tunnel_init(dev);
+	if (err < 0)
+		goto out;
+	ipip6_tunnel_clone_6rd(dev, sitn);
+
+	if ((__force u16)t->parms.i_flags & SIT_ISATAP)
+		dev->priv_flags |= IFF_ISATAP;
 
-	t->next = *tp;
-	write_lock_bh(&ipip6_lock);
-	*tp = t;
-	write_unlock_bh(&ipip6_lock);
+	err = register_netdevice(dev);
+	if (err < 0)
+		goto out;
+
+	strcpy(t->parms.name, dev->name);
+	dev->rtnl_link_ops = &sit_link_ops;
+
+	dev_hold(dev);
+
+	ipip6_tunnel_link(sitn, t);
+	return 0;
+
+out:
+	return err;
 }
 
-static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create)
+static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
+		struct ip_tunnel_parm *parms, int create)
 {
-	u32 remote = parms->iph.daddr;
-	u32 local = parms->iph.saddr;
-	struct ip_tunnel *t, **tp, *nt;
+	__be32 remote = parms->iph.daddr;
+	__be32 local = parms->iph.saddr;
+	struct ip_tunnel *t, *nt;
+	struct ip_tunnel __rcu **tp;
 	struct net_device *dev;
-	unsigned h = 0;
-	int prio = 0;
 	char name[IFNAMSIZ];
+	struct sit_net *sitn = net_generic(net, sit_net_id);
 
-	if (remote) {
-		prio |= 2;
-		h ^= HASH(remote);
-	}
-	if (local) {
-		prio |= 1;
-		h ^= HASH(local);
-	}
-	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
-		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
-			return t;
+	for (tp = __ipip6_bucket(sitn, parms);
+	    (t = rtnl_dereference(*tp)) != NULL;
+	     tp = &t->next) {
+		if (local == t->parms.iph.saddr &&
+		    remote == t->parms.iph.daddr &&
+		    parms->link == t->parms.link) {
+			if (create)
+				return NULL;
+			else
+				return t;
+		}
 	}
 	if (!create)
 		goto failed;
 
 	if (parms->name[0])
 		strlcpy(name, parms->name, IFNAMSIZ);
-	else {
-		int i;
-		for (i=1; i<100; i++) {
-			sprintf(name, "sit%d", i);
-			if (__dev_get_by_name(name) == NULL)
-				break;
-		}
-		if (i==100)
-			goto failed;
-	}
+	else
+		strcpy(name, "sit%d");
 
 	dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup);
 	if (dev == NULL)
 		return NULL;
 
-	nt = dev->priv;
-	dev->init = ipip6_tunnel_init;
-	nt->parms = *parms;
+	dev_net_set(dev, net);
 
-	if (register_netdevice(dev) < 0) {
-		free_netdev(dev);
-		goto failed;
-	}
+	nt = netdev_priv(dev);
 
-	dev_hold(dev);
+	nt->parms = *parms;
+	if (ipip6_tunnel_create(dev) < 0)
+		goto failed_free;
 
-	ipip6_tunnel_link(nt);
 	return nt;
 
+failed_free:
+	ipip6_dev_free(dev);
 failed:
 	return NULL;
 }
 
-static void ipip6_tunnel_uninit(struct net_device *dev)
+#define for_each_prl_rcu(start)			\
+	for (prl = rcu_dereference(start);	\
+	     prl;				\
+	     prl = rcu_dereference(prl->next))
+
+static struct ip_tunnel_prl_entry *
+__ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
 {
-	if (dev == ipip6_fb_tunnel_dev) {
-		write_lock_bh(&ipip6_lock);
-		tunnels_wc[0] = NULL;
-		write_unlock_bh(&ipip6_lock);
-		dev_put(dev);
+	struct ip_tunnel_prl_entry *prl;
+
+	for_each_prl_rcu(t->prl)
+		if (prl->addr == addr)
+			break;
+	return prl;
+
+}
+
+static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
+				struct ip_tunnel_prl __user *a)
+{
+	struct ip_tunnel_prl kprl, *kp;
+	struct ip_tunnel_prl_entry *prl;
+	unsigned int cmax, c = 0, ca, len;
+	int ret = 0;
+
+	if (copy_from_user(&kprl, a, sizeof(kprl)))
+		return -EFAULT;
+	cmax = kprl.datalen / sizeof(kprl);
+	if (cmax > 1 && kprl.addr != htonl(INADDR_ANY))
+		cmax = 1;
+
+	/* For simple GET or for root users,
+	 * we try harder to allocate.
+	 */
+	kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
+		kcalloc(cmax, sizeof(*kp), GFP_KERNEL) :
+		NULL;
+
+	rcu_read_lock();
+
+	ca = t->prl_count < cmax ? t->prl_count : cmax;
+
+	if (!kp) {
+		/* We don't try hard to allocate much memory for
+		 * non-root users.
+		 * For root users, retry allocating enough memory for
+		 * the answer.
+		 */
+		kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC);
+		if (!kp) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	c = 0;
+	for_each_prl_rcu(t->prl) {
+		if (c >= cmax)
+			break;
+		if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr)
+			continue;
+		kp[c].addr = prl->addr;
+		kp[c].flags = prl->flags;
+		c++;
+		if (kprl.addr != htonl(INADDR_ANY))
+			break;
+	}
+out:
+	rcu_read_unlock();
+
+	len = sizeof(*kp) * c;
+	ret = 0;
+	if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen))
+		ret = -EFAULT;
+
+	kfree(kp);
+
+	return ret;
+}
+
+static int
+ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
+{
+	struct ip_tunnel_prl_entry *p;
+	int err = 0;
+
+	if (a->addr == htonl(INADDR_ANY))
+		return -EINVAL;
+
+	ASSERT_RTNL();
+
+	for (p = rtnl_dereference(t->prl); p; p = rtnl_dereference(p->next)) {
+		if (p->addr == a->addr) {
+			if (chg) {
+				p->flags = a->flags;
+				goto out;
+			}
+			err = -EEXIST;
+			goto out;
+		}
+	}
+
+	if (chg) {
+		err = -ENXIO;
+		goto out;
+	}
+
+	p = kzalloc(sizeof(struct ip_tunnel_prl_entry), GFP_KERNEL);
+	if (!p) {
+		err = -ENOBUFS;
+		goto out;
+	}
+
+	p->next = t->prl;
+	p->addr = a->addr;
+	p->flags = a->flags;
+	t->prl_count++;
+	rcu_assign_pointer(t->prl, p);
+out:
+	return err;
+}
+
+static void prl_list_destroy_rcu(struct rcu_head *head)
+{
+	struct ip_tunnel_prl_entry *p, *n;
+
+	p = container_of(head, struct ip_tunnel_prl_entry, rcu_head);
+	do {
+		n = rcu_dereference_protected(p->next, 1);
+		kfree(p);
+		p = n;
+	} while (p);
+}
+
+static int
+ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
+{
+	struct ip_tunnel_prl_entry *x;
+	struct ip_tunnel_prl_entry __rcu **p;
+	int err = 0;
+
+	ASSERT_RTNL();
+
+	if (a && a->addr != htonl(INADDR_ANY)) {
+		for (p = &t->prl;
+		     (x = rtnl_dereference(*p)) != NULL;
+		     p = &x->next) {
+			if (x->addr == a->addr) {
+				*p = x->next;
+				kfree_rcu(x, rcu_head);
+				t->prl_count--;
+				goto out;
+			}
+		}
+		err = -ENXIO;
 	} else {
-		ipip6_tunnel_unlink((struct ip_tunnel*)dev->priv);
-		dev_put(dev);
+		x = rtnl_dereference(t->prl);
+		if (x) {
+			t->prl_count = 0;
+			call_rcu(&x->rcu_head, prl_list_destroy_rcu);
+			t->prl = NULL;
+		}
 	}
+out:
+	return err;
 }
 
+static int
+isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t)
+{
+	struct ip_tunnel_prl_entry *p;
+	int ok = 1;
+
+	rcu_read_lock();
+	p = __ipip6_tunnel_locate_prl(t, iph->saddr);
+	if (p) {
+		if (p->flags & PRL_DEFAULT)
+			skb->ndisc_nodetype = NDISC_NODETYPE_DEFAULT;
+		else
+			skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT;
+	} else {
+		const struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr;
+
+		if (ipv6_addr_is_isatap(addr6) &&
+		    (addr6->s6_addr32[3] == iph->saddr) &&
+		    ipv6_chk_prefix(addr6, t->dev))
+			skb->ndisc_nodetype = NDISC_NODETYPE_HOST;
+		else
+			ok = 0;
+	}
+	rcu_read_unlock();
+	return ok;
+}
 
-static void ipip6_err(struct sk_buff *skb, u32 info)
+static void ipip6_tunnel_uninit(struct net_device *dev)
 {
-#ifndef I_WISH_WORLD_WERE_PERFECT
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	struct sit_net *sitn = net_generic(tunnel->net, sit_net_id);
+
+	if (dev == sitn->fb_tunnel_dev) {
+		RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL);
+	} else {
+		ipip6_tunnel_unlink(sitn, tunnel);
+		ipip6_tunnel_del_prl(tunnel, NULL);
+	}
+	ip_tunnel_dst_reset_all(tunnel);
+	dev_put(dev);
+}
 
-/* It is not :-( All the routers (except for Linux) return only
-   8 bytes of packet payload. It means, that precise relaying of
-   ICMP in the real Internet is absolutely infeasible.
+/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
+ * if sufficient data bytes are available
  */
-	struct iphdr *iph = (struct iphdr*)skb->data;
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb)
+{
+	const struct iphdr *iph = (const struct iphdr *) skb->data;
+	struct rt6_info *rt;
+	struct sk_buff *skb2;
+
+	if (!pskb_may_pull(skb, iph->ihl * 4 + sizeof(struct ipv6hdr) + 8))
+		return 1;
+
+	skb2 = skb_clone(skb, GFP_ATOMIC);
+
+	if (!skb2)
+		return 1;
+
+	skb_dst_drop(skb2);
+	skb_pull(skb2, iph->ihl * 4);
+	skb_reset_network_header(skb2);
+
+	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+
+	if (rt && rt->dst.dev)
+		skb2->dev = rt->dst.dev;
+
+	icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
+
+	if (rt)
+		ip6_rt_put(rt);
+
+	kfree_skb(skb2);
+
+	return 0;
+}
+
+static int ipip6_err(struct sk_buff *skb, u32 info)
+{
+	const struct iphdr *iph = (const struct iphdr *)skb->data;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct ip_tunnel *t;
+	int err;
 
 	switch (type) {
 	default:
 	case ICMP_PARAMETERPROB:
-		return;
+		return 0;
 
 	case ICMP_DEST_UNREACH:
 		switch (code) {
 		case ICMP_SR_FAILED:
-		case ICMP_PORT_UNREACH:
 			/* Impossible event. */
-			return;
-		case ICMP_FRAG_NEEDED:
-			/* Soft state for pmtu is maintained by IP core. */
-			return;
+			return 0;
 		default:
 			/* All others are translated to HOST_UNREACH.
 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
@@ -252,166 +543,258 @@ static void ipip6_err(struct sk_buff *skb, u32 info)
 		break;
 	case ICMP_TIME_EXCEEDED:
 		if (code != ICMP_EXC_TTL)
-			return;
+			return 0;
+		break;
+	case ICMP_REDIRECT:
 		break;
 	}
 
-	read_lock(&ipip6_lock);
-	t = ipip6_tunnel_lookup(iph->daddr, iph->saddr);
-	if (t == NULL || t->parms.iph.daddr == 0)
+	err = -ENOENT;
+
+	t = ipip6_tunnel_lookup(dev_net(skb->dev),
+				skb->dev,
+				iph->daddr,
+				iph->saddr);
+	if (t == NULL)
+		goto out;
+
+	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
+				 t->parms.link, 0, IPPROTO_IPV6, 0);
+		err = 0;
+		goto out;
+	}
+	if (type == ICMP_REDIRECT) {
+		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
+			      IPPROTO_IPV6, 0);
+		err = 0;
+		goto out;
+	}
+
+	if (t->parms.iph.daddr == 0)
 		goto out;
+
+	err = 0;
+	if (!ipip6_err_gen_icmpv6_unreach(skb))
+		goto out;
+
 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 		goto out;
 
-	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
+	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 		t->err_count++;
 	else
 		t->err_count = 1;
 	t->err_time = jiffies;
 out:
-	read_unlock(&ipip6_lock);
-	return;
-#else
-	struct iphdr *iph = (struct iphdr*)dp;
-	int hlen = iph->ihl<<2;
-	struct ipv6hdr *iph6;
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
-	int rel_type = 0;
-	int rel_code = 0;
-	int rel_info = 0;
-	struct sk_buff *skb2;
-	struct rt6_info *rt6i;
+	return err;
+}
 
-	if (len < hlen + sizeof(struct ipv6hdr))
-		return;
-	iph6 = (struct ipv6hdr*)(dp + hlen);
+static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
+				  const struct in6_addr *v6addr)
+{
+	__be32 v4embed = 0;
+	if (check_6rd(tunnel, v6addr, &v4embed) && v4addr != v4embed)
+		return true;
+	return false;
+}
 
-	switch (type) {
-	default:
-		return;
-	case ICMP_PARAMETERPROB:
-		if (skb->h.icmph->un.gateway < hlen)
-			return;
+/* Checks if an address matches an address on the tunnel interface.
+ * Used to detect the NAT of proto 41 packets and let them pass spoofing test.
+ * Long story:
+ * This function is called after we considered the packet as spoofed
+ * in is_spoofed_6rd.
+ * We may have a router that is doing NAT for proto 41 packets
+ * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb
+ * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd
+ * function will return true, dropping the packet.
+ * But, we can still check if is spoofed against the IP
+ * addresses associated with the interface.
+ */
+static bool only_dnatted(const struct ip_tunnel *tunnel,
+	const struct in6_addr *v6dst)
+{
+	int prefix_len;
 
-		/* So... This guy found something strange INSIDE encapsulated
-		   packet. Well, he is fool, but what can we do ?
-		 */
-		rel_type = ICMPV6_PARAMPROB;
-		rel_info = skb->h.icmph->un.gateway - hlen;
-		break;
+#ifdef CONFIG_IPV6_SIT_6RD
+	prefix_len = tunnel->ip6rd.prefixlen + 32
+		- tunnel->ip6rd.relay_prefixlen;
+#else
+	prefix_len = 48;
+#endif
+	return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev);
+}
 
-	case ICMP_DEST_UNREACH:
-		switch (code) {
-		case ICMP_SR_FAILED:
-		case ICMP_PORT_UNREACH:
-			/* Impossible event. */
-			return;
-		case ICMP_FRAG_NEEDED:
-			/* Too complicated case ... */
-			return;
-		default:
-			/* All others are translated to HOST_UNREACH.
-			   rfc2003 contains "deep thoughts" about NET_UNREACH,
-			   I believe, it is just ether pollution. --ANK
-			 */
-			rel_type = ICMPV6_DEST_UNREACH;
-			rel_code = ICMPV6_ADDR_UNREACH;
-			break;
-		}
-		break;
-	case ICMP_TIME_EXCEEDED:
-		if (code != ICMP_EXC_TTL)
-			return;
-		rel_type = ICMPV6_TIME_EXCEED;
-		rel_code = ICMPV6_EXC_HOPLIMIT;
-		break;
+/* Returns true if a packet is spoofed */
+static bool packet_is_spoofed(struct sk_buff *skb,
+			      const struct iphdr *iph,
+			      struct ip_tunnel *tunnel)
+{
+	const struct ipv6hdr *ipv6h;
+
+	if (tunnel->dev->priv_flags & IFF_ISATAP) {
+		if (!isatap_chksrc(skb, iph, tunnel))
+			return true;
+
+		return false;
 	}
 
-	/* Prepare fake skb to feed it to icmpv6_send */
-	skb2 = skb_clone(skb, GFP_ATOMIC);
-	if (skb2 == NULL)
-		return;
-	dst_release(skb2->dst);
-	skb2->dst = NULL;
-	skb_pull(skb2, skb->data - (u8*)iph6);
-	skb2->nh.raw = skb2->data;
-
-	/* Try to guess incoming interface */
-	rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0);
-	if (rt6i && rt6i->rt6i_dev) {
-		skb2->dev = rt6i->rt6i_dev;
-
-		rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0);
-
-		if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) {
-			struct ip_tunnel * t = (struct ip_tunnel*)rt6i->rt6i_dev->priv;
-			if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) {
-				rel_type = ICMPV6_DEST_UNREACH;
-				rel_code = ICMPV6_ADDR_UNREACH;
-			}
-			icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
-		}
+	if (tunnel->dev->flags & IFF_POINTOPOINT)
+		return false;
+
+	ipv6h = ipv6_hdr(skb);
+
+	if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) {
+		net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+				     &iph->saddr, &ipv6h->saddr,
+				     &iph->daddr, &ipv6h->daddr);
+		return true;
 	}
-	kfree_skb(skb2);
-	return;
-#endif
-}
 
-static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
-{
-	if (INET_ECN_is_ce(iph->tos))
-		IP6_ECN_set_ce(skb->nh.ipv6h);
+	if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr)))
+		return false;
+
+	if (only_dnatted(tunnel, &ipv6h->daddr))
+		return false;
+
+	net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+			     &iph->saddr, &ipv6h->saddr,
+			     &iph->daddr, &ipv6h->daddr);
+	return true;
 }
 
 static int ipip6_rcv(struct sk_buff *skb)
 {
-	struct iphdr *iph;
+	const struct iphdr *iph = ip_hdr(skb);
 	struct ip_tunnel *tunnel;
+	int err;
 
-	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
-		goto out;
+	tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
+				     iph->saddr, iph->daddr);
+	if (tunnel != NULL) {
+		struct pcpu_sw_netstats *tstats;
 
-	iph = skb->nh.iph;
+		if (tunnel->parms.iph.protocol != IPPROTO_IPV6 &&
+		    tunnel->parms.iph.protocol != 0)
+			goto out;
 
-	read_lock(&ipip6_lock);
-	if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
-		secpath_reset(skb);
-		skb->mac.raw = skb->nh.raw;
-		skb->nh.raw = skb->data;
-		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+		skb->mac_header = skb->network_header;
+		skb_reset_network_header(skb);
+		IPCB(skb)->flags = 0;
 		skb->protocol = htons(ETH_P_IPV6);
-		skb->pkt_type = PACKET_HOST;
-		tunnel->stat.rx_packets++;
-		tunnel->stat.rx_bytes += skb->len;
-		skb->dev = tunnel->dev;
-		dst_release(skb->dst);
-		skb->dst = NULL;
-		nf_reset(skb);
-		ipip6_ecn_decapsulate(iph, skb);
+
+		if (packet_is_spoofed(skb, iph, tunnel)) {
+			tunnel->dev->stats.rx_errors++;
+			goto out;
+		}
+
+		__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
+
+		err = IP_ECN_decapsulate(iph, skb);
+		if (unlikely(err)) {
+			if (log_ecn_error)
+				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
+						     &iph->saddr, iph->tos);
+			if (err > 1) {
+				++tunnel->dev->stats.rx_frame_errors;
+				++tunnel->dev->stats.rx_errors;
+				goto out;
+			}
+		}
+
+		tstats = this_cpu_ptr(tunnel->dev->tstats);
+		u64_stats_update_begin(&tstats->syncp);
+		tstats->rx_packets++;
+		tstats->rx_bytes += skb->len;
+		u64_stats_update_end(&tstats->syncp);
+
 		netif_rx(skb);
-		read_unlock(&ipip6_lock);
+
 		return 0;
 	}
 
-	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
-	kfree_skb(skb);
-	read_unlock(&ipip6_lock);
+	/* no tunnel matched,  let upstream know, ipsec may handle it */
+	return 1;
 out:
+	kfree_skb(skb);
 	return 0;
 }
 
-/* Returns the embedded IPv4 address if the IPv6 address
-   comes from 6to4 (RFC 3056) addr space */
+static const struct tnl_ptk_info tpi = {
+	/* no tunnel info required for ipip. */
+	.proto = htons(ETH_P_IP),
+};
 
-static inline u32 try_6to4(struct in6_addr *v6dst)
+static int ipip_rcv(struct sk_buff *skb)
 {
-	u32 dst = 0;
+	const struct iphdr *iph;
+	struct ip_tunnel *tunnel;
+
+	iph = ip_hdr(skb);
+	tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
+				     iph->saddr, iph->daddr);
+	if (tunnel != NULL) {
+		if (tunnel->parms.iph.protocol != IPPROTO_IPIP &&
+		    tunnel->parms.iph.protocol != 0)
+			goto drop;
+
+		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+			goto drop;
+		if (iptunnel_pull_header(skb, 0, tpi.proto))
+			goto drop;
+		return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
+	}
 
+	return 1;
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
+/*
+ * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function
+ * stores the embedded IPv4 address in v4dst and returns true.
+ */
+static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
+		      __be32 *v4dst)
+{
+#ifdef CONFIG_IPV6_SIT_6RD
+	if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix,
+			      tunnel->ip6rd.prefixlen)) {
+		unsigned int pbw0, pbi0;
+		int pbi1;
+		u32 d;
+
+		pbw0 = tunnel->ip6rd.prefixlen >> 5;
+		pbi0 = tunnel->ip6rd.prefixlen & 0x1f;
+
+		d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
+		    tunnel->ip6rd.relay_prefixlen;
+
+		pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen;
+		if (pbi1 > 0)
+			d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >>
+			     (32 - pbi1);
+
+		*v4dst = tunnel->ip6rd.relay_prefix | htonl(d);
+		return true;
+	}
+#else
 	if (v6dst->s6_addr16[0] == htons(0x2002)) {
-	        /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
-		memcpy(&dst, &v6dst->s6_addr16[1], 4);
+		/* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
+		memcpy(v4dst, &v6dst->s6_addr16[1], 4);
+		return true;
 	}
+#endif
+	return false;
+}
+
+static inline __be32 try_6rd(struct ip_tunnel *tunnel,
+			     const struct in6_addr *v6dst)
+{
+	__be32 dst = 0;
+	check_6rd(tunnel, v6dst, &dst);
 	return dst;
 }
 
@@ -420,107 +803,140 @@ static inline u32 try_6to4(struct in6_addr *v6dst)
  *	and that skb is filled properly by that function.
  */
 
-static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
+				     struct net_device *dev)
 {
-	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
-	struct net_device_stats *stats = &tunnel->stat;
-	struct iphdr  *tiph = &tunnel->parms.iph;
-	struct ipv6hdr *iph6 = skb->nh.ipv6h;
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	const struct iphdr  *tiph = &tunnel->parms.iph;
+	const struct ipv6hdr *iph6 = ipv6_hdr(skb);
 	u8     tos = tunnel->parms.iph.tos;
+	__be16 df = tiph->frag_off;
 	struct rtable *rt;     			/* Route to the other host */
-	struct net_device *tdev;			/* Device to other host */
-	struct iphdr  *iph;			/* Our new IP header */
-	int    max_headroom;			/* The extra header space needed */
-	u32    dst = tiph->daddr;
+	struct net_device *tdev;		/* Device to other host */
+	unsigned int max_headroom;		/* The extra header space needed */
+	__be32 dst = tiph->daddr;
+	struct flowi4 fl4;
 	int    mtu;
-	struct in6_addr *addr6;	
+	const struct in6_addr *addr6;
 	int addr_type;
-
-	if (tunnel->recursion++) {
-		tunnel->stat.collisions++;
-		goto tx_error;
-	}
+	u8 ttl;
+	int err;
 
 	if (skb->protocol != htons(ETH_P_IPV6))
 		goto tx_error;
 
+	if (tos == 1)
+		tos = ipv6_get_dsfield(iph6);
+
+	/* ISATAP (RFC4214) - must come before 6to4 */
+	if (dev->priv_flags & IFF_ISATAP) {
+		struct neighbour *neigh = NULL;
+		bool do_tx_error = false;
+
+		if (skb_dst(skb))
+			neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
+
+		if (neigh == NULL) {
+			net_dbg_ratelimited("nexthop == NULL\n");
+			goto tx_error;
+		}
+
+		addr6 = (const struct in6_addr *)&neigh->primary_key;
+		addr_type = ipv6_addr_type(addr6);
+
+		if ((addr_type & IPV6_ADDR_UNICAST) &&
+		     ipv6_addr_is_isatap(addr6))
+			dst = addr6->s6_addr32[3];
+		else
+			do_tx_error = true;
+
+		neigh_release(neigh);
+		if (do_tx_error)
+			goto tx_error;
+	}
+
 	if (!dst)
-		dst = try_6to4(&iph6->daddr);
+		dst = try_6rd(tunnel, &iph6->daddr);
 
 	if (!dst) {
 		struct neighbour *neigh = NULL;
+		bool do_tx_error = false;
 
-		if (skb->dst)
-			neigh = skb->dst->neighbour;
+		if (skb_dst(skb))
+			neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
 
 		if (neigh == NULL) {
-			if (net_ratelimit())
-				printk(KERN_DEBUG "sit: nexthop == NULL\n");
+			net_dbg_ratelimited("nexthop == NULL\n");
 			goto tx_error;
 		}
 
-		addr6 = (struct in6_addr*)&neigh->primary_key;
+		addr6 = (const struct in6_addr *)&neigh->primary_key;
 		addr_type = ipv6_addr_type(addr6);
 
 		if (addr_type == IPV6_ADDR_ANY) {
-			addr6 = &skb->nh.ipv6h->daddr;
+			addr6 = &ipv6_hdr(skb)->daddr;
 			addr_type = ipv6_addr_type(addr6);
 		}
 
-		if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
-			goto tx_error_icmp;
+		if ((addr_type & IPV6_ADDR_COMPATv4) != 0)
+			dst = addr6->s6_addr32[3];
+		else
+			do_tx_error = true;
 
-		dst = addr6->s6_addr32[3];
+		neigh_release(neigh);
+		if (do_tx_error)
+			goto tx_error;
 	}
 
-	{
-		struct flowi fl = { .nl_u = { .ip4_u =
-					      { .daddr = dst,
-						.saddr = tiph->saddr,
-						.tos = RT_TOS(tos) } },
-				    .oif = tunnel->parms.link,
-				    .proto = IPPROTO_IPV6 };
-		if (ip_route_output_key(&rt, &fl)) {
-			tunnel->stat.tx_carrier_errors++;
-			goto tx_error_icmp;
-		}
+	rt = ip_route_output_ports(tunnel->net, &fl4, NULL,
+				   dst, tiph->saddr,
+				   0, 0,
+				   IPPROTO_IPV6, RT_TOS(tos),
+				   tunnel->parms.link);
+	if (IS_ERR(rt)) {
+		dev->stats.tx_carrier_errors++;
+		goto tx_error_icmp;
 	}
 	if (rt->rt_type != RTN_UNICAST) {
 		ip_rt_put(rt);
-		tunnel->stat.tx_carrier_errors++;
+		dev->stats.tx_carrier_errors++;
 		goto tx_error_icmp;
 	}
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
 	if (tdev == dev) {
 		ip_rt_put(rt);
-		tunnel->stat.collisions++;
+		dev->stats.collisions++;
 		goto tx_error;
 	}
 
-	if (tiph->frag_off)
-		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
-	else
-		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
+	if (df) {
+		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 
-	if (mtu < 68) {
-		tunnel->stat.collisions++;
-		ip_rt_put(rt);
-		goto tx_error;
-	}
-	if (mtu < IPV6_MIN_MTU)
-		mtu = IPV6_MIN_MTU;
-	if (tunnel->parms.iph.daddr && skb->dst)
-		skb->dst->ops->update_pmtu(skb->dst, mtu);
+		if (mtu < 68) {
+			dev->stats.collisions++;
+			ip_rt_put(rt);
+			goto tx_error;
+		}
 
-	if (skb->len > mtu) {
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
-		ip_rt_put(rt);
-		goto tx_error;
+		if (mtu < IPV6_MIN_MTU) {
+			mtu = IPV6_MIN_MTU;
+			df = 0;
+		}
+
+		if (tunnel->parms.iph.daddr && skb_dst(skb))
+			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+
+		if (skb->len > mtu && !skb_is_gso(skb)) {
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+			ip_rt_put(rt);
+			goto tx_error;
+		}
 	}
 
 	if (tunnel->err_count > 0) {
-		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
+		if (time_before(jiffies,
+				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 			tunnel->err_count--;
 			dst_link_failure(skb);
 		} else
@@ -532,91 +948,231 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	 */
 	max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr);
 
-	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
+	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
+	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 		if (!new_skb) {
 			ip_rt_put(rt);
-  			stats->tx_dropped++;
-			dev_kfree_skb(skb);
-			tunnel->recursion--;
-			return 0;
+			dev->stats.tx_dropped++;
+			kfree_skb(skb);
+			return NETDEV_TX_OK;
 		}
 		if (skb->sk)
 			skb_set_owner_w(new_skb, skb->sk);
 		dev_kfree_skb(skb);
 		skb = new_skb;
-		iph6 = skb->nh.ipv6h;
+		iph6 = ipv6_hdr(skb);
 	}
+	ttl = tiph->ttl;
+	if (ttl == 0)
+		ttl = iph6->hop_limit;
+	tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
 
-	skb->h.raw = skb->nh.raw;
-	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
-	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
+	skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT);
+	if (IS_ERR(skb)) {
+		ip_rt_put(rt);
+		goto out;
+	}
 
-	/*
-	 *	Push down and install the IPIP header.
-	 */
+	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
+			    IPPROTO_IPV6, tos, ttl, df,
+			    !net_eq(tunnel->net, dev_net(dev)));
+	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
+	return NETDEV_TX_OK;
 
-	iph 			=	skb->nh.iph;
-	iph->version		=	4;
-	iph->ihl		=	sizeof(struct iphdr)>>2;
-	if (mtu > IPV6_MIN_MTU)
-		iph->frag_off	=	htons(IP_DF);
-	else
-		iph->frag_off	=	0;
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	kfree_skb(skb);
+out:
+	dev->stats.tx_errors++;
+	return NETDEV_TX_OK;
+}
+
+static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	const struct iphdr  *tiph = &tunnel->parms.iph;
 
-	iph->protocol		=	IPPROTO_IPV6;
-	iph->tos		=	INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
-	iph->daddr		=	rt->rt_dst;
-	iph->saddr		=	rt->rt_src;
+	skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+	if (IS_ERR(skb))
+		goto out;
 
-	if ((iph->ttl = tiph->ttl) == 0)
-		iph->ttl	=	iph6->hop_limit;
+	ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
+	return NETDEV_TX_OK;
+out:
+	dev->stats.tx_errors++;
+	return NETDEV_TX_OK;
+}
 
-	nf_reset(skb);
+static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
+				   struct net_device *dev)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		ipip_tunnel_xmit(skb, dev);
+		break;
+	case htons(ETH_P_IPV6):
+		ipip6_tunnel_xmit(skb, dev);
+		break;
+	default:
+		goto tx_err;
+	}
 
-	IPTUNNEL_XMIT();
-	tunnel->recursion--;
-	return 0;
+	return NETDEV_TX_OK;
 
-tx_error_icmp:
-	dst_link_failure(skb);
-tx_error:
-	stats->tx_errors++;
-	dev_kfree_skb(skb);
-	tunnel->recursion--;
+tx_err:
+	dev->stats.tx_errors++;
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+
+}
+
+static void ipip6_tunnel_bind_dev(struct net_device *dev)
+{
+	struct net_device *tdev = NULL;
+	struct ip_tunnel *tunnel;
+	const struct iphdr *iph;
+	struct flowi4 fl4;
+
+	tunnel = netdev_priv(dev);
+	iph = &tunnel->parms.iph;
+
+	if (iph->daddr) {
+		struct rtable *rt = ip_route_output_ports(tunnel->net, &fl4,
+							  NULL,
+							  iph->daddr, iph->saddr,
+							  0, 0,
+							  IPPROTO_IPV6,
+							  RT_TOS(iph->tos),
+							  tunnel->parms.link);
+
+		if (!IS_ERR(rt)) {
+			tdev = rt->dst.dev;
+			ip_rt_put(rt);
+		}
+		dev->flags |= IFF_POINTOPOINT;
+	}
+
+	if (!tdev && tunnel->parms.link)
+		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
+
+	if (tdev) {
+		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
+		dev->mtu = tdev->mtu - sizeof(struct iphdr);
+		if (dev->mtu < IPV6_MIN_MTU)
+			dev->mtu = IPV6_MIN_MTU;
+	}
+	dev->iflink = tunnel->parms.link;
+}
+
+static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
+{
+	struct net *net = t->net;
+	struct sit_net *sitn = net_generic(net, sit_net_id);
+
+	ipip6_tunnel_unlink(sitn, t);
+	synchronize_net();
+	t->parms.iph.saddr = p->iph.saddr;
+	t->parms.iph.daddr = p->iph.daddr;
+	memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
+	memcpy(t->dev->broadcast, &p->iph.daddr, 4);
+	ipip6_tunnel_link(sitn, t);
+	t->parms.iph.ttl = p->iph.ttl;
+	t->parms.iph.tos = p->iph.tos;
+	if (t->parms.link != p->link) {
+		t->parms.link = p->link;
+		ipip6_tunnel_bind_dev(t->dev);
+	}
+	ip_tunnel_dst_reset_all(t);
+	netdev_state_change(t->dev);
+}
+
+#ifdef CONFIG_IPV6_SIT_6RD
+static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
+				   struct ip_tunnel_6rd *ip6rd)
+{
+	struct in6_addr prefix;
+	__be32 relay_prefix;
+
+	if (ip6rd->relay_prefixlen > 32 ||
+	    ip6rd->prefixlen + (32 - ip6rd->relay_prefixlen) > 64)
+		return -EINVAL;
+
+	ipv6_addr_prefix(&prefix, &ip6rd->prefix, ip6rd->prefixlen);
+	if (!ipv6_addr_equal(&prefix, &ip6rd->prefix))
+		return -EINVAL;
+	if (ip6rd->relay_prefixlen)
+		relay_prefix = ip6rd->relay_prefix &
+			       htonl(0xffffffffUL <<
+				     (32 - ip6rd->relay_prefixlen));
+	else
+		relay_prefix = 0;
+	if (relay_prefix != ip6rd->relay_prefix)
+		return -EINVAL;
+
+	t->ip6rd.prefix = prefix;
+	t->ip6rd.relay_prefix = relay_prefix;
+	t->ip6rd.prefixlen = ip6rd->prefixlen;
+	t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
+	ip_tunnel_dst_reset_all(t);
+	netdev_state_change(t->dev);
 	return 0;
 }
+#endif
 
 static int
 ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	int err = 0;
 	struct ip_tunnel_parm p;
-	struct ip_tunnel *t;
+	struct ip_tunnel_prl prl;
+	struct ip_tunnel *t = netdev_priv(dev);
+	struct net *net = t->net;
+	struct sit_net *sitn = net_generic(net, sit_net_id);
+#ifdef CONFIG_IPV6_SIT_6RD
+	struct ip_tunnel_6rd ip6rd;
+#endif
 
 	switch (cmd) {
 	case SIOCGETTUNNEL:
-		t = NULL;
-		if (dev == ipip6_fb_tunnel_dev) {
+#ifdef CONFIG_IPV6_SIT_6RD
+	case SIOCGET6RD:
+#endif
+		if (dev == sitn->fb_tunnel_dev) {
 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 				err = -EFAULT;
 				break;
 			}
-			t = ipip6_tunnel_locate(&p, 0);
+			t = ipip6_tunnel_locate(net, &p, 0);
+			if (t == NULL)
+				t = netdev_priv(dev);
 		}
-		if (t == NULL)
-			t = (struct ip_tunnel*)dev->priv;
-		memcpy(&p, &t->parms, sizeof(p));
-		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
-			err = -EFAULT;
+
+		err = -EFAULT;
+		if (cmd == SIOCGETTUNNEL) {
+			memcpy(&p, &t->parms, sizeof(p));
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p,
+					 sizeof(p)))
+				goto done;
+#ifdef CONFIG_IPV6_SIT_6RD
+		} else {
+			ip6rd.prefix = t->ip6rd.prefix;
+			ip6rd.relay_prefix = t->ip6rd.relay_prefix;
+			ip6rd.prefixlen = t->ip6rd.prefixlen;
+			ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd,
+					 sizeof(ip6rd)))
+				goto done;
+#endif
+		}
+		err = 0;
 		break;
 
 	case SIOCADDTUNNEL:
 	case SIOCCHGTUNNEL:
 		err = -EPERM;
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			goto done;
 
 		err = -EFAULT;
@@ -624,15 +1180,19 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			goto done;
 
 		err = -EINVAL;
-		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 ||
+		if (p.iph.protocol != IPPROTO_IPV6 &&
+		    p.iph.protocol != IPPROTO_IPIP &&
+		    p.iph.protocol != 0)
+			goto done;
+		if (p.iph.version != 4 ||
 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 			goto done;
 		if (p.iph.ttl)
 			p.iph.frag_off |= htons(IP_DF);
 
-		t = ipip6_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
+		t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
 
-		if (dev != ipip6_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+		if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 			if (t != NULL) {
 				if (t->dev != dev) {
 					err = -EEXIST;
@@ -644,23 +1204,14 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 					err = -EINVAL;
 					break;
 				}
-				t = (struct ip_tunnel*)dev->priv;
-				ipip6_tunnel_unlink(t);
-				t->parms.iph.saddr = p.iph.saddr;
-				t->parms.iph.daddr = p.iph.daddr;
-				memcpy(dev->dev_addr, &p.iph.saddr, 4);
-				memcpy(dev->broadcast, &p.iph.daddr, 4);
-				ipip6_tunnel_link(t);
-				netdev_state_change(dev);
+				t = netdev_priv(dev);
 			}
+
+			ipip6_tunnel_update(t, &p);
 		}
 
 		if (t) {
 			err = 0;
-			if (cmd == SIOCCHGTUNNEL) {
-				t->parms.iph.ttl = p.iph.ttl;
-				t->parms.iph.tos = p.iph.tos;
-			}
 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 				err = -EFAULT;
 		} else
@@ -669,24 +1220,82 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 
 	case SIOCDELTUNNEL:
 		err = -EPERM;
-		if (!capable(CAP_NET_ADMIN))
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			goto done;
 
-		if (dev == ipip6_fb_tunnel_dev) {
+		if (dev == sitn->fb_tunnel_dev) {
 			err = -EFAULT;
 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 				goto done;
 			err = -ENOENT;
-			if ((t = ipip6_tunnel_locate(&p, 0)) == NULL)
+			if ((t = ipip6_tunnel_locate(net, &p, 0)) == NULL)
 				goto done;
 			err = -EPERM;
-			if (t == ipip6_fb_tunnel_dev->priv)
+			if (t == netdev_priv(sitn->fb_tunnel_dev))
 				goto done;
 			dev = t->dev;
 		}
-		err = unregister_netdevice(dev);
+		unregister_netdevice(dev);
+		err = 0;
+		break;
+
+	case SIOCGETPRL:
+		err = -EINVAL;
+		if (dev == sitn->fb_tunnel_dev)
+			goto done;
+		err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data);
+		break;
+
+	case SIOCADDPRL:
+	case SIOCDELPRL:
+	case SIOCCHGPRL:
+		err = -EPERM;
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+			goto done;
+		err = -EINVAL;
+		if (dev == sitn->fb_tunnel_dev)
+			goto done;
+		err = -EFAULT;
+		if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
+			goto done;
+
+		switch (cmd) {
+		case SIOCDELPRL:
+			err = ipip6_tunnel_del_prl(t, &prl);
+			break;
+		case SIOCADDPRL:
+		case SIOCCHGPRL:
+			err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
+			break;
+		}
+		ip_tunnel_dst_reset_all(t);
+		netdev_state_change(dev);
 		break;
 
+#ifdef CONFIG_IPV6_SIT_6RD
+	case SIOCADD6RD:
+	case SIOCCHG6RD:
+	case SIOCDEL6RD:
+		err = -EPERM;
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+			goto done;
+
+		err = -EFAULT;
+		if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data,
+				   sizeof(ip6rd)))
+			goto done;
+
+		if (cmd != SIOCDEL6RD) {
+			err = ipip6_tunnel_update_6rd(t, &ip6rd);
+			if (err < 0)
+				goto done;
+		} else
+			ipip6_tunnel_clone_6rd(dev, sitn);
+
+		err = 0;
+		break;
+#endif
+
 	default:
 		err = -EINVAL;
 	}
@@ -695,11 +1304,6 @@ done:
 	return err;
 }
 
-static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev)
-{
-	return &(((struct ip_tunnel*)dev->priv)->stat);
-}
-
 static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 {
 	if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr))
@@ -708,74 +1312,79 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
+static const struct net_device_ops ipip6_netdev_ops = {
+	.ndo_uninit	= ipip6_tunnel_uninit,
+	.ndo_start_xmit	= sit_tunnel_xmit,
+	.ndo_do_ioctl	= ipip6_tunnel_ioctl,
+	.ndo_change_mtu	= ipip6_tunnel_change_mtu,
+	.ndo_get_stats64 = ip_tunnel_get_stats64,
+};
+
+static void ipip6_dev_free(struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+
+	free_percpu(tunnel->dst_cache);
+	free_percpu(dev->tstats);
+	free_netdev(dev);
+}
+
+#define SIT_FEATURES (NETIF_F_SG	   | \
+		      NETIF_F_FRAGLIST	   | \
+		      NETIF_F_HIGHDMA	   | \
+		      NETIF_F_GSO_SOFTWARE | \
+		      NETIF_F_HW_CSUM)
+
 static void ipip6_tunnel_setup(struct net_device *dev)
 {
-	SET_MODULE_OWNER(dev);
-	dev->uninit		= ipip6_tunnel_uninit;
-	dev->destructor 	= free_netdev;
-	dev->hard_start_xmit	= ipip6_tunnel_xmit;
-	dev->get_stats		= ipip6_tunnel_get_stats;
-	dev->do_ioctl		= ipip6_tunnel_ioctl;
-	dev->change_mtu		= ipip6_tunnel_change_mtu;
+	dev->netdev_ops		= &ipip6_netdev_ops;
+	dev->destructor 	= ipip6_dev_free;
 
 	dev->type		= ARPHRD_SIT;
 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
-	dev->mtu		= 1500 - sizeof(struct iphdr);
+	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
 	dev->flags		= IFF_NOARP;
+	dev->priv_flags	       &= ~IFF_XMIT_DST_RELEASE;
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
+	dev->features		|= NETIF_F_LLTX;
+	dev->features		|= SIT_FEATURES;
+	dev->hw_features	|= SIT_FEATURES;
 }
 
 static int ipip6_tunnel_init(struct net_device *dev)
 {
-	struct net_device *tdev = NULL;
-	struct ip_tunnel *tunnel;
-	struct iphdr *iph;
-
-	tunnel = (struct ip_tunnel*)dev->priv;
-	iph = &tunnel->parms.iph;
+	struct ip_tunnel *tunnel = netdev_priv(dev);
 
 	tunnel->dev = dev;
-	strcpy(tunnel->parms.name, dev->name);
+	tunnel->net = dev_net(dev);
 
 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 
-	if (iph->daddr) {
-		struct flowi fl = { .nl_u = { .ip4_u =
-					      { .daddr = iph->daddr,
-						.saddr = iph->saddr,
-						.tos = RT_TOS(iph->tos) } },
-				    .oif = tunnel->parms.link,
-				    .proto = IPPROTO_IPV6 };
-		struct rtable *rt;
-		if (!ip_route_output_key(&rt, &fl)) {
-			tdev = rt->u.dst.dev;
-			ip_rt_put(rt);
-		}
-		dev->flags |= IFF_POINTOPOINT;
-	}
-
-	if (!tdev && tunnel->parms.link)
-		tdev = __dev_get_by_index(tunnel->parms.link);
+	ipip6_tunnel_bind_dev(dev);
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	if (!dev->tstats)
+		return -ENOMEM;
 
-	if (tdev) {
-		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
-		dev->mtu = tdev->mtu - sizeof(struct iphdr);
-		if (dev->mtu < IPV6_MIN_MTU)
-			dev->mtu = IPV6_MIN_MTU;
+	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
+	if (!tunnel->dst_cache) {
+		free_percpu(dev->tstats);
+		return -ENOMEM;
 	}
-	dev->iflink = tunnel->parms.link;
 
 	return 0;
 }
 
-static int __init ipip6_fb_tunnel_init(struct net_device *dev)
+static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
 {
-	struct ip_tunnel *tunnel = dev->priv;
+	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct iphdr *iph = &tunnel->parms.iph;
+	struct net *net = dev_net(dev);
+	struct sit_net *sitn = net_generic(net, sit_net_id);
 
 	tunnel->dev = dev;
+	tunnel->net = dev_net(dev);
 	strcpy(tunnel->parms.name, dev->name);
 
 	iph->version		= 4;
@@ -783,68 +1392,441 @@ static int __init ipip6_fb_tunnel_init(struct net_device *dev)
 	iph->ihl		= 5;
 	iph->ttl		= 64;
 
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	if (!dev->tstats)
+		return -ENOMEM;
+
+	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
+	if (!tunnel->dst_cache) {
+		free_percpu(dev->tstats);
+		return -ENOMEM;
+	}
+
 	dev_hold(dev);
-	tunnels_wc[0]		= tunnel;
+	rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
+	return 0;
+}
+
+static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	u8 proto;
+
+	if (!data || !data[IFLA_IPTUN_PROTO])
+		return 0;
+
+	proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+	if (proto != IPPROTO_IPV6 &&
+	    proto != IPPROTO_IPIP &&
+	    proto != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void ipip6_netlink_parms(struct nlattr *data[],
+				struct ip_tunnel_parm *parms)
+{
+	memset(parms, 0, sizeof(*parms));
+
+	parms->iph.version = 4;
+	parms->iph.protocol = IPPROTO_IPV6;
+	parms->iph.ihl = 5;
+	parms->iph.ttl = 64;
+
+	if (!data)
+		return;
+
+	if (data[IFLA_IPTUN_LINK])
+		parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
+
+	if (data[IFLA_IPTUN_LOCAL])
+		parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
+
+	if (data[IFLA_IPTUN_REMOTE])
+		parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
+
+	if (data[IFLA_IPTUN_TTL]) {
+		parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
+		if (parms->iph.ttl)
+			parms->iph.frag_off = htons(IP_DF);
+	}
+
+	if (data[IFLA_IPTUN_TOS])
+		parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
+
+	if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
+		parms->iph.frag_off = htons(IP_DF);
+
+	if (data[IFLA_IPTUN_FLAGS])
+		parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
+
+	if (data[IFLA_IPTUN_PROTO])
+		parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+
+}
+
+#ifdef CONFIG_IPV6_SIT_6RD
+/* This function returns true when 6RD attributes are present in the nl msg */
+static bool ipip6_netlink_6rd_parms(struct nlattr *data[],
+				    struct ip_tunnel_6rd *ip6rd)
+{
+	bool ret = false;
+	memset(ip6rd, 0, sizeof(*ip6rd));
+
+	if (!data)
+		return ret;
+
+	if (data[IFLA_IPTUN_6RD_PREFIX]) {
+		ret = true;
+		nla_memcpy(&ip6rd->prefix, data[IFLA_IPTUN_6RD_PREFIX],
+			   sizeof(struct in6_addr));
+	}
+
+	if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) {
+		ret = true;
+		ip6rd->relay_prefix =
+			nla_get_be32(data[IFLA_IPTUN_6RD_RELAY_PREFIX]);
+	}
+
+	if (data[IFLA_IPTUN_6RD_PREFIXLEN]) {
+		ret = true;
+		ip6rd->prefixlen = nla_get_u16(data[IFLA_IPTUN_6RD_PREFIXLEN]);
+	}
+
+	if (data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]) {
+		ret = true;
+		ip6rd->relay_prefixlen =
+			nla_get_u16(data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]);
+	}
+
+	return ret;
+}
+#endif
+
+static int ipip6_newlink(struct net *src_net, struct net_device *dev,
+			 struct nlattr *tb[], struct nlattr *data[])
+{
+	struct net *net = dev_net(dev);
+	struct ip_tunnel *nt;
+#ifdef CONFIG_IPV6_SIT_6RD
+	struct ip_tunnel_6rd ip6rd;
+#endif
+	int err;
+
+	nt = netdev_priv(dev);
+	ipip6_netlink_parms(data, &nt->parms);
+
+	if (ipip6_tunnel_locate(net, &nt->parms, 0))
+		return -EEXIST;
+
+	err = ipip6_tunnel_create(dev);
+	if (err < 0)
+		return err;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+	if (ipip6_netlink_6rd_parms(data, &ip6rd))
+		err = ipip6_tunnel_update_6rd(nt, &ip6rd);
+#endif
+
+	return err;
+}
+
+static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
+			  struct nlattr *data[])
+{
+	struct ip_tunnel *t = netdev_priv(dev);
+	struct ip_tunnel_parm p;
+	struct net *net = t->net;
+	struct sit_net *sitn = net_generic(net, sit_net_id);
+#ifdef CONFIG_IPV6_SIT_6RD
+	struct ip_tunnel_6rd ip6rd;
+#endif
+
+	if (dev == sitn->fb_tunnel_dev)
+		return -EINVAL;
+
+	ipip6_netlink_parms(data, &p);
+
+	if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
+	    (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
+		return -EINVAL;
+
+	t = ipip6_tunnel_locate(net, &p, 0);
+
+	if (t) {
+		if (t->dev != dev)
+			return -EEXIST;
+	} else
+		t = netdev_priv(dev);
+
+	ipip6_tunnel_update(t, &p);
+
+#ifdef CONFIG_IPV6_SIT_6RD
+	if (ipip6_netlink_6rd_parms(data, &ip6rd))
+		return ipip6_tunnel_update_6rd(t, &ip6rd);
+#endif
+
+	return 0;
+}
+
+static size_t ipip6_get_size(const struct net_device *dev)
+{
+	return
+		/* IFLA_IPTUN_LINK */
+		nla_total_size(4) +
+		/* IFLA_IPTUN_LOCAL */
+		nla_total_size(4) +
+		/* IFLA_IPTUN_REMOTE */
+		nla_total_size(4) +
+		/* IFLA_IPTUN_TTL */
+		nla_total_size(1) +
+		/* IFLA_IPTUN_TOS */
+		nla_total_size(1) +
+		/* IFLA_IPTUN_PMTUDISC */
+		nla_total_size(1) +
+		/* IFLA_IPTUN_FLAGS */
+		nla_total_size(2) +
+		/* IFLA_IPTUN_PROTO */
+		nla_total_size(1) +
+#ifdef CONFIG_IPV6_SIT_6RD
+		/* IFLA_IPTUN_6RD_PREFIX */
+		nla_total_size(sizeof(struct in6_addr)) +
+		/* IFLA_IPTUN_6RD_RELAY_PREFIX */
+		nla_total_size(4) +
+		/* IFLA_IPTUN_6RD_PREFIXLEN */
+		nla_total_size(2) +
+		/* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */
+		nla_total_size(2) +
+#endif
+		0;
+}
+
+static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	struct ip_tunnel_parm *parm = &tunnel->parms;
+
+	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
+	    nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
+	    nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
+	    nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
+	    nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
+	    nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
+		       !!(parm->iph.frag_off & htons(IP_DF))) ||
+	    nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
+	    nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags))
+		goto nla_put_failure;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+	if (nla_put(skb, IFLA_IPTUN_6RD_PREFIX, sizeof(struct in6_addr),
+		    &tunnel->ip6rd.prefix) ||
+	    nla_put_be32(skb, IFLA_IPTUN_6RD_RELAY_PREFIX,
+			 tunnel->ip6rd.relay_prefix) ||
+	    nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN,
+			tunnel->ip6rd.prefixlen) ||
+	    nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN,
+			tunnel->ip6rd.relay_prefixlen))
+		goto nla_put_failure;
+#endif
+
 	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
+	[IFLA_IPTUN_LINK]		= { .type = NLA_U32 },
+	[IFLA_IPTUN_LOCAL]		= { .type = NLA_U32 },
+	[IFLA_IPTUN_REMOTE]		= { .type = NLA_U32 },
+	[IFLA_IPTUN_TTL]		= { .type = NLA_U8 },
+	[IFLA_IPTUN_TOS]		= { .type = NLA_U8 },
+	[IFLA_IPTUN_PMTUDISC]		= { .type = NLA_U8 },
+	[IFLA_IPTUN_FLAGS]		= { .type = NLA_U16 },
+	[IFLA_IPTUN_PROTO]		= { .type = NLA_U8 },
+#ifdef CONFIG_IPV6_SIT_6RD
+	[IFLA_IPTUN_6RD_PREFIX]		= { .len = sizeof(struct in6_addr) },
+	[IFLA_IPTUN_6RD_RELAY_PREFIX]	= { .type = NLA_U32 },
+	[IFLA_IPTUN_6RD_PREFIXLEN]	= { .type = NLA_U16 },
+	[IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 },
+#endif
+};
+
+static void ipip6_dellink(struct net_device *dev, struct list_head *head)
+{
+	struct net *net = dev_net(dev);
+	struct sit_net *sitn = net_generic(net, sit_net_id);
+
+	if (dev != sitn->fb_tunnel_dev)
+		unregister_netdevice_queue(dev, head);
 }
 
-static struct net_protocol sit_protocol = {
+static struct rtnl_link_ops sit_link_ops __read_mostly = {
+	.kind		= "sit",
+	.maxtype	= IFLA_IPTUN_MAX,
+	.policy		= ipip6_policy,
+	.priv_size	= sizeof(struct ip_tunnel),
+	.setup		= ipip6_tunnel_setup,
+	.validate	= ipip6_validate,
+	.newlink	= ipip6_newlink,
+	.changelink	= ipip6_changelink,
+	.get_size	= ipip6_get_size,
+	.fill_info	= ipip6_fill_info,
+	.dellink	= ipip6_dellink,
+};
+
+static struct xfrm_tunnel sit_handler __read_mostly = {
 	.handler	=	ipip6_rcv,
 	.err_handler	=	ipip6_err,
+	.priority	=	1,
+};
+
+static struct xfrm_tunnel ipip_handler __read_mostly = {
+	.handler	=	ipip_rcv,
+	.err_handler	=	ipip6_err,
+	.priority	=	2,
 };
 
-static void __exit sit_destroy_tunnels(void)
+static void __net_exit sit_destroy_tunnels(struct net *net,
+					   struct list_head *head)
 {
+	struct sit_net *sitn = net_generic(net, sit_net_id);
+	struct net_device *dev, *aux;
 	int prio;
 
+	for_each_netdev_safe(net, dev, aux)
+		if (dev->rtnl_link_ops == &sit_link_ops)
+			unregister_netdevice_queue(dev, head);
+
 	for (prio = 1; prio < 4; prio++) {
 		int h;
 		for (h = 0; h < HASH_SIZE; h++) {
 			struct ip_tunnel *t;
-			while ((t = tunnels[prio][h]) != NULL)
-				unregister_netdevice(t->dev);
+
+			t = rtnl_dereference(sitn->tunnels[prio][h]);
+			while (t != NULL) {
+				/* If dev is in the same netns, it has already
+				 * been added to the list by the previous loop.
+				 */
+				if (!net_eq(dev_net(t->dev), net))
+					unregister_netdevice_queue(t->dev,
+								   head);
+				t = rtnl_dereference(t->next);
+			}
 		}
 	}
 }
 
-void __exit sit_cleanup(void)
+static int __net_init sit_init_net(struct net *net)
 {
-	inet_del_protocol(&sit_protocol, IPPROTO_IPV6);
+	struct sit_net *sitn = net_generic(net, sit_net_id);
+	struct ip_tunnel *t;
+	int err;
+
+	sitn->tunnels[0] = sitn->tunnels_wc;
+	sitn->tunnels[1] = sitn->tunnels_l;
+	sitn->tunnels[2] = sitn->tunnels_r;
+	sitn->tunnels[3] = sitn->tunnels_r_l;
+
+	sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
+					   ipip6_tunnel_setup);
+	if (!sitn->fb_tunnel_dev) {
+		err = -ENOMEM;
+		goto err_alloc_dev;
+	}
+	dev_net_set(sitn->fb_tunnel_dev, net);
+	sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops;
+	/* FB netdevice is special: we have one, and only one per netns.
+	 * Allowing to move it to another netns is clearly unsafe.
+	 */
+	sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+
+	err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
+	if (err)
+		goto err_dev_free;
+
+	ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
+
+	if ((err = register_netdev(sitn->fb_tunnel_dev)))
+		goto err_reg_dev;
+
+	t = netdev_priv(sitn->fb_tunnel_dev);
+
+	strcpy(t->parms.name, sitn->fb_tunnel_dev->name);
+	return 0;
+
+err_reg_dev:
+	dev_put(sitn->fb_tunnel_dev);
+err_dev_free:
+	ipip6_dev_free(sitn->fb_tunnel_dev);
+err_alloc_dev:
+	return err;
+}
+
+static void __net_exit sit_exit_net(struct net *net)
+{
+	LIST_HEAD(list);
 
 	rtnl_lock();
-	sit_destroy_tunnels();
-	unregister_netdevice(ipip6_fb_tunnel_dev);
+	sit_destroy_tunnels(net, &list);
+	unregister_netdevice_many(&list);
 	rtnl_unlock();
 }
 
-int __init sit_init(void)
+static struct pernet_operations sit_net_ops = {
+	.init = sit_init_net,
+	.exit = sit_exit_net,
+	.id   = &sit_net_id,
+	.size = sizeof(struct sit_net),
+};
+
+static void __exit sit_cleanup(void)
+{
+	rtnl_link_unregister(&sit_link_ops);
+	xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
+	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+
+	unregister_pernet_device(&sit_net_ops);
+	rcu_barrier(); /* Wait for completion of call_rcu()'s */
+}
+
+static int __init sit_init(void)
 {
 	int err;
 
-	printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n");
+	pr_info("IPv6 over IPv4 tunneling driver\n");
 
-	if (inet_add_protocol(&sit_protocol, IPPROTO_IPV6) < 0) {
-		printk(KERN_INFO "sit init: Can't add protocol\n");
-		return -EAGAIN;
+	err = register_pernet_device(&sit_net_ops);
+	if (err < 0)
+		return err;
+	err = xfrm4_tunnel_register(&sit_handler, AF_INET6);
+	if (err < 0) {
+		pr_info("%s: can't register ip6ip4\n", __func__);
+		goto xfrm_tunnel_failed;
 	}
-
-	ipip6_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0", 
-					   ipip6_tunnel_setup);
-	if (!ipip6_fb_tunnel_dev) {
-		err = -ENOMEM;
-		goto err1;
+	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
+	if (err < 0) {
+		pr_info("%s: can't register ip4ip4\n", __func__);
+		goto xfrm_tunnel4_failed;
 	}
+	err = rtnl_link_register(&sit_link_ops);
+	if (err < 0)
+		goto rtnl_link_failed;
 
-	ipip6_fb_tunnel_dev->init = ipip6_fb_tunnel_init;
-
-	if ((err =  register_netdev(ipip6_fb_tunnel_dev)))
-		goto err2;
-
- out:
+out:
 	return err;
- err2:
-	free_netdev(ipip6_fb_tunnel_dev);
- err1:
-	inet_del_protocol(&sit_protocol, IPPROTO_IPV6);
+
+rtnl_link_failed:
+	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+xfrm_tunnel4_failed:
+	xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
+xfrm_tunnel_failed:
+	unregister_pernet_device(&sit_net_ops);
 	goto out;
 }
+
+module_init(sit_init);
+module_exit(sit_cleanup);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("sit");
+MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
new file mode 100644
index 00000000000..a822b880689
--- /dev/null
+++ b/net/ipv6/syncookies.c
@@ -0,0 +1,272 @@
+/*
+ *  IPv6 Syncookies implementation for the Linux kernel
+ *
+ *  Authors:
+ *  Glenn Griffin	<ggriffin.kernel@gmail.com>
+ *
+ *  Based on IPv4 implementation by Andi Kleen
+ *  linux/net/ipv4/syncookies.c
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/tcp.h>
+#include <linux/random.h>
+#include <linux/cryptohash.h>
+#include <linux/kernel.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+
+#define COOKIEBITS 24	/* Upper bits store count */
+#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
+
+static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS];
+
+/* RFC 2460, Section 8.3:
+ * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..]
+ *
+ * Due to IPV6_MIN_MTU=1280 the lowest possible MSS is 1220, which allows
+ * using higher values than ipv4 tcp syncookies.
+ * The other values are chosen based on ethernet (1500 and 9k MTU), plus
+ * one that accounts for common encap (PPPoe) overhead. Table must be sorted.
+ */
+static __u16 const msstab[] = {
+	1280 - 60, /* IPV6_MIN_MTU - 60 */
+	1480 - 60,
+	1500 - 60,
+	9000 - 60,
+};
+
+static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
+					   struct request_sock *req,
+					   struct dst_entry *dst)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct sock *child;
+
+	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
+	if (child)
+		inet_csk_reqsk_queue_add(sk, req, child);
+	else
+		reqsk_free(req);
+
+	return child;
+}
+
+static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
+		      ipv6_cookie_scratch);
+
+static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr,
+		       __be16 sport, __be16 dport, u32 count, int c)
+{
+	__u32 *tmp;
+
+	net_get_random_once(syncookie6_secret, sizeof(syncookie6_secret));
+
+	tmp  = __get_cpu_var(ipv6_cookie_scratch);
+
+	/*
+	 * we have 320 bits of information to hash, copy in the remaining
+	 * 192 bits required for sha_transform, from the syncookie6_secret
+	 * and overwrite the digest with the secret
+	 */
+	memcpy(tmp + 10, syncookie6_secret[c], 44);
+	memcpy(tmp, saddr, 16);
+	memcpy(tmp + 4, daddr, 16);
+	tmp[8] = ((__force u32)sport << 16) + (__force u32)dport;
+	tmp[9] = count;
+	sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5);
+
+	return tmp[17];
+}
+
+static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr,
+				   const struct in6_addr *daddr,
+				   __be16 sport, __be16 dport, __u32 sseq,
+				   __u32 data)
+{
+	u32 count = tcp_cookie_time();
+	return (cookie_hash(saddr, daddr, sport, dport, 0, 0) +
+		sseq + (count << COOKIEBITS) +
+		((cookie_hash(saddr, daddr, sport, dport, count, 1) + data)
+		& COOKIEMASK));
+}
+
+static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr,
+				  const struct in6_addr *daddr, __be16 sport,
+				  __be16 dport, __u32 sseq)
+{
+	__u32 diff, count = tcp_cookie_time();
+
+	cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq;
+
+	diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS);
+	if (diff >= MAX_SYNCOOKIE_AGE)
+		return (__u32)-1;
+
+	return (cookie -
+		cookie_hash(saddr, daddr, sport, dport, count - diff, 1))
+		& COOKIEMASK;
+}
+
+u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
+			      const struct tcphdr *th, __u16 *mssp)
+{
+	int mssind;
+	const __u16 mss = *mssp;
+
+	for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
+		if (mss >= msstab[mssind])
+			break;
+
+	*mssp = msstab[mssind];
+
+	return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source,
+				     th->dest, ntohl(th->seq), mssind);
+}
+EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence);
+
+__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp)
+{
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
+
+	tcp_synq_overflow(sk);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
+
+	return __cookie_v6_init_sequence(iph, th, mssp);
+}
+
+int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
+		      __u32 cookie)
+{
+	__u32 seq = ntohl(th->seq) - 1;
+	__u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
+					    th->source, th->dest, seq);
+
+	return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
+}
+EXPORT_SYMBOL_GPL(__cookie_v6_check);
+
+struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_options_received tcp_opt;
+	struct inet_request_sock *ireq;
+	struct tcp_request_sock *treq;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcphdr *th = tcp_hdr(skb);
+	__u32 cookie = ntohl(th->ack_seq) - 1;
+	struct sock *ret = sk;
+	struct request_sock *req;
+	int mss;
+	struct dst_entry *dst;
+	__u8 rcv_wscale;
+	bool ecn_ok = false;
+
+	if (!sysctl_tcp_syncookies || !th->ack || th->rst)
+		goto out;
+
+	if (tcp_synq_no_recent_overflow(sk) ||
+		(mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie)) == 0) {
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
+		goto out;
+	}
+
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
+
+	/* check for timestamp cookie support */
+	memset(&tcp_opt, 0, sizeof(tcp_opt));
+	tcp_parse_options(skb, &tcp_opt, 0, NULL);
+
+	if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
+		goto out;
+
+	ret = NULL;
+	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
+	if (!req)
+		goto out;
+
+	ireq = inet_rsk(req);
+	treq = tcp_rsk(req);
+	treq->listener = NULL;
+
+	if (security_inet_conn_request(sk, skb, req))
+		goto out_free;
+
+	req->mss = mss;
+	ireq->ir_rmt_port = th->source;
+	ireq->ir_num = ntohs(th->dest);
+	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
+	if (ipv6_opt_accepted(sk, skb) ||
+	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
+	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
+		atomic_inc(&skb->users);
+		ireq->pktopts = skb;
+	}
+
+	ireq->ir_iif = sk->sk_bound_dev_if;
+	/* So that link locals have meaning */
+	if (!sk->sk_bound_dev_if &&
+	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+		ireq->ir_iif = inet6_iif(skb);
+
+	ireq->ir_mark = inet_request_mark(sk, skb);
+
+	req->expires = 0UL;
+	req->num_retrans = 0;
+	ireq->ecn_ok		= ecn_ok;
+	ireq->snd_wscale	= tcp_opt.snd_wscale;
+	ireq->sack_ok		= tcp_opt.sack_ok;
+	ireq->wscale_ok		= tcp_opt.wscale_ok;
+	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
+	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
+	treq->snt_synack	= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
+	treq->rcv_isn = ntohl(th->seq) - 1;
+	treq->snt_isn = cookie;
+
+	/*
+	 * We need to lookup the dst_entry to get the correct window size.
+	 * This is taken from tcp_v6_syn_recv_sock.  Somebody please enlighten
+	 * me if there is a preferred way.
+	 */
+	{
+		struct in6_addr *final_p, final;
+		struct flowi6 fl6;
+		memset(&fl6, 0, sizeof(fl6));
+		fl6.flowi6_proto = IPPROTO_TCP;
+		fl6.daddr = ireq->ir_v6_rmt_addr;
+		final_p = fl6_update_dst(&fl6, np->opt, &final);
+		fl6.saddr = ireq->ir_v6_loc_addr;
+		fl6.flowi6_oif = sk->sk_bound_dev_if;
+		fl6.flowi6_mark = ireq->ir_mark;
+		fl6.fl6_dport = ireq->ir_rmt_port;
+		fl6.fl6_sport = inet_sk(sk)->inet_sport;
+		security_req_classify_flow(req, flowi6_to_flowi(&fl6));
+
+		dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+		if (IS_ERR(dst))
+			goto out_free;
+	}
+
+	req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
+	tcp_select_initial_window(tcp_full_space(sk), req->mss,
+				  &req->rcv_wnd, &req->window_clamp,
+				  ireq->wscale_ok, &rcv_wscale,
+				  dst_metric(dst, RTAX_INITRWND));
+
+	ireq->rcv_wscale = rcv_wscale;
+
+	ret = get_cookie_sock(sk, skb, req, dst);
+out:
+	return ret;
+out_free:
+	reqsk_free(req);
+	return NULL;
+}
+
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 8eff9fa1e98..058f3eca2e5 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -7,116 +7,159 @@
 
 #include <linux/mm.h>
 #include <linux/sysctl.h>
-#include <linux/config.h>
 #include <linux/in6.h>
 #include <linux/ipv6.h>
+#include <linux/slab.h>
+#include <linux/export.h>
 #include <net/ndisc.h>
 #include <net/ipv6.h>
 #include <net/addrconf.h>
+#include <net/inet_frag.h>
 
-#ifdef CONFIG_SYSCTL
-
-static ctl_table ipv6_table[] = {
-	{
-		.ctl_name	= NET_IPV6_ROUTE,
-		.procname	= "route",
-		.maxlen		= 0,
-		.mode		= 0555,
-		.child		= ipv6_route_table
-	},
-	{
-		.ctl_name	= NET_IPV6_ICMP,
-		.procname	= "icmp",
-		.maxlen		= 0,
-		.mode		= 0555,
-		.child		= ipv6_icmp_table
-	},
+static struct ctl_table ipv6_table_template[] = {
 	{
-		.ctl_name	= NET_IPV6_BINDV6ONLY,
 		.procname	= "bindv6only",
-		.data		= &sysctl_ipv6_bindv6only,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
-	{
-		.ctl_name	= NET_IPV6_IP6FRAG_HIGH_THRESH,
-		.procname	= "ip6frag_high_thresh",
-		.data		= &sysctl_ip6frag_high_thresh,
+		.data		= &init_net.ipv6.sysctl.bindv6only,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= proc_dointvec
 	},
 	{
-		.ctl_name	= NET_IPV6_IP6FRAG_LOW_THRESH,
-		.procname	= "ip6frag_low_thresh",
-		.data		= &sysctl_ip6frag_low_thresh,
+		.procname	= "anycast_src_echo_reply",
+		.data		= &init_net.ipv6.sysctl.anycast_src_echo_reply,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= proc_dointvec
 	},
 	{
-		.ctl_name	= NET_IPV6_IP6FRAG_TIME,
-		.procname	= "ip6frag_time",
-		.data		= &sysctl_ip6frag_time,
+		.procname	= "flowlabel_consistency",
+		.data		= &init_net.ipv6.sysctl.flowlabel_consistency,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-		.strategy	= &sysctl_jiffies,
+		.proc_handler	= proc_dointvec
 	},
 	{
-		.ctl_name	= NET_IPV6_IP6FRAG_SECRET_INTERVAL,
-		.procname	= "ip6frag_secret_interval",
-		.data		= &sysctl_ip6frag_secret_interval,
+		.procname	= "fwmark_reflect",
+		.data		= &init_net.ipv6.sysctl.fwmark_reflect,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-		.strategy	= &sysctl_jiffies
+		.proc_handler	= proc_dointvec
 	},
+	{ }
+};
+
+static struct ctl_table ipv6_rotable[] = {
 	{
-		.ctl_name	= NET_IPV6_MLD_MAX_MSF,
 		.procname	= "mld_max_msf",
 		.data		= &sysctl_mld_max_msf,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= proc_dointvec
 	},
-	{ .ctl_name = 0 }
+	{ }
 };
 
-static struct ctl_table_header *ipv6_sysctl_header;
+static int __net_init ipv6_sysctl_net_init(struct net *net)
+{
+	struct ctl_table *ipv6_table;
+	struct ctl_table *ipv6_route_table;
+	struct ctl_table *ipv6_icmp_table;
+	int err;
 
-static ctl_table ipv6_net_table[] = {
-	{
-		.ctl_name	= NET_IPV6,
-		.procname	= "ipv6",
-		.mode		= 0555,
-		.child		= ipv6_table
-	},
-        { .ctl_name = 0 }
-};
+	err = -ENOMEM;
+	ipv6_table = kmemdup(ipv6_table_template, sizeof(ipv6_table_template),
+			     GFP_KERNEL);
+	if (!ipv6_table)
+		goto out;
+	ipv6_table[0].data = &net->ipv6.sysctl.bindv6only;
+	ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply;
+	ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency;
 
-static ctl_table ipv6_root_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= ipv6_net_table
-	},
-        { .ctl_name = 0 }
-};
+	ipv6_route_table = ipv6_route_sysctl_init(net);
+	if (!ipv6_route_table)
+		goto out_ipv6_table;
 
-void ipv6_sysctl_register(void)
-{
-	ipv6_sysctl_header = register_sysctl_table(ipv6_root_table, 0);
+	ipv6_icmp_table = ipv6_icmp_sysctl_init(net);
+	if (!ipv6_icmp_table)
+		goto out_ipv6_route_table;
+
+	net->ipv6.sysctl.hdr = register_net_sysctl(net, "net/ipv6", ipv6_table);
+	if (!net->ipv6.sysctl.hdr)
+		goto out_ipv6_icmp_table;
+
+	net->ipv6.sysctl.route_hdr =
+		register_net_sysctl(net, "net/ipv6/route", ipv6_route_table);
+	if (!net->ipv6.sysctl.route_hdr)
+		goto out_unregister_ipv6_table;
+
+	net->ipv6.sysctl.icmp_hdr =
+		register_net_sysctl(net, "net/ipv6/icmp", ipv6_icmp_table);
+	if (!net->ipv6.sysctl.icmp_hdr)
+		goto out_unregister_route_table;
+
+	err = 0;
+out:
+	return err;
+out_unregister_route_table:
+	unregister_net_sysctl_table(net->ipv6.sysctl.route_hdr);
+out_unregister_ipv6_table:
+	unregister_net_sysctl_table(net->ipv6.sysctl.hdr);
+out_ipv6_icmp_table:
+	kfree(ipv6_icmp_table);
+out_ipv6_route_table:
+	kfree(ipv6_route_table);
+out_ipv6_table:
+	kfree(ipv6_table);
+	goto out;
 }
 
-void ipv6_sysctl_unregister(void)
+static void __net_exit ipv6_sysctl_net_exit(struct net *net)
 {
-	unregister_sysctl_table(ipv6_sysctl_header);
+	struct ctl_table *ipv6_table;
+	struct ctl_table *ipv6_route_table;
+	struct ctl_table *ipv6_icmp_table;
+
+	ipv6_table = net->ipv6.sysctl.hdr->ctl_table_arg;
+	ipv6_route_table = net->ipv6.sysctl.route_hdr->ctl_table_arg;
+	ipv6_icmp_table = net->ipv6.sysctl.icmp_hdr->ctl_table_arg;
+
+	unregister_net_sysctl_table(net->ipv6.sysctl.icmp_hdr);
+	unregister_net_sysctl_table(net->ipv6.sysctl.route_hdr);
+	unregister_net_sysctl_table(net->ipv6.sysctl.hdr);
+
+	kfree(ipv6_table);
+	kfree(ipv6_route_table);
+	kfree(ipv6_icmp_table);
 }
 
-#endif /* CONFIG_SYSCTL */
+static struct pernet_operations ipv6_sysctl_net_ops = {
+	.init = ipv6_sysctl_net_init,
+	.exit = ipv6_sysctl_net_exit,
+};
+
+static struct ctl_table_header *ip6_header;
 
+int ipv6_sysctl_register(void)
+{
+	int err = -ENOMEM;
 
+	ip6_header = register_net_sysctl(&init_net, "net/ipv6", ipv6_rotable);
+	if (ip6_header == NULL)
+		goto out;
 
+	err = register_pernet_subsys(&ipv6_sysctl_net_ops);
+	if (err)
+		goto err_pernet;
+out:
+	return err;
+
+err_pernet:
+	unregister_net_sysctl_table(ip6_header);
+	goto out;
+}
+
+void ipv6_sysctl_unregister(void)
+{
+	unregister_net_sysctl_table(ip6_header);
+	unregister_pernet_subsys(&ipv6_sysctl_net_ops);
+}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d693cb988b7..229239ad96b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1,13 +1,11 @@
 /*
  *	TCP over IPv6
- *	Linux INET6 implementation 
+ *	Linux INET6 implementation
  *
  *	Authors:
- *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *	Pedro Roque		<roque@di.fc.ul.pt>
  *
- *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
- *
- *	Based on: 
+ *	Based on:
  *	linux/net/ipv4/tcp.c
  *	linux/net/ipv4/tcp_input.c
  *	linux/net/ipv4/tcp_output.c
@@ -25,8 +23,8 @@
  *      2 of the License, or (at your option) any later version.
  */
 
+#include <linux/bottom_half.h>
 #include <linux/module.h>
-#include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
@@ -40,7 +38,8 @@
 #include <linux/jhash.h>
 #include <linux/ipsec.h>
 #include <linux/times.h>
-
+#include <linux/slab.h>
+#include <linux/uaccess.h>
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
 #include <linux/random.h>
@@ -48,6 +47,7 @@
 #include <net/tcp.h>
 #include <net/ndisc.h>
 #include <net/inet6_hashtables.h>
+#include <net/inet6_connection_sock.h>
 #include <net/ipv6.h>
 #include <net/transp_v6.h>
 #include <net/addrconf.h>
@@ -56,493 +56,118 @@
 #include <net/inet_ecn.h>
 #include <net/protocol.h>
 #include <net/xfrm.h>
-#include <net/addrconf.h>
 #include <net/snmp.h>
 #include <net/dsfield.h>
-
-#include <asm/uaccess.h>
+#include <net/timewait_sock.h>
+#include <net/netdma.h>
+#include <net/inet_common.h>
+#include <net/secure_seq.h>
+#include <net/tcp_memcontrol.h>
+#include <net/busy_poll.h>
 
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
-static void	tcp_v6_send_reset(struct sk_buff *skb);
-static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
-static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 
-				  struct sk_buff *skb);
-
-static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
-static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
-
-static struct tcp_func ipv6_mapped;
-static struct tcp_func ipv6_specific;
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
 
-static inline int tcp_v6_bind_conflict(const struct sock *sk,
-				       const struct inet_bind_bucket *tb)
-{
-	const struct sock *sk2;
-	const struct hlist_node *node;
-
-	/* We must walk the whole port owner list in this case. -DaveM */
-	sk_for_each_bound(sk2, node, &tb->owners) {
-		if (sk != sk2 &&
-		    (!sk->sk_bound_dev_if ||
-		     !sk2->sk_bound_dev_if ||
-		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
-		    (!sk->sk_reuse || !sk2->sk_reuse ||
-		     sk2->sk_state == TCP_LISTEN) &&
-		     ipv6_rcv_saddr_equal(sk, sk2))
-			break;
-	}
+static void	tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
+static void	tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+				      struct request_sock *req);
 
-	return node != NULL;
-}
+static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
 
-/* Grrr, addr_type already calculated by caller, but I don't want
- * to add some silly "cookie" argument to this method just for that.
- * But it doesn't matter, the recalculation is in the rarest path
- * this function ever takes.
- */
-static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
+static const struct inet_connection_sock_af_ops ipv6_mapped;
+static const struct inet_connection_sock_af_ops ipv6_specific;
+#ifdef CONFIG_TCP_MD5SIG
+static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
+static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
+#else
+static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
+						   const struct in6_addr *addr)
 {
-	struct inet_bind_hashbucket *head;
-	struct inet_bind_bucket *tb;
-	struct hlist_node *node;
-	int ret;
-
-	local_bh_disable();
-	if (snum == 0) {
-		int low = sysctl_local_port_range[0];
-		int high = sysctl_local_port_range[1];
-		int remaining = (high - low) + 1;
-		int rover;
-
-		spin_lock(&tcp_hashinfo.portalloc_lock);
-		if (tcp_hashinfo.port_rover < low)
-			rover = low;
-		else
-			rover = tcp_hashinfo.port_rover;
-		do {	rover++;
-			if (rover > high)
-				rover = low;
-			head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
-			spin_lock(&head->lock);
-			inet_bind_bucket_for_each(tb, node, &head->chain)
-				if (tb->port == rover)
-					goto next;
-			break;
-		next:
-			spin_unlock(&head->lock);
-		} while (--remaining > 0);
-		tcp_hashinfo.port_rover = rover;
-		spin_unlock(&tcp_hashinfo.portalloc_lock);
-
-		/* Exhausted local port range during search?  It is not
-		 * possible for us to be holding one of the bind hash
-		 * locks if this test triggers, because if 'remaining'
-		 * drops to zero, we broke out of the do/while loop at
-		 * the top level, not from the 'break;' statement.
-		 */
-		ret = 1;
-		if (unlikely(remaining <= 0))
-			goto fail;
-
-		/* OK, here is the one we will use. */
-		snum = rover;
-	} else {
-		head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
-		spin_lock(&head->lock);
-		inet_bind_bucket_for_each(tb, node, &head->chain)
-			if (tb->port == snum)
-				goto tb_found;
-	}
-	tb = NULL;
-	goto tb_not_found;
-tb_found:
-	if (tb && !hlist_empty(&tb->owners)) {
-		if (tb->fastreuse > 0 && sk->sk_reuse &&
-		    sk->sk_state != TCP_LISTEN) {
-			goto success;
-		} else {
-			ret = 1;
-			if (tcp_v6_bind_conflict(sk, tb))
-				goto fail_unlock;
-		}
-	}
-tb_not_found:
-	ret = 1;
-	if (tb == NULL) {
-	       	tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
-		if (tb == NULL)
-			goto fail_unlock;
-	}
-	if (hlist_empty(&tb->owners)) {
-		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
-			tb->fastreuse = 1;
-		else
-			tb->fastreuse = 0;
-	} else if (tb->fastreuse &&
-		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
-		tb->fastreuse = 0;
-
-success:
-	if (!inet_csk(sk)->icsk_bind_hash)
-		inet_bind_hash(sk, tb, snum);
-	BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
-	ret = 0;
-
-fail_unlock:
-	spin_unlock(&head->lock);
-fail:
-	local_bh_enable();
-	return ret;
+	return NULL;
 }
+#endif
 
-static __inline__ void __tcp_v6_hash(struct sock *sk)
+static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 {
-	struct hlist_head *list;
-	rwlock_t *lock;
-
-	BUG_TRAP(sk_unhashed(sk));
-
-	if (sk->sk_state == TCP_LISTEN) {
-		list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
-		lock = &tcp_hashinfo.lhash_lock;
-		inet_listen_wlock(&tcp_hashinfo);
-	} else {
-		unsigned int hash;
-		sk->sk_hash = hash = inet6_sk_ehashfn(sk);
-		hash &= (tcp_hashinfo.ehash_size - 1);
-		list = &tcp_hashinfo.ehash[hash].chain;
-		lock = &tcp_hashinfo.ehash[hash].lock;
-		write_lock(lock);
-	}
-
-	__sk_add_node(sk, list);
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock(lock);
+	struct dst_entry *dst = skb_dst(skb);
+	const struct rt6_info *rt = (const struct rt6_info *)dst;
+
+	dst_hold(dst);
+	sk->sk_rx_dst = dst;
+	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+	if (rt->rt6i_node)
+		inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
 }
 
-
 static void tcp_v6_hash(struct sock *sk)
 {
 	if (sk->sk_state != TCP_CLOSE) {
-		struct tcp_sock *tp = tcp_sk(sk);
-
-		if (tp->af_specific == &ipv6_mapped) {
+		if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
 			tcp_prot.hash(sk);
 			return;
 		}
 		local_bh_disable();
-		__tcp_v6_hash(sk);
+		__inet6_hash(sk, NULL);
 		local_bh_enable();
 	}
 }
 
-/*
- * Open request hash tables.
- */
-
-static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
+static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
 {
-	u32 a, b, c;
-
-	a = raddr->s6_addr32[0];
-	b = raddr->s6_addr32[1];
-	c = raddr->s6_addr32[2];
-
-	a += JHASH_GOLDEN_RATIO;
-	b += JHASH_GOLDEN_RATIO;
-	c += rnd;
-	__jhash_mix(a, b, c);
-
-	a += raddr->s6_addr32[3];
-	b += (u32) rport;
-	__jhash_mix(a, b, c);
-
-	return c & (TCP_SYNQ_HSIZE - 1);
-}
-
-static struct request_sock *tcp_v6_search_req(const struct sock *sk,
-					      struct request_sock ***prevp,
-					      __u16 rport,
-					      struct in6_addr *raddr,
-					      struct in6_addr *laddr,
-					      int iif)
-{
-	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-	struct request_sock *req, **prev;  
-
-	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
-	     (req = *prev) != NULL;
-	     prev = &req->dl_next) {
-		const struct tcp6_request_sock *treq = tcp6_rsk(req);
-
-		if (inet_rsk(req)->rmt_port == rport &&
-		    req->rsk_ops->family == AF_INET6 &&
-		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
-		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
-		    (!treq->iif || treq->iif == iif)) {
-			BUG_TRAP(req->sk == NULL);
-			*prevp = prev;
-			return req;
-		}
-	}
-
-	return NULL;
+	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
+					    ipv6_hdr(skb)->saddr.s6_addr32,
+					    tcp_hdr(skb)->dest,
+					    tcp_hdr(skb)->source);
 }
 
-static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
-				   struct in6_addr *saddr, 
-				   struct in6_addr *daddr, 
-				   unsigned long base)
-{
-	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
-}
-
-static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
-{
-	if (skb->protocol == htons(ETH_P_IPV6)) {
-		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
-						    skb->nh.ipv6h->saddr.s6_addr32,
-						    skb->h.th->dest,
-						    skb->h.th->source);
-	} else {
-		return secure_tcp_sequence_number(skb->nh.iph->daddr,
-						  skb->nh.iph->saddr,
-						  skb->h.th->dest,
-						  skb->h.th->source);
-	}
-}
-
-static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
-				      struct inet_timewait_sock **twp)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	const struct ipv6_pinfo *np = inet6_sk(sk);
-	const struct in6_addr *daddr = &np->rcv_saddr;
-	const struct in6_addr *saddr = &np->daddr;
-	const int dif = sk->sk_bound_dev_if;
-	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
-	unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
-	struct sock *sk2;
-	const struct hlist_node *node;
-	struct inet_timewait_sock *tw;
-
-	prefetch(head->chain.first);
-	write_lock(&head->lock);
-
-	/* Check TIME-WAIT sockets first. */
-	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
-		const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
-
-		tw = inet_twsk(sk2);
-
-		if(*((__u32 *)&(tw->tw_dport))	== ports	&&
-		   sk2->sk_family		== PF_INET6	&&
-		   ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr)	&&
-		   ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr)	&&
-		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
-			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
-			struct tcp_sock *tp = tcp_sk(sk);
-
-			if (tcptw->tw_ts_recent_stamp &&
-			    (!twp ||
-			     (sysctl_tcp_tw_reuse &&
-			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
-				/* See comment in tcp_ipv4.c */
-				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
-				if (!tp->write_seq)
-					tp->write_seq = 1;
-				tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
-				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
-				sock_hold(sk2);
-				goto unique;
-			} else
-				goto not_unique;
-		}
-	}
-	tw = NULL;
-
-	/* And established part... */
-	sk_for_each(sk2, node, &head->chain) {
-		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
-			goto not_unique;
-	}
-
-unique:
-	BUG_TRAP(sk_unhashed(sk));
-	__sk_add_node(sk, &head->chain);
-	sk->sk_hash = hash;
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock(&head->lock);
-
-	if (twp) {
-		*twp = tw;
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-	} else if (tw) {
-		/* Silly. Should hash-dance instead... */
-		inet_twsk_deschedule(tw, &tcp_death_row);
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-
-		inet_twsk_put(tw);
-	}
-	return 0;
-
-not_unique:
-	write_unlock(&head->lock);
-	return -EADDRNOTAVAIL;
-}
-
-static inline u32 tcpv6_port_offset(const struct sock *sk)
-{
-	const struct inet_sock *inet = inet_sk(sk);
-	const struct ipv6_pinfo *np = inet6_sk(sk);
-
-	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
-					   np->daddr.s6_addr32,
-					   inet->dport);
-}
-
-static int tcp_v6_hash_connect(struct sock *sk)
-{
-	unsigned short snum = inet_sk(sk)->num;
- 	struct inet_bind_hashbucket *head;
- 	struct inet_bind_bucket *tb;
-	int ret;
-
- 	if (!snum) {
- 		int low = sysctl_local_port_range[0];
- 		int high = sysctl_local_port_range[1];
-		int range = high - low;
- 		int i;
-		int port;
-		static u32 hint;
-		u32 offset = hint + tcpv6_port_offset(sk);
-		struct hlist_node *node;
- 		struct inet_timewait_sock *tw = NULL;
-
- 		local_bh_disable();
-		for (i = 1; i <= range; i++) {
-			port = low + (i + offset) % range;
- 			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
- 			spin_lock(&head->lock);
-
- 			/* Does not bother with rcv_saddr checks,
- 			 * because the established check is already
- 			 * unique enough.
- 			 */
-			inet_bind_bucket_for_each(tb, node, &head->chain) {
- 				if (tb->port == port) {
- 					BUG_TRAP(!hlist_empty(&tb->owners));
- 					if (tb->fastreuse >= 0)
- 						goto next_port;
- 					if (!__tcp_v6_check_established(sk,
-									port,
-									&tw))
- 						goto ok;
- 					goto next_port;
- 				}
- 			}
-
- 			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
- 			if (!tb) {
- 				spin_unlock(&head->lock);
- 				break;
- 			}
- 			tb->fastreuse = -1;
- 			goto ok;
-
- 		next_port:
- 			spin_unlock(&head->lock);
- 		}
- 		local_bh_enable();
-
- 		return -EADDRNOTAVAIL;
-
-ok:
-		hint += i;
-
- 		/* Head lock still held and bh's disabled */
- 		inet_bind_hash(sk, tb, port);
-		if (sk_unhashed(sk)) {
- 			inet_sk(sk)->sport = htons(port);
- 			__tcp_v6_hash(sk);
- 		}
- 		spin_unlock(&head->lock);
-
- 		if (tw) {
- 			inet_twsk_deschedule(tw, &tcp_death_row);
- 			inet_twsk_put(tw);
- 		}
-
-		ret = 0;
-		goto out;
- 	}
-
- 	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
- 	tb   = inet_csk(sk)->icsk_bind_hash;
-	spin_lock_bh(&head->lock);
-
-	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		__tcp_v6_hash(sk);
-		spin_unlock_bh(&head->lock);
-		return 0;
-	} else {
-		spin_unlock(&head->lock);
-		/* No definite answer... Walk to established hash table */
-		ret = __tcp_v6_check_established(sk, snum, NULL);
-out:
-		local_bh_enable();
-		return ret;
-	}
-}
-
-static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 
+static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			  int addr_len)
 {
 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
 	struct inet_sock *inet = inet_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct in6_addr *saddr = NULL, *final_p = NULL, final;
-	struct flowi fl;
+	struct in6_addr *saddr = NULL, *final_p, final;
+	struct rt6_info *rt;
+	struct flowi6 fl6;
 	struct dst_entry *dst;
 	int addr_type;
 	int err;
 
-	if (addr_len < SIN6_LEN_RFC2133) 
+	if (addr_len < SIN6_LEN_RFC2133)
 		return -EINVAL;
 
-	if (usin->sin6_family != AF_INET6) 
-		return(-EAFNOSUPPORT);
+	if (usin->sin6_family != AF_INET6)
+		return -EAFNOSUPPORT;
 
-	memset(&fl, 0, sizeof(fl));
+	memset(&fl6, 0, sizeof(fl6));
 
 	if (np->sndflow) {
-		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
-		IP6_ECN_flow_init(fl.fl6_flowlabel);
-		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+		IP6_ECN_flow_init(fl6.flowlabel);
+		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
 			struct ip6_flowlabel *flowlabel;
-			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
 			if (flowlabel == NULL)
 				return -EINVAL;
-			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
 			fl6_sock_release(flowlabel);
 		}
 	}
 
 	/*
-  	 *	connect() to INADDR_ANY means loopback (BSD'ism).
-  	 */
-  	
-  	if(ipv6_addr_any(&usin->sin6_addr))
-		usin->sin6_addr.s6_addr[15] = 0x1; 
+	 *	connect() to INADDR_ANY means loopback (BSD'ism).
+	 */
+
+	if (ipv6_addr_any(&usin->sin6_addr))
+		usin->sin6_addr.s6_addr[15] = 0x1;
 
 	addr_type = ipv6_addr_type(&usin->sin6_addr);
 
-	if(addr_type & IPV6_ADDR_MULTICAST)
+	if (addr_type & IPV6_ADDR_MULTICAST)
 		return -ENETUNREACH;
 
 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
@@ -564,21 +189,21 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	}
 
 	if (tp->rx_opt.ts_recent_stamp &&
-	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
+	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
 		tp->rx_opt.ts_recent = 0;
 		tp->rx_opt.ts_recent_stamp = 0;
 		tp->write_seq = 0;
 	}
 
-	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
-	np->flow_label = fl.fl6_flowlabel;
+	sk->sk_v6_daddr = usin->sin6_addr;
+	np->flow_label = fl6.flowlabel;
 
 	/*
 	 *	TCP over IPv4
 	 */
 
 	if (addr_type == IPV6_ADDR_MAPPED) {
-		u32 exthdrlen = tp->ext_header_len;
+		u32 exthdrlen = icsk->icsk_ext_hdr_len;
 		struct sockaddr_in sin;
 
 		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
@@ -590,84 +215,89 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		sin.sin_port = usin->sin6_port;
 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
 
-		tp->af_specific = &ipv6_mapped;
+		icsk->icsk_af_ops = &ipv6_mapped;
 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
+#ifdef CONFIG_TCP_MD5SIG
+		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
+#endif
 
 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
 
 		if (err) {
-			tp->ext_header_len = exthdrlen;
-			tp->af_specific = &ipv6_specific;
+			icsk->icsk_ext_hdr_len = exthdrlen;
+			icsk->icsk_af_ops = &ipv6_specific;
 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
+#ifdef CONFIG_TCP_MD5SIG
+			tp->af_specific = &tcp_sock_ipv6_specific;
+#endif
 			goto failure;
 		} else {
-			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
-				      inet->saddr);
-			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
-				      inet->rcv_saddr);
+			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
+			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
+					       &sk->sk_v6_rcv_saddr);
 		}
 
 		return err;
 	}
 
-	if (!ipv6_addr_any(&np->rcv_saddr))
-		saddr = &np->rcv_saddr;
-
-	fl.proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-	ipv6_addr_copy(&fl.fl6_src,
-		       (saddr ? saddr : &np->saddr));
-	fl.oif = sk->sk_bound_dev_if;
-	fl.fl_ip_dport = usin->sin6_port;
-	fl.fl_ip_sport = inet->sport;
-
-	if (np->opt && np->opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+		saddr = &sk->sk_v6_rcv_saddr;
 
-	err = ip6_dst_lookup(sk, &dst, &fl);
-	if (err)
-		goto failure;
-	if (final_p)
-		ipv6_addr_copy(&fl.fl6_dst, final_p);
+	fl6.flowi6_proto = IPPROTO_TCP;
+	fl6.daddr = sk->sk_v6_daddr;
+	fl6.saddr = saddr ? *saddr : np->saddr;
+	fl6.flowi6_oif = sk->sk_bound_dev_if;
+	fl6.flowi6_mark = sk->sk_mark;
+	fl6.fl6_dport = usin->sin6_port;
+	fl6.fl6_sport = inet->inet_sport;
 
-	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+	final_p = fl6_update_dst(&fl6, np->opt, &final);
+
+	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
 		goto failure;
+	}
 
 	if (saddr == NULL) {
-		saddr = &fl.fl6_src;
-		ipv6_addr_copy(&np->rcv_saddr, saddr);
+		saddr = &fl6.saddr;
+		sk->sk_v6_rcv_saddr = *saddr;
 	}
 
 	/* set the source address */
-	ipv6_addr_copy(&np->saddr, saddr);
-	inet->rcv_saddr = LOOPBACK4_IPV6;
+	np->saddr = *saddr;
+	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
-	ip6_dst_store(sk, dst, NULL);
-	sk->sk_route_caps = dst->dev->features &
-		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+	sk->sk_gso_type = SKB_GSO_TCPV6;
+	__ip6_dst_store(sk, dst, NULL, NULL);
 
-	tp->ext_header_len = 0;
+	rt = (struct rt6_info *) dst;
+	if (tcp_death_row.sysctl_tw_recycle &&
+	    !tp->rx_opt.ts_recent_stamp &&
+	    ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr))
+		tcp_fetch_timewait_stamp(sk, dst);
+
+	icsk->icsk_ext_hdr_len = 0;
 	if (np->opt)
-		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
+		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
+					  np->opt->opt_nflen);
 
 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 
-	inet->dport = usin->sin6_port;
+	inet->inet_dport = usin->sin6_port;
 
 	tcp_set_state(sk, TCP_SYN_SENT);
-	err = tcp_v6_hash_connect(sk);
+	err = inet6_hash_connect(&tcp_death_row, sk);
 	if (err)
 		goto late_failure;
 
-	if (!tp->write_seq)
+	if (!tp->write_seq && likely(!tp->repair))
 		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
-							     np->daddr.s6_addr32,
-							     inet->sport,
-							     inet->dport);
+							     sk->sk_v6_daddr.s6_addr32,
+							     inet->inet_sport,
+							     inet->inet_dport);
 
 	err = tcp_connect(sk);
 	if (err)
@@ -679,97 +309,105 @@ late_failure:
 	tcp_set_state(sk, TCP_CLOSE);
 	__sk_dst_reset(sk);
 failure:
-	inet->dport = 0;
+	inet->inet_dport = 0;
 	sk->sk_route_caps = 0;
 	return err;
 }
 
+static void tcp_v6_mtu_reduced(struct sock *sk)
+{
+	struct dst_entry *dst;
+
+	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+		return;
+
+	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
+	if (!dst)
+		return;
+
+	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
+		tcp_sync_mss(sk, dst_mtu(dst));
+		tcp_simple_retransmit(sk);
+	}
+}
+
 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-		int type, int code, int offset, __u32 info)
+		u8 type, u8 code, int offset, __be32 info)
 {
-	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
+	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
 	struct ipv6_pinfo *np;
 	struct sock *sk;
 	int err;
-	struct tcp_sock *tp; 
-	__u32 seq;
+	struct tcp_sock *tp;
+	struct request_sock *fastopen;
+	__u32 seq, snd_una;
+	struct net *net = dev_net(skb->dev);
 
-	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
-			  th->source, skb->dev->ifindex);
+	sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
+			th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
 
 	if (sk == NULL) {
-		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+		ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
+				   ICMP6_MIB_INERRORS);
 		return;
 	}
 
 	if (sk->sk_state == TCP_TIME_WAIT) {
-		inet_twsk_put((struct inet_timewait_sock *)sk);
+		inet_twsk_put(inet_twsk(sk));
 		return;
 	}
 
 	bh_lock_sock(sk);
-	if (sock_owned_by_user(sk))
-		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
+		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 
 	if (sk->sk_state == TCP_CLOSE)
 		goto out;
 
+	if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
+		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+		goto out;
+	}
+
 	tp = tcp_sk(sk);
-	seq = ntohl(th->seq); 
+	seq = ntohl(th->seq);
+	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
+	fastopen = tp->fastopen_rsk;
+	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 	if (sk->sk_state != TCP_LISTEN &&
-	    !between(seq, tp->snd_una, tp->snd_nxt)) {
-		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+	    !between(seq, snd_una, tp->snd_nxt)) {
+		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 		goto out;
 	}
 
 	np = inet6_sk(sk);
 
-	if (type == ICMPV6_PKT_TOOBIG) {
-		struct dst_entry *dst = NULL;
-
-		if (sock_owned_by_user(sk))
-			goto out;
-		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
-			goto out;
-
-		/* icmp should have updated the destination cache entry */
-		dst = __sk_dst_check(sk, np->dst_cookie);
-
-		if (dst == NULL) {
-			struct inet_sock *inet = inet_sk(sk);
-			struct flowi fl;
+	if (type == NDISC_REDIRECT) {
+		struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
 
-			/* BUGGG_FUTURE: Again, it is not clear how
-			   to handle rthdr case. Ignore this complexity
-			   for now.
-			 */
-			memset(&fl, 0, sizeof(fl));
-			fl.proto = IPPROTO_TCP;
-			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-			fl.oif = sk->sk_bound_dev_if;
-			fl.fl_ip_dport = inet->dport;
-			fl.fl_ip_sport = inet->sport;
-
-			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
-				sk->sk_err_soft = -err;
-				goto out;
-			}
+		if (dst)
+			dst->ops->redirect(dst, sk, skb);
+		goto out;
+	}
 
-			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
-				sk->sk_err_soft = -err;
-				goto out;
-			}
+	if (type == ICMPV6_PKT_TOOBIG) {
+		/* We are not interested in TCP_LISTEN and open_requests
+		 * (SYN-ACKs send out by Linux are always <576bytes so
+		 * they should go through unfragmented).
+		 */
+		if (sk->sk_state == TCP_LISTEN)
+			goto out;
 
-		} else
-			dst_hold(dst);
+		if (!ip6_sk_accept_pmtu(sk))
+			goto out;
 
-		if (tp->pmtu_cookie > dst_mtu(dst)) {
-			tcp_sync_mss(sk, dst_mtu(dst));
-			tcp_simple_retransmit(sk);
-		} /* else let the usual retransmit timer handle it */
-		dst_release(dst);
+		tp->mtu_info = ntohl(info);
+		if (!sock_owned_by_user(sk))
+			tcp_v6_mtu_reduced(sk);
+		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
+					   &tp->tsq_flags))
+			sock_hold(sk);
 		goto out;
 	}
 
@@ -782,29 +420,34 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		if (sock_owned_by_user(sk))
 			goto out;
 
-		req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
-					&hdr->saddr, inet6_iif(skb));
+		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
+					   &hdr->saddr, inet6_iif(skb));
 		if (!req)
 			goto out;
 
 		/* ICMPs are not backlogged, hence we cannot get
 		 * an established socket here.
 		 */
-		BUG_TRAP(req->sk == NULL);
+		WARN_ON(req->sk != NULL);
 
 		if (seq != tcp_rsk(req)->snt_isn) {
-			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 			goto out;
 		}
 
 		inet_csk_reqsk_queue_drop(sk, req, prev);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 		goto out;
 
 	case TCP_SYN_SENT:
-	case TCP_SYN_RECV:  /* Cannot happen.
-			       It can, it SYNs are crossed. --ANK */ 
+	case TCP_SYN_RECV:
+		/* Only in fast or simultaneous open. If a fast open socket is
+		 * is already accepted it is treated as a connected one below.
+		 */
+		if (fastopen && fastopen->sk == NULL)
+			break;
+
 		if (!sock_owned_by_user(sk)) {
-			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
 			sk->sk_err = err;
 			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
 
@@ -826,200 +469,293 @@ out:
 }
 
 
-static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
-			      struct dst_entry *dst)
+static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
+			      struct flowi6 *fl6,
+			      struct request_sock *req,
+			      u16 queue_mapping,
+			      struct tcp_fastopen_cookie *foc)
 {
-	struct tcp6_request_sock *treq = tcp6_rsk(req);
+	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct sk_buff * skb;
-	struct ipv6_txoptions *opt = NULL;
-	struct in6_addr * final_p = NULL, final;
-	struct flowi fl;
-	int err = -1;
-
-	memset(&fl, 0, sizeof(fl));
-	fl.proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
-	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
-	fl.fl6_flowlabel = 0;
-	fl.oif = treq->iif;
-	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-	fl.fl_ip_sport = inet_sk(sk)->sport;
-
-	if (dst == NULL) {
-		opt = np->opt;
-		if (opt == NULL &&
-		    np->rxopt.bits.osrcrt == 2 &&
-		    treq->pktopts) {
-			struct sk_buff *pktopts = treq->pktopts;
-			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
-			if (rxopt->srcrt)
-				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
-		}
+	struct sk_buff *skb;
+	int err = -ENOMEM;
 
-		if (opt && opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
+	/* First, grab a route. */
+	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
+		goto done;
 
-		err = ip6_dst_lookup(sk, &dst, &fl);
-		if (err)
-			goto done;
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
-		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
-			goto done;
-	}
+	skb = tcp_make_synack(sk, dst, req, foc);
 
-	skb = tcp_make_synack(sk, dst, req);
 	if (skb) {
-		struct tcphdr *th = skb->h.th;
+		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
+				    &ireq->ir_v6_rmt_addr);
 
-		th->check = tcp_v6_check(th, skb->len,
-					 &treq->loc_addr, &treq->rmt_addr,
-					 csum_partial((char *)th, skb->len, skb->csum));
+		fl6->daddr = ireq->ir_v6_rmt_addr;
+		if (np->repflow && (ireq->pktopts != NULL))
+			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
 
-		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
-		err = ip6_xmit(sk, skb, &fl, opt, 0);
-		if (err == NET_XMIT_CN)
-			err = 0;
+		skb_set_queue_mapping(skb, queue_mapping);
+		err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
+		err = net_xmit_eval(err);
 	}
 
 done:
-        if (opt && opt != np->opt)
-		sock_kfree_s(sk, opt, opt->tot_len);
 	return err;
 }
 
+static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
+{
+	struct flowi6 fl6;
+	int res;
+
+	res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0, NULL);
+	if (!res) {
+		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+	}
+	return res;
+}
+
 static void tcp_v6_reqsk_destructor(struct request_sock *req)
 {
-	if (tcp6_rsk(req)->pktopts)
-		kfree_skb(tcp6_rsk(req)->pktopts);
+	kfree_skb(inet_rsk(req)->pktopts);
 }
 
-static struct request_sock_ops tcp6_request_sock_ops = {
-	.family		=	AF_INET6,
-	.obj_size	=	sizeof(struct tcp6_request_sock),
-	.rtx_syn_ack	=	tcp_v6_send_synack,
-	.send_ack	=	tcp_v6_reqsk_send_ack,
-	.destructor	=	tcp_v6_reqsk_destructor,
-	.send_reset	=	tcp_v6_send_reset
-};
+#ifdef CONFIG_TCP_MD5SIG
+static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
+						   const struct in6_addr *addr)
+{
+	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
+}
 
-static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
+static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
+						struct sock *addr_sk)
 {
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct inet6_skb_parm *opt = IP6CB(skb);
-
-	if (np->rxopt.all) {
-		if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
-		    ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
-		    (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
-		    ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
-			return 1;
-	}
-	return 0;
+	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
 }
 
+static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
+						      struct request_sock *req)
+{
+	return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr);
+}
 
-static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 
-			      struct sk_buff *skb)
+static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
+				 int optlen)
 {
-	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct tcp_md5sig cmd;
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
-		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
-		skb->csum = offsetof(struct tcphdr, check);
-	} else {
-		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 
-					    csum_partial((char *)th, th->doff<<2, 
-							 skb->csum));
-	}
-}
+	if (optlen < sizeof(cmd))
+		return -EINVAL;
 
+	if (copy_from_user(&cmd, optval, sizeof(cmd)))
+		return -EFAULT;
 
-static void tcp_v6_send_reset(struct sk_buff *skb)
-{
-	struct tcphdr *th = skb->h.th, *t1; 
-	struct sk_buff *buff;
-	struct flowi fl;
+	if (sin6->sin6_family != AF_INET6)
+		return -EINVAL;
 
-	if (th->rst)
-		return;
+	if (!cmd.tcpm_keylen) {
+		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
+			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
+					      AF_INET);
+		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
+				      AF_INET6);
+	}
 
-	if (!ipv6_unicast_destination(skb))
-		return; 
+	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
+		return -EINVAL;
 
-	/*
-	 * We need to grab some memory, and put together an RST,
-	 * and then put it into the queue to be sent.
-	 */
+	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
+		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
+				      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
 
-	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
-			 GFP_ATOMIC);
-	if (buff == NULL) 
-	  	return;
+	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
+			      AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
+}
 
-	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
+static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
+					const struct in6_addr *daddr,
+					const struct in6_addr *saddr, int nbytes)
+{
+	struct tcp6_pseudohdr *bp;
+	struct scatterlist sg;
+
+	bp = &hp->md5_blk.ip6;
+	/* 1. TCP pseudo-header (RFC2460) */
+	bp->saddr = *saddr;
+	bp->daddr = *daddr;
+	bp->protocol = cpu_to_be32(IPPROTO_TCP);
+	bp->len = cpu_to_be32(nbytes);
+
+	sg_init_one(&sg, bp, sizeof(*bp));
+	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
+}
 
-	t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
+static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+			       const struct in6_addr *daddr, struct in6_addr *saddr,
+			       const struct tcphdr *th)
+{
+	struct tcp_md5sig_pool *hp;
+	struct hash_desc *desc;
+
+	hp = tcp_get_md5sig_pool();
+	if (!hp)
+		goto clear_hash_noput;
+	desc = &hp->md5_desc;
+
+	if (crypto_hash_init(desc))
+		goto clear_hash;
+	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
+		goto clear_hash;
+	if (tcp_md5_hash_header(hp, th))
+		goto clear_hash;
+	if (tcp_md5_hash_key(hp, key))
+		goto clear_hash;
+	if (crypto_hash_final(desc, md5_hash))
+		goto clear_hash;
+
+	tcp_put_md5sig_pool();
+	return 0;
 
-	/* Swap the send and the receive. */
-	memset(t1, 0, sizeof(*t1));
-	t1->dest = th->source;
-	t1->source = th->dest;
-	t1->doff = sizeof(*t1)/4;
-	t1->rst = 1;
-  
-	if(th->ack) {
-	  	t1->seq = th->ack_seq;
+clear_hash:
+	tcp_put_md5sig_pool();
+clear_hash_noput:
+	memset(md5_hash, 0, 16);
+	return 1;
+}
+
+static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
+			       const struct sock *sk,
+			       const struct request_sock *req,
+			       const struct sk_buff *skb)
+{
+	const struct in6_addr *saddr, *daddr;
+	struct tcp_md5sig_pool *hp;
+	struct hash_desc *desc;
+	const struct tcphdr *th = tcp_hdr(skb);
+
+	if (sk) {
+		saddr = &inet6_sk(sk)->saddr;
+		daddr = &sk->sk_v6_daddr;
+	} else if (req) {
+		saddr = &inet_rsk(req)->ir_v6_loc_addr;
+		daddr = &inet_rsk(req)->ir_v6_rmt_addr;
 	} else {
-		t1->ack = 1;
-		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
-				    + skb->len - (th->doff<<2));
+		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+		saddr = &ip6h->saddr;
+		daddr = &ip6h->daddr;
 	}
 
-	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
-
-	memset(&fl, 0, sizeof(fl));
-	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
-
-	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
-				    sizeof(*t1), IPPROTO_TCP,
-				    buff->csum);
+	hp = tcp_get_md5sig_pool();
+	if (!hp)
+		goto clear_hash_noput;
+	desc = &hp->md5_desc;
+
+	if (crypto_hash_init(desc))
+		goto clear_hash;
+
+	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
+		goto clear_hash;
+	if (tcp_md5_hash_header(hp, th))
+		goto clear_hash;
+	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+		goto clear_hash;
+	if (tcp_md5_hash_key(hp, key))
+		goto clear_hash;
+	if (crypto_hash_final(desc, md5_hash))
+		goto clear_hash;
+
+	tcp_put_md5sig_pool();
+	return 0;
 
-	fl.proto = IPPROTO_TCP;
-	fl.oif = inet6_iif(skb);
-	fl.fl_ip_dport = t1->dest;
-	fl.fl_ip_sport = t1->source;
+clear_hash:
+	tcp_put_md5sig_pool();
+clear_hash_noput:
+	memset(md5_hash, 0, 16);
+	return 1;
+}
 
-	/* sk = NULL, but it is safe for now. RST socket required. */
-	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
+static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
+{
+	const __u8 *hash_location = NULL;
+	struct tcp_md5sig_key *hash_expected;
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
+	int genhash;
+	u8 newhash[16];
+
+	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
+	hash_location = tcp_parse_md5sig_option(th);
+
+	/* We've parsed the options - do we have a hash? */
+	if (!hash_expected && !hash_location)
+		return 0;
 
-		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
-			return;
+	if (hash_expected && !hash_location) {
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
+		return 1;
+	}
 
-		ip6_xmit(NULL, buff, &fl, NULL, 0);
-		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
-		TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
-		return;
+	if (!hash_expected && hash_location) {
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
+		return 1;
 	}
 
-	kfree_skb(buff);
+	/* check the signature */
+	genhash = tcp_v6_md5_hash_skb(newhash,
+				      hash_expected,
+				      NULL, NULL, skb);
+
+	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
+				     genhash ? "failed" : "mismatch",
+				     &ip6h->saddr, ntohs(th->source),
+				     &ip6h->daddr, ntohs(th->dest));
+		return 1;
+	}
+	return 0;
 }
+#endif
+
+struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
+	.family		=	AF_INET6,
+	.obj_size	=	sizeof(struct tcp6_request_sock),
+	.rtx_syn_ack	=	tcp_v6_rtx_synack,
+	.send_ack	=	tcp_v6_reqsk_send_ack,
+	.destructor	=	tcp_v6_reqsk_destructor,
+	.send_reset	=	tcp_v6_send_reset,
+	.syn_ack_timeout =	tcp_syn_ack_timeout,
+};
 
-static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
+#ifdef CONFIG_TCP_MD5SIG
+static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
+	.md5_lookup	=	tcp_v6_reqsk_md5_lookup,
+	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
+};
+#endif
+
+static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
+				 u32 tsval, u32 tsecr, int oif,
+				 struct tcp_md5sig_key *key, int rst, u8 tclass,
+				 u32 label)
 {
-	struct tcphdr *th = skb->h.th, *t1;
+	const struct tcphdr *th = tcp_hdr(skb);
+	struct tcphdr *t1;
 	struct sk_buff *buff;
-	struct flowi fl;
-	int tot_len = sizeof(struct tcphdr);
+	struct flowi6 fl6;
+	struct net *net = dev_net(skb_dst(skb)->dev);
+	struct sock *ctl_sk = net->ipv6.tcp_sk;
+	unsigned int tot_len = sizeof(struct tcphdr);
+	struct dst_entry *dst;
+	__be32 *topt;
 
-	if (ts)
-		tot_len += 3*4;
+	if (tsecr)
+		tot_len += TCPOLEN_TSTAMP_ALIGNED;
+#ifdef CONFIG_TCP_MD5SIG
+	if (key)
+		tot_len += TCPOLEN_MD5SIG_ALIGNED;
+#endif
 
 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
 			 GFP_ATOMIC);
@@ -1028,205 +764,381 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
 
 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
 
-	t1 = (struct tcphdr *) skb_push(buff,tot_len);
+	t1 = (struct tcphdr *) skb_push(buff, tot_len);
+	skb_reset_transport_header(buff);
 
 	/* Swap the send and the receive. */
 	memset(t1, 0, sizeof(*t1));
 	t1->dest = th->source;
 	t1->source = th->dest;
-	t1->doff = tot_len/4;
+	t1->doff = tot_len / 4;
 	t1->seq = htonl(seq);
 	t1->ack_seq = htonl(ack);
-	t1->ack = 1;
+	t1->ack = !rst || !th->ack;
+	t1->rst = rst;
 	t1->window = htons(win);
-	
-	if (ts) {
-		u32 *ptr = (u32*)(t1 + 1);
-		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
-			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
-		*ptr++ = htonl(tcp_time_stamp);
-		*ptr = htonl(ts);
+
+	topt = (__be32 *)(t1 + 1);
+
+	if (tsecr) {
+		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
+		*topt++ = htonl(tsval);
+		*topt++ = htonl(tsecr);
 	}
 
-	buff->csum = csum_partial((char *)t1, tot_len, 0);
+#ifdef CONFIG_TCP_MD5SIG
+	if (key) {
+		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
+		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
+				    &ipv6_hdr(skb)->saddr,
+				    &ipv6_hdr(skb)->daddr, t1);
+	}
+#endif
 
-	memset(&fl, 0, sizeof(fl));
-	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.daddr = ipv6_hdr(skb)->saddr;
+	fl6.saddr = ipv6_hdr(skb)->daddr;
+	fl6.flowlabel = label;
+
+	buff->ip_summed = CHECKSUM_PARTIAL;
+	buff->csum = 0;
+
+	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
+
+	fl6.flowi6_proto = IPPROTO_TCP;
+	if (rt6_need_strict(&fl6.daddr) && !oif)
+		fl6.flowi6_oif = inet6_iif(skb);
+	else
+		fl6.flowi6_oif = oif;
+	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
+	fl6.fl6_dport = t1->dest;
+	fl6.fl6_sport = t1->source;
+	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
+
+	/* Pass a socket to ip6_dst_lookup either it is for RST
+	 * Underlying function will use this to retrieve the network
+	 * namespace
+	 */
+	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
+	if (!IS_ERR(dst)) {
+		skb_dst_set(buff, dst);
+		ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
+		TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
+		if (rst)
+			TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
+		return;
+	}
 
-	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
-				    tot_len, IPPROTO_TCP,
-				    buff->csum);
+	kfree_skb(buff);
+}
 
-	fl.proto = IPPROTO_TCP;
-	fl.oif = inet6_iif(skb);
-	fl.fl_ip_dport = t1->dest;
-	fl.fl_ip_sport = t1->source;
+static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
+{
+	const struct tcphdr *th = tcp_hdr(skb);
+	u32 seq = 0, ack_seq = 0;
+	struct tcp_md5sig_key *key = NULL;
+#ifdef CONFIG_TCP_MD5SIG
+	const __u8 *hash_location = NULL;
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	unsigned char newhash[16];
+	int genhash;
+	struct sock *sk1 = NULL;
+#endif
+	int oif;
 
-	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
-		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
-			return;
-		ip6_xmit(NULL, buff, &fl, NULL, 0);
-		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
+	if (th->rst)
+		return;
+
+	if (!ipv6_unicast_destination(skb))
 		return;
+
+#ifdef CONFIG_TCP_MD5SIG
+	hash_location = tcp_parse_md5sig_option(th);
+	if (!sk && hash_location) {
+		/*
+		 * active side is lost. Try to find listening socket through
+		 * source port, and then find md5 key through listening socket.
+		 * we are not loose security here:
+		 * Incoming packet is checked with md5 hash with finding key,
+		 * no RST generated if md5 hash doesn't match.
+		 */
+		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
+					   &tcp_hashinfo, &ipv6h->saddr,
+					   th->source, &ipv6h->daddr,
+					   ntohs(th->source), inet6_iif(skb));
+		if (!sk1)
+			return;
+
+		rcu_read_lock();
+		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
+		if (!key)
+			goto release_sk1;
+
+		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
+		if (genhash || memcmp(hash_location, newhash, 16) != 0)
+			goto release_sk1;
+	} else {
+		key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
 	}
+#endif
 
-	kfree_skb(buff);
+	if (th->ack)
+		seq = ntohl(th->ack_seq);
+	else
+		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
+			  (th->doff << 2);
+
+	oif = sk ? sk->sk_bound_dev_if : 0;
+	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
+
+#ifdef CONFIG_TCP_MD5SIG
+release_sk1:
+	if (sk1) {
+		rcu_read_unlock();
+		sock_put(sk1);
+	}
+#endif
+}
+
+static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+			    u32 win, u32 tsval, u32 tsecr, int oif,
+			    struct tcp_md5sig_key *key, u8 tclass,
+			    u32 label)
+{
+	tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass,
+			     label);
 }
 
 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
 {
 	struct inet_timewait_sock *tw = inet_twsk(sk);
-	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 
 	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
-			tcptw->tw_ts_recent);
+			tcp_time_stamp + tcptw->tw_ts_offset,
+			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
+			tw->tw_tclass, (tw->tw_flowlabel << 12));
 
 	inet_twsk_put(tw);
 }
 
-static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
+static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+				  struct request_sock *req)
 {
-	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
+	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+	 */
+	tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
+			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+			tcp_rsk(req)->rcv_nxt,
+			req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
+			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
+			0, 0);
 }
 
 
-static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
+static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
 {
 	struct request_sock *req, **prev;
-	const struct tcphdr *th = skb->h.th;
+	const struct tcphdr *th = tcp_hdr(skb);
 	struct sock *nsk;
 
 	/* Find possible connection requests. */
-	req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
-				&skb->nh.ipv6h->daddr, inet6_iif(skb));
+	req = inet6_csk_search_req(sk, &prev, th->source,
+				   &ipv6_hdr(skb)->saddr,
+				   &ipv6_hdr(skb)->daddr, inet6_iif(skb));
 	if (req)
-		return tcp_check_req(sk, skb, req, prev);
+		return tcp_check_req(sk, skb, req, prev, false);
 
-	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
-					 th->source, &skb->nh.ipv6h->daddr,
-					 ntohs(th->dest), inet6_iif(skb));
+	nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
+			&ipv6_hdr(skb)->saddr, th->source,
+			&ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
 
 	if (nsk) {
 		if (nsk->sk_state != TCP_TIME_WAIT) {
 			bh_lock_sock(nsk);
 			return nsk;
 		}
-		inet_twsk_put((struct inet_timewait_sock *)nsk);
+		inet_twsk_put(inet_twsk(nsk));
 		return NULL;
 	}
 
-#if 0 /*def CONFIG_SYN_COOKIES*/
-	if (!th->rst && !th->syn && th->ack)
-		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
+#ifdef CONFIG_SYN_COOKIES
+	if (!th->syn)
+		sk = cookie_v6_check(sk, skb);
 #endif
 	return sk;
 }
 
-static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-	const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
-
-	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
-	inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
-}
-
-
 /* FIXME: this is substantially similar to the ipv4 code.
  * Can some kind of merge be done? -- erics
  */
 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcp6_request_sock *treq;
-	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_options_received tmp_opt;
+	struct request_sock *req;
+	struct inet_request_sock *ireq;
+	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct request_sock *req = NULL;
 	__u32 isn = TCP_SKB_CB(skb)->when;
+	struct dst_entry *dst = NULL;
+	struct tcp_fastopen_cookie foc = { .len = -1 };
+	bool want_cookie = false, fastopen;
+	struct flowi6 fl6;
+	int err;
 
 	if (skb->protocol == htons(ETH_P_IP))
 		return tcp_v4_conn_request(sk, skb);
 
 	if (!ipv6_unicast_destination(skb))
-		goto drop; 
+		goto drop;
 
-	/*
-	 *	There are no SYN attacks on IPv6, yet...	
-	 */
-	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
-		if (net_ratelimit())
-			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
-		goto drop;		
+	if ((sysctl_tcp_syncookies == 2 ||
+	     inet_csk_reqsk_queue_is_full(sk)) && !isn) {
+		want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
+		if (!want_cookie)
+			goto drop;
 	}
 
-	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
 		goto drop;
+	}
 
-	req = reqsk_alloc(&tcp6_request_sock_ops);
+	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
 	if (req == NULL)
 		goto drop;
 
+#ifdef CONFIG_TCP_MD5SIG
+	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
+#endif
+
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 	tmp_opt.user_mss = tp->rx_opt.user_mss;
+	tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
 
-	tcp_parse_options(skb, &tmp_opt, 0);
+	if (want_cookie && !tmp_opt.saw_tstamp)
+		tcp_clear_options(&tmp_opt);
 
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb);
 
-	treq = tcp6_rsk(req);
-	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
-	TCP_ECN_create_request(req, skb->h.th);
-	treq->pktopts = NULL;
-	if (ipv6_opt_accepted(sk, skb) ||
-	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
-	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
-		atomic_inc(&skb->users);
-		treq->pktopts = skb;
-	}
-	treq->iif = sk->sk_bound_dev_if;
+	ireq = inet_rsk(req);
+	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
+	if (!want_cookie || tmp_opt.tstamp_ok)
+		TCP_ECN_create_request(req, skb, sock_net(sk));
+
+	ireq->ir_iif = sk->sk_bound_dev_if;
+	ireq->ir_mark = inet_request_mark(sk, skb);
 
 	/* So that link locals have meaning */
 	if (!sk->sk_bound_dev_if &&
-	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
-		treq->iif = inet6_iif(skb);
+	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+		ireq->ir_iif = inet6_iif(skb);
+
+	if (!isn) {
+		if (ipv6_opt_accepted(sk, skb) ||
+		    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
+		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim ||
+		    np->repflow) {
+			atomic_inc(&skb->users);
+			ireq->pktopts = skb;
+		}
 
-	if (isn == 0) 
-		isn = tcp_v6_init_sequence(sk,skb);
+		if (want_cookie) {
+			isn = cookie_v6_init_sequence(sk, skb, &req->mss);
+			req->cookie_ts = tmp_opt.tstamp_ok;
+			goto have_isn;
+		}
 
-	tcp_rsk(req)->snt_isn = isn;
+		/* VJ's idea. We save last timestamp seen
+		 * from the destination in peer table, when entering
+		 * state TIME-WAIT, and check against it before
+		 * accepting new connection request.
+		 *
+		 * If "isn" is not zero, this request hit alive
+		 * timewait bucket, so that all the necessary checks
+		 * are made in the function processing timewait state.
+		 */
+		if (tmp_opt.saw_tstamp &&
+		    tcp_death_row.sysctl_tw_recycle &&
+		    (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
+			if (!tcp_peer_is_proven(req, dst, true)) {
+				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
+				goto drop_and_release;
+			}
+		}
+		/* Kill the following clause, if you dislike this way. */
+		else if (!sysctl_tcp_syncookies &&
+			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+			  (sysctl_max_syn_backlog >> 2)) &&
+			 !tcp_peer_is_proven(req, dst, false)) {
+			/* Without syncookies last quarter of
+			 * backlog is filled with destinations,
+			 * proven to be alive.
+			 * It means that we continue to communicate
+			 * to destinations, already remembered
+			 * to the moment of synflood.
+			 */
+			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
+				       &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source));
+			goto drop_and_release;
+		}
 
-	if (tcp_v6_send_synack(sk, req, NULL))
-		goto drop;
+		isn = tcp_v6_init_sequence(skb);
+	}
+have_isn:
 
-	tcp_v6_synq_add(sk, req);
+	if (security_inet_conn_request(sk, skb, req))
+		goto drop_and_release;
 
+	if (!dst && (dst = inet6_csk_route_req(sk, &fl6, req)) == NULL)
+		goto drop_and_free;
+
+	tcp_rsk(req)->snt_isn = isn;
+	tcp_rsk(req)->snt_synack = tcp_time_stamp;
+	tcp_openreq_init_rwin(req, sk, dst);
+	fastopen = !want_cookie &&
+		   tcp_try_fastopen(sk, skb, req, &foc, dst);
+	err = tcp_v6_send_synack(sk, dst, &fl6, req,
+				 skb_get_queue_mapping(skb), &foc);
+	if (!fastopen) {
+		if (err || want_cookie)
+			goto drop_and_free;
+
+		tcp_rsk(req)->listener = NULL;
+		inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+	}
 	return 0;
 
+drop_and_release:
+	dst_release(dst);
+drop_and_free:
+	reqsk_free(req);
 drop:
-	if (req)
-		reqsk_free(req);
-
-	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 	return 0; /* don't send reset */
 }
 
-static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
-					  struct request_sock *req,
-					  struct dst_entry *dst)
+static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+					 struct request_sock *req,
+					 struct dst_entry *dst)
 {
-	struct tcp6_request_sock *treq = tcp6_rsk(req);
+	struct inet_request_sock *ireq;
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct tcp6_sock *newtcp6sk;
 	struct inet_sock *newinet;
 	struct tcp_sock *newtp;
 	struct sock *newsk;
-	struct ipv6_txoptions *opt;
+#ifdef CONFIG_TCP_MD5SIG
+	struct tcp_md5sig_key *key;
+#endif
+	struct flowi6 fl6;
 
 	if (skb->protocol == htons(ETH_P_IP)) {
 		/*
@@ -1235,7 +1147,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
 
-		if (newsk == NULL) 
+		if (newsk == NULL)
 			return NULL;
 
 		newtcp6sk = (struct tcp6_sock *)newsk;
@@ -1247,20 +1159,27 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
-			      newinet->daddr);
+		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
 
-		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
-			      newinet->saddr);
+		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
 
-		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
+		newsk->sk_v6_rcv_saddr = newnp->saddr;
 
-		newtp->af_specific = &ipv6_mapped;
+		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
+#ifdef CONFIG_TCP_MD5SIG
+		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
+#endif
+
+		newnp->ipv6_ac_list = NULL;
+		newnp->ipv6_fl_list = NULL;
 		newnp->pktoptions  = NULL;
 		newnp->opt	   = NULL;
 		newnp->mcast_oif   = inet6_iif(skb);
-		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
+		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
+		newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
+		if (np->repflow)
+			newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
 
 		/*
 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -1269,57 +1188,28 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		 */
 
 		/* It is tricky place. Until this moment IPv4 tcp
-		   worked with IPv6 af_tcp.af_specific.
+		   worked with IPv6 icsk.icsk_af_ops.
 		   Sync it now.
 		 */
-		tcp_sync_mss(newsk, newtp->pmtu_cookie);
+		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
 
 		return newsk;
 	}
 
-	opt = np->opt;
+	ireq = inet_rsk(req);
 
 	if (sk_acceptq_is_full(sk))
 		goto out_overflow;
 
-	if (np->rxopt.bits.osrcrt == 2 &&
-	    opt == NULL && treq->pktopts) {
-		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
-		if (rxopt->srcrt)
-			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
-	}
-
-	if (dst == NULL) {
-		struct in6_addr *final_p = NULL, final;
-		struct flowi fl;
-
-		memset(&fl, 0, sizeof(fl));
-		fl.proto = IPPROTO_TCP;
-		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
-		if (opt && opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
-		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
-		fl.oif = sk->sk_bound_dev_if;
-		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-		fl.fl_ip_sport = inet_sk(sk)->sport;
-
-		if (ip6_dst_lookup(sk, &dst, &fl))
+	if (!dst) {
+		dst = inet6_csk_route_req(sk, &fl6, req);
+		if (!dst)
 			goto out;
-
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
-			goto out;
-	} 
+	}
 
 	newsk = tcp_create_openreq_child(sk, req, skb);
 	if (newsk == NULL)
-		goto out;
+		goto out_nonewsk;
 
 	/*
 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
@@ -1327,9 +1217,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	 * comment in that function for the gory details. -acme
 	 */
 
-	ip6_dst_store(newsk, dst, NULL);
-	newsk->sk_route_caps = dst->dev->features &
-		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+	newsk->sk_gso_type = SKB_GSO_TCPV6;
+	__ip6_dst_store(newsk, dst, NULL, NULL);
+	inet6_sk_rx_dst_set(newsk, skb);
 
 	newtcp6sk = (struct tcp6_sock *)newsk;
 	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
@@ -1340,32 +1230,38 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
-	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
-	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
-	newsk->sk_bound_dev_if = treq->iif;
+	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
+	newnp->saddr = ireq->ir_v6_loc_addr;
+	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
+	newsk->sk_bound_dev_if = ireq->ir_iif;
 
-	/* Now IPv6 options... 
+	/* Now IPv6 options...
 
 	   First: no IPv4 options.
 	 */
-	newinet->opt = NULL;
+	newinet->inet_opt = NULL;
+	newnp->ipv6_ac_list = NULL;
+	newnp->ipv6_fl_list = NULL;
 
 	/* Clone RX bits */
 	newnp->rxopt.all = np->rxopt.all;
 
 	/* Clone pktoptions received with SYN */
 	newnp->pktoptions = NULL;
-	if (treq->pktopts != NULL) {
-		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
-		kfree_skb(treq->pktopts);
-		treq->pktopts = NULL;
+	if (ireq->pktopts != NULL) {
+		newnp->pktoptions = skb_clone(ireq->pktopts,
+					      sk_gfp_atomic(sk, GFP_ATOMIC));
+		consume_skb(ireq->pktopts);
+		ireq->pktopts = NULL;
 		if (newnp->pktoptions)
 			skb_set_owner_r(newnp->pktoptions, newsk);
 	}
 	newnp->opt	  = NULL;
 	newnp->mcast_oif  = inet6_iif(skb);
-	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
+	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
+	if (np->repflow)
+		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
 
 	/* Clone native IPv6 options from listening socket (if any)
 
@@ -1373,59 +1269,58 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	   but we make one more one thing there: reattach optmem
 	   to newsk.
 	 */
-	if (opt) {
-		newnp->opt = ipv6_dup_options(newsk, opt);
-		if (opt != np->opt)
-			sock_kfree_s(sk, opt, opt->tot_len);
-	}
+	if (np->opt)
+		newnp->opt = ipv6_dup_options(newsk, np->opt);
 
-	newtp->ext_header_len = 0;
+	inet_csk(newsk)->icsk_ext_hdr_len = 0;
 	if (newnp->opt)
-		newtp->ext_header_len = newnp->opt->opt_nflen +
-					newnp->opt->opt_flen;
+		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
+						     newnp->opt->opt_flen);
 
 	tcp_sync_mss(newsk, dst_mtu(dst));
-	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	newtp->advmss = dst_metric_advmss(dst);
+	if (tcp_sk(sk)->rx_opt.user_mss &&
+	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
+		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
+
 	tcp_initialize_rcv_mss(newsk);
 
-	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
+	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
+	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
+
+#ifdef CONFIG_TCP_MD5SIG
+	/* Copy over the MD5 key from the original socket */
+	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
+	if (key != NULL) {
+		/* We're using one, so create a matching key
+		 * on the newsk structure. If we fail to get
+		 * memory, then we end up not copying the key
+		 * across. Shucks.
+		 */
+		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
+			       AF_INET6, key->key, key->keylen,
+			       sk_gfp_atomic(sk, GFP_ATOMIC));
+	}
+#endif
 
-	__tcp_v6_hash(newsk);
-	inet_inherit_port(&tcp_hashinfo, sk, newsk);
+	if (__inet_inherit_port(sk, newsk) < 0) {
+		inet_csk_prepare_forced_close(newsk);
+		tcp_done(newsk);
+		goto out;
+	}
+	__inet6_hash(newsk, NULL);
 
 	return newsk;
 
 out_overflow:
-	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
-out:
-	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
-	if (opt && opt != np->opt)
-		sock_kfree_s(sk, opt, opt->tot_len);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+out_nonewsk:
 	dst_release(dst);
+out:
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 	return NULL;
 }
 
-static int tcp_v6_checksum_init(struct sk_buff *skb)
-{
-	if (skb->ip_summed == CHECKSUM_HW) {
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
-				  &skb->nh.ipv6h->daddr,skb->csum))
-			return 0;
-		LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
-	}
-	if (skb->len <= 76) {
-		if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
-				 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
-			return -1;
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	} else {
-		skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
-					  &skb->nh.ipv6h->daddr,0);
-	}
-	return 0;
-}
-
 /* The socket must have it's spinlock held when we get
  * here.
  *
@@ -1451,7 +1346,12 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 	if (skb->protocol == htons(ETH_P_IP))
 		return tcp_v4_do_rcv(sk, skb);
 
-	if (sk_filter(sk, skb, 0))
+#ifdef CONFIG_TCP_MD5SIG
+	if (tcp_v6_inbound_md5_hash(sk, skb))
+		goto discard;
+#endif
+
+	if (sk_filter(sk, skb))
 		goto discard;
 
 	/*
@@ -1470,25 +1370,33 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 	   looks not very well thought. For now we latch
 	   options, received in the last packet, enqueued
 	   by tcp. Feel free to propose better solution.
-	                                       --ANK (980728)
+					       --ANK (980728)
 	 */
 	if (np->rxopt.all)
-		opt_skb = skb_clone(skb, GFP_ATOMIC);
+		opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
-		TCP_CHECK_TIMER(sk);
-		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
-			goto reset;
-		TCP_CHECK_TIMER(sk);
+		struct dst_entry *dst = sk->sk_rx_dst;
+
+		sock_rps_save_rxhash(sk, skb);
+		if (dst) {
+			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
+			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
+				dst_release(dst);
+				sk->sk_rx_dst = NULL;
+			}
+		}
+
+		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
 		if (opt_skb)
 			goto ipv6_pktoptions;
 		return 0;
 	}
 
-	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
+	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
 		goto csum_err;
 
-	if (sk->sk_state == TCP_LISTEN) { 
+	if (sk->sk_state == TCP_LISTEN) {
 		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
 		if (!nsk)
 			goto discard;
@@ -1498,32 +1406,33 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 		 * otherwise we just shortcircuit this and continue with
 		 * the new socket..
 		 */
- 		if(nsk != sk) {
+		if (nsk != sk) {
+			sock_rps_save_rxhash(nsk, skb);
 			if (tcp_child_process(sk, nsk, skb))
 				goto reset;
 			if (opt_skb)
 				__kfree_skb(opt_skb);
 			return 0;
 		}
-	}
+	} else
+		sock_rps_save_rxhash(sk, skb);
 
-	TCP_CHECK_TIMER(sk);
-	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
+	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
 		goto reset;
-	TCP_CHECK_TIMER(sk);
 	if (opt_skb)
 		goto ipv6_pktoptions;
 	return 0;
 
 reset:
-	tcp_v6_send_reset(skb);
+	tcp_v6_send_reset(sk, skb);
 discard:
 	if (opt_skb)
 		__kfree_skb(opt_skb);
 	kfree_skb(skb);
 	return 0;
 csum_err:
-	TCP_INC_STATS_BH(TCP_MIB_INERRS);
+	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
+	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
 	goto discard;
 
 
@@ -1541,7 +1450,11 @@ ipv6_pktoptions:
 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
 			np->mcast_oif = inet6_iif(opt_skb);
 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
-			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
+			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
+		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
+			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
+		if (np->repflow)
+			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
 		if (ipv6_opt_accepted(sk, opt_skb)) {
 			skb_set_owner_r(opt_skb, sk);
 			opt_skb = xchg(&np->pktoptions, opt_skb);
@@ -1551,17 +1464,17 @@ ipv6_pktoptions:
 		}
 	}
 
-	if (opt_skb)
-		kfree_skb(opt_skb);
+	kfree_skb(opt_skb);
 	return 0;
 }
 
-static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+static int tcp_v6_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *pskb;
-	struct tcphdr *th;	
+	const struct tcphdr *th;
+	const struct ipv6hdr *hdr;
 	struct sock *sk;
 	int ret;
+	struct net *net = dev_net(skb->dev);
 
 	if (skb->pkt_type != PACKET_HOST)
 		goto discard_it;
@@ -1569,35 +1482,32 @@ static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
 	/*
 	 *	Count it even if it's bad.
 	 */
-	TCP_INC_STATS_BH(TCP_MIB_INSEGS);
+	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
 
 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
 		goto discard_it;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 
 	if (th->doff < sizeof(struct tcphdr)/4)
 		goto bad_packet;
 	if (!pskb_may_pull(skb, th->doff*4))
 		goto discard_it;
 
-	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
-	     tcp_v6_checksum_init(skb) < 0))
-		goto bad_packet;
+	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
+		goto csum_error;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
+	hdr = ipv6_hdr(skb);
 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
 				    skb->len - th->doff*4);
 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
 	TCP_SKB_CB(skb)->when = 0;
-	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
+	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
 	TCP_SKB_CB(skb)->sacked = 0;
 
-	sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
-			    &skb->nh.ipv6h->daddr, ntohs(th->dest),
-			    inet6_iif(skb));
-
+	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
 	if (!sk)
 		goto no_tcp_socket;
 
@@ -1605,21 +1515,41 @@ process:
 	if (sk->sk_state == TCP_TIME_WAIT)
 		goto do_time_wait;
 
+	if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
+		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+		goto discard_and_relse;
+	}
+
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
 
-	if (sk_filter(sk, skb, 0))
+	if (sk_filter(sk, skb))
 		goto discard_and_relse;
 
+	sk_mark_napi_id(sk, skb);
 	skb->dev = NULL;
 
-	bh_lock_sock(sk);
+	bh_lock_sock_nested(sk);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
-		if (!tcp_prequeue(sk, skb))
+#ifdef CONFIG_NET_DMA
+		struct tcp_sock *tp = tcp_sk(sk);
+		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
+			tp->ucopy.dma_chan = net_dma_find_channel();
+		if (tp->ucopy.dma_chan)
 			ret = tcp_v6_do_rcv(sk, skb);
-	} else
-		sk_add_backlog(sk, skb);
+		else
+#endif
+		{
+			if (!tcp_prequeue(sk, skb))
+				ret = tcp_v6_do_rcv(sk, skb);
+		}
+	} else if (unlikely(sk_add_backlog(sk, skb,
+					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
+		bh_unlock_sock(sk);
+		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
+		goto discard_and_relse;
+	}
 	bh_unlock_sock(sk);
 
 	sock_put(sk);
@@ -1630,18 +1560,15 @@ no_tcp_socket:
 		goto discard_it;
 
 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+csum_error:
+		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
 bad_packet:
-		TCP_INC_STATS_BH(TCP_MIB_INERRS);
+		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
 	} else {
-		tcp_v6_send_reset(skb);
+		tcp_v6_send_reset(NULL, skb);
 	}
 
 discard_it:
-
-	/*
-	 *	Discard frame
-	 */
-
 	kfree_skb(skb);
 	return 0;
 
@@ -1651,24 +1578,27 @@ discard_and_relse:
 
 do_time_wait:
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-		inet_twsk_put((struct inet_timewait_sock *)sk);
+		inet_twsk_put(inet_twsk(sk));
 		goto discard_it;
 	}
 
-	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
-		TCP_INC_STATS_BH(TCP_MIB_INERRS);
-		inet_twsk_put((struct inet_timewait_sock *)sk);
-		goto discard_it;
+	if (skb->len < (th->doff<<2)) {
+		inet_twsk_put(inet_twsk(sk));
+		goto bad_packet;
+	}
+	if (tcp_checksum_complete(skb)) {
+		inet_twsk_put(inet_twsk(sk));
+		goto csum_error;
 	}
 
-	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
-					   skb, th)) {
+	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
 	case TCP_TW_SYN:
 	{
 		struct sock *sk2;
 
-		sk2 = inet6_lookup_listener(&tcp_hashinfo,
-					    &skb->nh.ipv6h->daddr,
+		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
+					    &ipv6_hdr(skb)->saddr, th->source,
+					    &ipv6_hdr(skb)->daddr,
 					    ntohs(th->dest), inet6_iif(skb));
 		if (sk2 != NULL) {
 			struct inet_timewait_sock *tw = inet_twsk(sk);
@@ -1684,175 +1614,112 @@ do_time_wait:
 		break;
 	case TCP_TW_RST:
 		goto no_tcp_socket;
-	case TCP_TW_SUCCESS:;
+	case TCP_TW_SUCCESS:
+		;
 	}
 	goto discard_it;
 }
 
-static int tcp_v6_rebuild_header(struct sock *sk)
-{
-	int err;
-	struct dst_entry *dst;
-	struct ipv6_pinfo *np = inet6_sk(sk);
-
-	dst = __sk_dst_check(sk, np->dst_cookie);
-
-	if (dst == NULL) {
-		struct inet_sock *inet = inet_sk(sk);
-		struct in6_addr *final_p = NULL, final;
-		struct flowi fl;
-
-		memset(&fl, 0, sizeof(fl));
-		fl.proto = IPPROTO_TCP;
-		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-		fl.fl6_flowlabel = np->flow_label;
-		fl.oif = sk->sk_bound_dev_if;
-		fl.fl_ip_dport = inet->dport;
-		fl.fl_ip_sport = inet->sport;
-
-		if (np->opt && np->opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
-
-		err = ip6_dst_lookup(sk, &dst, &fl);
-		if (err) {
-			sk->sk_route_caps = 0;
-			return err;
-		}
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
-			sk->sk_err_soft = -err;
-			return err;
-		}
-
-		ip6_dst_store(sk, dst, NULL);
-		sk->sk_route_caps = dst->dev->features &
-			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
-	}
-
-	return 0;
-}
-
-static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
+static void tcp_v6_early_demux(struct sk_buff *skb)
 {
-	struct sock *sk = skb->sk;
-	struct inet_sock *inet = inet_sk(sk);
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct flowi fl;
-	struct dst_entry *dst;
-	struct in6_addr *final_p = NULL, final;
-
-	memset(&fl, 0, sizeof(fl));
-	fl.proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-	fl.fl6_flowlabel = np->flow_label;
-	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
-	fl.oif = sk->sk_bound_dev_if;
-	fl.fl_ip_sport = inet->sport;
-	fl.fl_ip_dport = inet->dport;
-
-	if (np->opt && np->opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	const struct ipv6hdr *hdr;
+	const struct tcphdr *th;
+	struct sock *sk;
 
-	dst = __sk_dst_check(sk, np->dst_cookie);
+	if (skb->pkt_type != PACKET_HOST)
+		return;
 
-	if (dst == NULL) {
-		int err = ip6_dst_lookup(sk, &dst, &fl);
+	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
+		return;
 
-		if (err) {
-			sk->sk_err_soft = -err;
-			return err;
-		}
+	hdr = ipv6_hdr(skb);
+	th = tcp_hdr(skb);
 
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
+	if (th->doff < sizeof(struct tcphdr) / 4)
+		return;
 
-		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
-			sk->sk_route_caps = 0;
-			return err;
+	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
+					&hdr->saddr, th->source,
+					&hdr->daddr, ntohs(th->dest),
+					inet6_iif(skb));
+	if (sk) {
+		skb->sk = sk;
+		skb->destructor = sock_edemux;
+		if (sk->sk_state != TCP_TIME_WAIT) {
+			struct dst_entry *dst = sk->sk_rx_dst;
+
+			if (dst)
+				dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
+			if (dst &&
+			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
+				skb_dst_set_noref(skb, dst);
 		}
-
-		ip6_dst_store(sk, dst, NULL);
-		sk->sk_route_caps = dst->dev->features &
-			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
 	}
-
-	skb->dst = dst_clone(dst);
-
-	/* Restore final destination back after routing done */
-	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-
-	return ip6_xmit(sk, skb, &fl, np->opt, 0);
 }
 
-static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
-{
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
-
-	sin6->sin6_family = AF_INET6;
-	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
-	sin6->sin6_port	= inet_sk(sk)->dport;
-	/* We do not store received flowlabel for TCP */
-	sin6->sin6_flowinfo = 0;
-	sin6->sin6_scope_id = 0;
-	if (sk->sk_bound_dev_if &&
-	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-		sin6->sin6_scope_id = sk->sk_bound_dev_if;
-}
+static struct timewait_sock_ops tcp6_timewait_sock_ops = {
+	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
+	.twsk_unique	= tcp_twsk_unique,
+	.twsk_destructor = tcp_twsk_destructor,
+};
 
-static int tcp_v6_remember_stamp(struct sock *sk)
-{
-	/* Alas, not yet... */
-	return 0;
-}
+static const struct inet_connection_sock_af_ops ipv6_specific = {
+	.queue_xmit	   = inet6_csk_xmit,
+	.send_check	   = tcp_v6_send_check,
+	.rebuild_header	   = inet6_sk_rebuild_header,
+	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
+	.conn_request	   = tcp_v6_conn_request,
+	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
+	.net_header_len	   = sizeof(struct ipv6hdr),
+	.net_frag_header_len = sizeof(struct frag_hdr),
+	.setsockopt	   = ipv6_setsockopt,
+	.getsockopt	   = ipv6_getsockopt,
+	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
+	.sockaddr_len	   = sizeof(struct sockaddr_in6),
+	.bind_conflict	   = inet6_csk_bind_conflict,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_ipv6_setsockopt,
+	.compat_getsockopt = compat_ipv6_getsockopt,
+#endif
+};
 
-static struct tcp_func ipv6_specific = {
-	.queue_xmit	=	tcp_v6_xmit,
-	.send_check	=	tcp_v6_send_check,
-	.rebuild_header	=	tcp_v6_rebuild_header,
-	.conn_request	=	tcp_v6_conn_request,
-	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
-	.remember_stamp	=	tcp_v6_remember_stamp,
-	.net_header_len	=	sizeof(struct ipv6hdr),
-
-	.setsockopt	=	ipv6_setsockopt,
-	.getsockopt	=	ipv6_getsockopt,
-	.addr2sockaddr	=	v6_addr2sockaddr,
-	.sockaddr_len	=	sizeof(struct sockaddr_in6)
+#ifdef CONFIG_TCP_MD5SIG
+static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
+	.md5_lookup	=	tcp_v6_md5_lookup,
+	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
+	.md5_parse	=	tcp_v6_parse_md5_keys,
 };
+#endif
 
 /*
  *	TCP over IPv4 via INET6 API
  */
-
-static struct tcp_func ipv6_mapped = {
-	.queue_xmit	=	ip_queue_xmit,
-	.send_check	=	tcp_v4_send_check,
-	.rebuild_header	=	inet_sk_rebuild_header,
-	.conn_request	=	tcp_v6_conn_request,
-	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
-	.remember_stamp	=	tcp_v4_remember_stamp,
-	.net_header_len	=	sizeof(struct iphdr),
-
-	.setsockopt	=	ipv6_setsockopt,
-	.getsockopt	=	ipv6_getsockopt,
-	.addr2sockaddr	=	v6_addr2sockaddr,
-	.sockaddr_len	=	sizeof(struct sockaddr_in6)
+static const struct inet_connection_sock_af_ops ipv6_mapped = {
+	.queue_xmit	   = ip_queue_xmit,
+	.send_check	   = tcp_v4_send_check,
+	.rebuild_header	   = inet_sk_rebuild_header,
+	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
+	.conn_request	   = tcp_v6_conn_request,
+	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
+	.net_header_len	   = sizeof(struct iphdr),
+	.setsockopt	   = ipv6_setsockopt,
+	.getsockopt	   = ipv6_getsockopt,
+	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
+	.sockaddr_len	   = sizeof(struct sockaddr_in6),
+	.bind_conflict	   = inet6_csk_bind_conflict,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_ipv6_setsockopt,
+	.compat_getsockopt = compat_ipv6_getsockopt,
+#endif
 };
 
-
+#ifdef CONFIG_TCP_MD5SIG
+static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
+	.md5_lookup	=	tcp_v4_md5_lookup,
+	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
+	.md5_parse	=	tcp_v6_parse_md5_keys,
+};
+#endif
 
 /* NOTE: A lot of things set to zero explicitly by call to
  *       sk_alloc() so need not be done here.
@@ -1860,100 +1727,72 @@ static struct tcp_func ipv6_mapped = {
 static int tcp_v6_init_sock(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	skb_queue_head_init(&tp->out_of_order_queue);
-	tcp_init_xmit_timers(sk);
-	tcp_prequeue_init(tp);
-
-	icsk->icsk_rto = TCP_TIMEOUT_INIT;
-	tp->mdev = TCP_TIMEOUT_INIT;
-
-	/* So many TCP implementations out there (incorrectly) count the
-	 * initial SYN frame in their delayed-ACK and congestion control
-	 * algorithms that we must have the following bandaid to talk
-	 * efficiently to them.  -DaveM
-	 */
-	tp->snd_cwnd = 2;
-
-	/* See draft-stevens-tcpca-spec-01 for discussion of the
-	 * initialization of these values.
-	 */
-	tp->snd_ssthresh = 0x7fffffff;
-	tp->snd_cwnd_clamp = ~0;
-	tp->mss_cache = 536;
-
-	tp->reordering = sysctl_tcp_reordering;
 
-	sk->sk_state = TCP_CLOSE;
+	tcp_init_sock(sk);
 
-	tp->af_specific = &ipv6_specific;
-	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
-	sk->sk_write_space = sk_stream_write_space;
-	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+	icsk->icsk_af_ops = &ipv6_specific;
 
-	sk->sk_sndbuf = sysctl_tcp_wmem[1];
-	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
-
-	atomic_inc(&tcp_sockets_allocated);
+#ifdef CONFIG_TCP_MD5SIG
+	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
+#endif
 
 	return 0;
 }
 
-static int tcp_v6_destroy_sock(struct sock *sk)
+static void tcp_v6_destroy_sock(struct sock *sk)
 {
 	tcp_v4_destroy_sock(sk);
-	return inet6_destroy_sock(sk);
+	inet6_destroy_sock(sk);
 }
 
+#ifdef CONFIG_PROC_FS
 /* Proc filesystem TCPv6 sock list dumping. */
-static void get_openreq6(struct seq_file *seq, 
-			 struct sock *sk, struct request_sock *req, int i, int uid)
+static void get_openreq6(struct seq_file *seq,
+			 const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
 {
-	struct in6_addr *dest, *src;
 	int ttd = req->expires - jiffies;
+	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
+	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
 
 	if (ttd < 0)
 		ttd = 0;
 
-	src = &tcp6_rsk(req)->loc_addr;
-	dest = &tcp6_rsk(req)->rmt_addr;
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
-		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
+		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
 		   i,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3],
-		   ntohs(inet_sk(sk)->sport),
+		   inet_rsk(req)->ir_num,
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3],
-		   ntohs(inet_rsk(req)->rmt_port),
+		   ntohs(inet_rsk(req)->ir_rmt_port),
 		   TCP_SYN_RECV,
-		   0,0, /* could print option size, but that is af dependent. */
-		   1,   /* timers active (only the expire timer) */  
-		   jiffies_to_clock_t(ttd), 
-		   req->retrans,
-		   uid,
-		   0,  /* non standard timer */  
+		   0, 0, /* could print option size, but that is af dependent. */
+		   1,   /* timers active (only the expire timer) */
+		   jiffies_to_clock_t(ttd),
+		   req->num_timeout,
+		   from_kuid_munged(seq_user_ns(seq), uid),
+		   0,  /* non standard timer */
 		   0, /* open_requests have no inode */
 		   0, req);
 }
 
 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 {
-	struct in6_addr *dest, *src;
+	const struct in6_addr *dest, *src;
 	__u16 destp, srcp;
 	int timer_active;
 	unsigned long timer_expires;
-	struct inet_sock *inet = inet_sk(sp);
-	struct tcp_sock *tp = tcp_sk(sp);
+	const struct inet_sock *inet = inet_sk(sp);
+	const struct tcp_sock *tp = tcp_sk(sp);
 	const struct inet_connection_sock *icsk = inet_csk(sp);
-	struct ipv6_pinfo *np = inet6_sk(sp);
+	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
 
-	dest  = &np->daddr;
-	src   = &np->rcv_saddr;
-	destp = ntohs(inet->dport);
-	srcp  = ntohs(inet->sport);
+	dest  = &sp->sk_v6_daddr;
+	src   = &sp->sk_v6_rcv_saddr;
+	destp = ntohs(inet->inet_dport);
+	srcp  = ntohs(inet->inet_sport);
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
 		timer_active	= 1;
@@ -1971,61 +1810,61 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
-		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
+		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
 		   i,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
-		   sp->sk_state, 
-		   tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
+		   sp->sk_state,
+		   tp->write_seq-tp->snd_una,
+		   (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
 		   timer_active,
-		   jiffies_to_clock_t(timer_expires - jiffies),
+		   jiffies_delta_to_clock_t(timer_expires - jiffies),
 		   icsk->icsk_retransmits,
-		   sock_i_uid(sp),
+		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
 		   icsk->icsk_probes_out,
 		   sock_i_ino(sp),
 		   atomic_read(&sp->sk_refcnt), sp,
-		   icsk->icsk_rto,
-		   icsk->icsk_ack.ato,
-		   (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
-		   tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
+		   jiffies_to_clock_t(icsk->icsk_rto),
+		   jiffies_to_clock_t(icsk->icsk_ack.ato),
+		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
+		   tp->snd_cwnd,
+		   sp->sk_state == TCP_LISTEN ?
+			(fastopenq ? fastopenq->max_qlen : 0) :
+			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
 		   );
 }
 
-static void get_timewait6_sock(struct seq_file *seq, 
+static void get_timewait6_sock(struct seq_file *seq,
 			       struct inet_timewait_sock *tw, int i)
 {
-	struct in6_addr *dest, *src;
+	const struct in6_addr *dest, *src;
 	__u16 destp, srcp;
-	struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
-	int ttd = tw->tw_ttd - jiffies;
+	s32 delta = tw->tw_ttd - inet_tw_time_stamp();
 
-	if (ttd < 0)
-		ttd = 0;
-
-	dest = &tcp6tw->tw_v6_daddr;
-	src  = &tcp6tw->tw_v6_rcv_saddr;
+	dest = &tw->tw_v6_daddr;
+	src  = &tw->tw_v6_rcv_saddr;
 	destp = ntohs(tw->tw_dport);
 	srcp  = ntohs(tw->tw_sport);
 
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
-		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
+		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
 		   i,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
 		   tw->tw_substate, 0, 0,
-		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
+		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
 		   atomic_read(&tw->tw_refcnt), tw);
 }
 
-#ifdef CONFIG_PROC_FS
 static int tcp6_seq_show(struct seq_file *seq, void *v)
 {
 	struct tcp_iter_state *st;
+	struct sock *sk = v;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -2041,39 +1880,58 @@ static int tcp6_seq_show(struct seq_file *seq, void *v)
 	switch (st->state) {
 	case TCP_SEQ_STATE_LISTENING:
 	case TCP_SEQ_STATE_ESTABLISHED:
-		get_tcp6_sock(seq, v, st->num);
+		if (sk->sk_state == TCP_TIME_WAIT)
+			get_timewait6_sock(seq, v, st->num);
+		else
+			get_tcp6_sock(seq, v, st->num);
 		break;
 	case TCP_SEQ_STATE_OPENREQ:
 		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
 		break;
-	case TCP_SEQ_STATE_TIME_WAIT:
-		get_timewait6_sock(seq, v, st->num);
-		break;
 	}
 out:
 	return 0;
 }
 
-static struct file_operations tcp6_seq_fops;
+static const struct file_operations tcp6_afinfo_seq_fops = {
+	.owner   = THIS_MODULE,
+	.open    = tcp_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_net
+};
+
 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
-	.owner		= THIS_MODULE,
 	.name		= "tcp6",
 	.family		= AF_INET6,
-	.seq_show	= tcp6_seq_show,
-	.seq_fops	= &tcp6_seq_fops,
+	.seq_fops	= &tcp6_afinfo_seq_fops,
+	.seq_ops	= {
+		.show		= tcp6_seq_show,
+	},
 };
 
-int __init tcp6_proc_init(void)
+int __net_init tcp6_proc_init(struct net *net)
 {
-	return tcp_proc_register(&tcp6_seq_afinfo);
+	return tcp_proc_register(net, &tcp6_seq_afinfo);
 }
 
-void tcp6_proc_exit(void)
+void tcp6_proc_exit(struct net *net)
 {
-	tcp_proc_unregister(&tcp6_seq_afinfo);
+	tcp_proc_unregister(net, &tcp6_seq_afinfo);
 }
 #endif
 
+static void tcp_v6_clear_sk(struct sock *sk, int size)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	/* we do not want to clear pinet6 field, because of RCU lookups */
+	sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6));
+
+	size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
+	memset(&inet->pinet6 + 1, 0, size);
+}
+
 struct proto tcpv6_prot = {
 	.name			= "TCPv6",
 	.owner			= THIS_MODULE,
@@ -2087,13 +1945,17 @@ struct proto tcpv6_prot = {
 	.shutdown		= tcp_shutdown,
 	.setsockopt		= tcp_setsockopt,
 	.getsockopt		= tcp_getsockopt,
-	.sendmsg		= tcp_sendmsg,
 	.recvmsg		= tcp_recvmsg,
+	.sendmsg		= tcp_sendmsg,
+	.sendpage		= tcp_sendpage,
 	.backlog_rcv		= tcp_v6_do_rcv,
+	.release_cb		= tcp_release_cb,
+	.mtu_reduced		= tcp_v6_mtu_reduced,
 	.hash			= tcp_v6_hash,
-	.unhash			= tcp_unhash,
-	.get_port		= tcp_v6_get_port,
+	.unhash			= inet_unhash,
+	.get_port		= inet_csk_get_port,
 	.enter_memory_pressure	= tcp_enter_memory_pressure,
+	.stream_memory_free	= tcp_stream_memory_free,
 	.sockets_allocated	= &tcp_sockets_allocated,
 	.memory_allocated	= &tcp_memory_allocated,
 	.memory_pressure	= &tcp_memory_pressure,
@@ -2103,11 +1965,23 @@ struct proto tcpv6_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp6_sock),
-	.twsk_obj_size		= sizeof(struct tcp6_timewait_sock),
+	.slab_flags		= SLAB_DESTROY_BY_RCU,
+	.twsk_prot		= &tcp6_timewait_sock_ops,
 	.rsk_prot		= &tcp6_request_sock_ops,
+	.h.hashinfo		= &tcp_hashinfo,
+	.no_autobind		= true,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt	= compat_tcp_setsockopt,
+	.compat_getsockopt	= compat_tcp_getsockopt,
+#endif
+#ifdef CONFIG_MEMCG_KMEM
+	.proto_cgroup		= tcp_proto_cgroup,
+#endif
+	.clear_sk		= tcp_v6_clear_sk,
 };
 
-static struct inet6_protocol tcpv6_protocol = {
+static const struct inet6_protocol tcpv6_protocol = {
+	.early_demux	=	tcp_v6_early_demux,
 	.handler	=	tcp_v6_rcv,
 	.err_handler	=	tcp_v6_err,
 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
@@ -2118,15 +1992,61 @@ static struct inet_protosw tcpv6_protosw = {
 	.protocol	=	IPPROTO_TCP,
 	.prot		=	&tcpv6_prot,
 	.ops		=	&inet6_stream_ops,
-	.capability	=	-1,
-	.no_check	=	0,
-	.flags		=	INET_PROTOSW_PERMANENT,
+	.flags		=	INET_PROTOSW_PERMANENT |
+				INET_PROTOSW_ICSK,
 };
 
-void __init tcpv6_init(void)
+static int __net_init tcpv6_net_init(struct net *net)
+{
+	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
+				    SOCK_RAW, IPPROTO_TCP, net);
+}
+
+static void __net_exit tcpv6_net_exit(struct net *net)
+{
+	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
+}
+
+static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
 {
+	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
+}
+
+static struct pernet_operations tcpv6_net_ops = {
+	.init	    = tcpv6_net_init,
+	.exit	    = tcpv6_net_exit,
+	.exit_batch = tcpv6_net_exit_batch,
+};
+
+int __init tcpv6_init(void)
+{
+	int ret;
+
+	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
+	if (ret)
+		goto out;
+
 	/* register inet6 protocol */
-	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
-		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
-	inet6_register_protosw(&tcpv6_protosw);
+	ret = inet6_register_protosw(&tcpv6_protosw);
+	if (ret)
+		goto out_tcpv6_protocol;
+
+	ret = register_pernet_subsys(&tcpv6_net_ops);
+	if (ret)
+		goto out_tcpv6_protosw;
+out:
+	return ret;
+
+out_tcpv6_protosw:
+	inet6_unregister_protosw(&tcpv6_protosw);
+out_tcpv6_protocol:
+	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
+	goto out;
+}
+
+void tcpv6_exit(void)
+{
+	unregister_pernet_subsys(&tcpv6_net_ops);
+	inet6_unregister_protosw(&tcpv6_protosw);
+	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
 }
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
new file mode 100644
index 00000000000..01b0ff9a0c2
--- /dev/null
+++ b/net/ipv6/tcpv6_offload.c
@@ -0,0 +1,93 @@
+/*
+ *	IPV6 GSO/GRO offload support
+ *	Linux INET6 implementation
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *      TCPv6 GSO/GRO support
+ */
+#include <linux/skbuff.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/ip6_checksum.h>
+#include "ip6_offload.h"
+
+static int tcp_v6_gso_send_check(struct sk_buff *skb)
+{
+	const struct ipv6hdr *ipv6h;
+	struct tcphdr *th;
+
+	if (!pskb_may_pull(skb, sizeof(*th)))
+		return -EINVAL;
+
+	ipv6h = ipv6_hdr(skb);
+	th = tcp_hdr(skb);
+
+	th->check = 0;
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	__tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
+	return 0;
+}
+
+static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
+					 struct sk_buff *skb)
+{
+	const struct ipv6hdr *iph = skb_gro_network_header(skb);
+	__wsum wsum;
+
+	/* Don't bother verifying checksum if we're going to flush anyway. */
+	if (NAPI_GRO_CB(skb)->flush)
+		goto skip_csum;
+
+	wsum = NAPI_GRO_CB(skb)->csum;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_NONE:
+		wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb),
+				    wsum);
+
+		/* fall through */
+
+	case CHECKSUM_COMPLETE:
+		if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
+				  wsum)) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			break;
+		}
+
+		NAPI_GRO_CB(skb)->flush = 1;
+		return NULL;
+	}
+
+skip_csum:
+	return tcp_gro_receive(head, skb);
+}
+
+static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
+{
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct tcphdr *th = tcp_hdr(skb);
+
+	th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
+				  &iph->daddr, 0);
+	skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
+
+	return tcp_gro_complete(skb);
+}
+
+static const struct net_offload tcpv6_offload = {
+	.callbacks = {
+		.gso_send_check	=	tcp_v6_gso_send_check,
+		.gso_segment	=	tcp_gso_segment,
+		.gro_receive	=	tcp6_gro_receive,
+		.gro_complete	=	tcp6_gro_complete,
+	},
+};
+
+int __init tcpv6_offload_init(void)
+{
+	return inet6_add_offload(&tcpv6_offload, IPPROTO_TCP);
+}
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
new file mode 100644
index 00000000000..2c4e4c5c761
--- /dev/null
+++ b/net/ipv6/tunnel6.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C)2003,2004 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authors	Mitsuru KANDA  <mk@linux-ipv6.org>
+ * 		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+ */
+
+#define pr_fmt(fmt) "IPv6: " fmt
+
+#include <linux/icmpv6.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+
+static struct xfrm6_tunnel __rcu *tunnel6_handlers __read_mostly;
+static struct xfrm6_tunnel __rcu *tunnel46_handlers __read_mostly;
+static DEFINE_MUTEX(tunnel6_mutex);
+
+int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
+{
+	struct xfrm6_tunnel __rcu **pprev;
+	struct xfrm6_tunnel *t;
+	int ret = -EEXIST;
+	int priority = handler->priority;
+
+	mutex_lock(&tunnel6_mutex);
+
+	for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
+	     (t = rcu_dereference_protected(*pprev,
+			lockdep_is_held(&tunnel6_mutex))) != NULL;
+	     pprev = &t->next) {
+		if (t->priority > priority)
+			break;
+		if (t->priority == priority)
+			goto err;
+	}
+
+	handler->next = *pprev;
+	rcu_assign_pointer(*pprev, handler);
+
+	ret = 0;
+
+err:
+	mutex_unlock(&tunnel6_mutex);
+
+	return ret;
+}
+
+EXPORT_SYMBOL(xfrm6_tunnel_register);
+
+int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
+{
+	struct xfrm6_tunnel __rcu **pprev;
+	struct xfrm6_tunnel *t;
+	int ret = -ENOENT;
+
+	mutex_lock(&tunnel6_mutex);
+
+	for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
+	     (t = rcu_dereference_protected(*pprev,
+			lockdep_is_held(&tunnel6_mutex))) != NULL;
+	     pprev = &t->next) {
+		if (t == handler) {
+			*pprev = handler->next;
+			ret = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&tunnel6_mutex);
+
+	synchronize_net();
+
+	return ret;
+}
+
+EXPORT_SYMBOL(xfrm6_tunnel_deregister);
+
+#define for_each_tunnel_rcu(head, handler)		\
+	for (handler = rcu_dereference(head);		\
+	     handler != NULL;				\
+	     handler = rcu_dereference(handler->next))	\
+
+static int tunnel6_rcv(struct sk_buff *skb)
+{
+	struct xfrm6_tunnel *handler;
+
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+		goto drop;
+
+	for_each_tunnel_rcu(tunnel6_handlers, handler)
+		if (!handler->handler(skb))
+			return 0;
+
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int tunnel46_rcv(struct sk_buff *skb)
+{
+	struct xfrm6_tunnel *handler;
+
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		goto drop;
+
+	for_each_tunnel_rcu(tunnel46_handlers, handler)
+		if (!handler->handler(skb))
+			return 0;
+
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
+static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+			u8 type, u8 code, int offset, __be32 info)
+{
+	struct xfrm6_tunnel *handler;
+
+	for_each_tunnel_rcu(tunnel6_handlers, handler)
+		if (!handler->err_handler(skb, opt, type, code, offset, info))
+			break;
+}
+
+static const struct inet6_protocol tunnel6_protocol = {
+	.handler	= tunnel6_rcv,
+	.err_handler	= tunnel6_err,
+	.flags          = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+static const struct inet6_protocol tunnel46_protocol = {
+	.handler	= tunnel46_rcv,
+	.err_handler	= tunnel6_err,
+	.flags          = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+static int __init tunnel6_init(void)
+{
+	if (inet6_add_protocol(&tunnel6_protocol, IPPROTO_IPV6)) {
+		pr_err("%s: can't add protocol\n", __func__);
+		return -EAGAIN;
+	}
+	if (inet6_add_protocol(&tunnel46_protocol, IPPROTO_IPIP)) {
+		pr_err("%s: can't add protocol\n", __func__);
+		inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6);
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static void __exit tunnel6_fini(void)
+{
+	if (inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP))
+		pr_err("%s: can't remove protocol\n", __func__);
+	if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6))
+		pr_err("%s: can't remove protocol\n", __func__);
+}
+
+module_init(tunnel6_init);
+module_exit(tunnel6_fini);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index bf9519341fd..7092ff78fd8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1,14 +1,12 @@
 /*
  *	UDP over IPv6
- *	Linux INET6 implementation 
+ *	Linux INET6 implementation
  *
  *	Authors:
- *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *	Pedro Roque		<roque@di.fc.ul.pt>
  *
  *	Based on linux/ipv4/udp.c
  *
- *	$Id: udp.c,v 1.65 2002/02/01 22:01:04 davem Exp $
- *
  *	Fixes:
  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
@@ -23,12 +21,10 @@
  *      2 of the License, or (at your option) any later version.
  */
 
-#include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
-#include <linux/sched.h>
 #include <linux/net.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
@@ -36,309 +32,524 @@
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
 #include <linux/init.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
 #include <asm/uaccess.h>
 
-#include <net/sock.h>
-#include <net/snmp.h>
-
-#include <net/ipv6.h>
 #include <net/ndisc.h>
 #include <net/protocol.h>
 #include <net/transp_v6.h>
 #include <net/ip6_route.h>
-#include <net/addrconf.h>
-#include <net/ip.h>
-#include <net/udp.h>
 #include <net/raw.h>
-#include <net/inet_common.h>
 #include <net/tcp_states.h>
-
 #include <net/ip6_checksum.h>
 #include <net/xfrm.h>
+#include <net/inet6_hashtables.h>
+#include <net/busy_poll.h>
 
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <trace/events/skb.h>
+#include "udp_impl.h"
+
+static unsigned int udp6_ehashfn(struct net *net,
+				  const struct in6_addr *laddr,
+				  const u16 lport,
+				  const struct in6_addr *faddr,
+				  const __be16 fport)
+{
+	static u32 udp6_ehash_secret __read_mostly;
+	static u32 udp_ipv6_hash_secret __read_mostly;
 
-DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
+	u32 lhash, fhash;
 
-/* Grrr, addr_type already calculated by caller, but I don't want
- * to add some silly "cookie" argument to this method just for that.
- */
-static int udp_v6_get_port(struct sock *sk, unsigned short snum)
+	net_get_random_once(&udp6_ehash_secret,
+			    sizeof(udp6_ehash_secret));
+	net_get_random_once(&udp_ipv6_hash_secret,
+			    sizeof(udp_ipv6_hash_secret));
+
+	lhash = (__force u32)laddr->s6_addr32[3];
+	fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret);
+
+	return __inet6_ehashfn(lhash, lport, fhash, fport,
+			       udp_ipv6_hash_secret + net_hash_mix(net));
+}
+
+int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 {
-	struct sock *sk2;
-	struct hlist_node *node;
-
-	write_lock_bh(&udp_hash_lock);
-	if (snum == 0) {
-		int best_size_so_far, best, result, i;
-
-		if (udp_port_rover > sysctl_local_port_range[1] ||
-		    udp_port_rover < sysctl_local_port_range[0])
-			udp_port_rover = sysctl_local_port_range[0];
-		best_size_so_far = 32767;
-		best = result = udp_port_rover;
-		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
-			int size;
-			struct hlist_head *list;
-
-			list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
-			if (hlist_empty(list)) {
-				if (result > sysctl_local_port_range[1])
-					result = sysctl_local_port_range[0] +
-						((result - sysctl_local_port_range[0]) &
-						 (UDP_HTABLE_SIZE - 1));
-				goto gotit;
-			}
-			size = 0;
-			sk_for_each(sk2, node, list)
-				if (++size >= best_size_so_far)
-					goto next;
-			best_size_so_far = size;
-			best = result;
-		next:;
-		}
-		result = best;
-		for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
-			if (result > sysctl_local_port_range[1])
-				result = sysctl_local_port_range[0]
-					+ ((result - sysctl_local_port_range[0]) &
-					   (UDP_HTABLE_SIZE - 1));
-			if (!udp_lport_inuse(result))
-				break;
-		}
-		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
-			goto fail;
-gotit:
-		udp_port_rover = snum = result;
-	} else {
-		sk_for_each(sk2, node,
-			    &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
-			if (inet_sk(sk2)->num == snum &&
-			    sk2 != sk &&
-			    (!sk2->sk_bound_dev_if ||
-			     !sk->sk_bound_dev_if ||
-			     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-			    (!sk2->sk_reuse || !sk->sk_reuse) &&
-			    ipv6_rcv_saddr_equal(sk, sk2))
-				goto fail;
-		}
-	}
+	const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
+	int sk_ipv6only = ipv6_only_sock(sk);
+	int sk2_ipv6only = inet_v6_ipv6only(sk2);
+	int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+	int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
+
+	/* if both are mapped, treat as IPv4 */
+	if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
+		return (!sk2_ipv6only &&
+			(!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr ||
+			  sk->sk_rcv_saddr == sk2->sk_rcv_saddr));
+
+	if (addr_type2 == IPV6_ADDR_ANY &&
+	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
+		return 1;
+
+	if (addr_type == IPV6_ADDR_ANY &&
+	    !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
+		return 1;
+
+	if (sk2_rcv_saddr6 &&
+	    ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
+		return 1;
 
-	inet_sk(sk)->num = snum;
-	if (sk_unhashed(sk)) {
-		sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]);
-		sock_prot_inc_use(sk->sk_prot);
-	}
-	write_unlock_bh(&udp_hash_lock);
 	return 0;
+}
+
+static unsigned int udp6_portaddr_hash(struct net *net,
+				       const struct in6_addr *addr6,
+				       unsigned int port)
+{
+	unsigned int hash, mix = net_hash_mix(net);
 
-fail:
-	write_unlock_bh(&udp_hash_lock);
-	return 1;
+	if (ipv6_addr_any(addr6))
+		hash = jhash_1word(0, mix);
+	else if (ipv6_addr_v4mapped(addr6))
+		hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
+	else
+		hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
+
+	return hash ^ port;
+}
+
+
+int udp_v6_get_port(struct sock *sk, unsigned short snum)
+{
+	unsigned int hash2_nulladdr =
+		udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
+	unsigned int hash2_partial =
+		udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
+
+	/* precompute partial secondary hash */
+	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
+	return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);
 }
 
-static void udp_v6_hash(struct sock *sk)
+static void udp_v6_rehash(struct sock *sk)
 {
-	BUG();
+	u16 new_hash = udp6_portaddr_hash(sock_net(sk),
+					  &sk->sk_v6_rcv_saddr,
+					  inet_sk(sk)->inet_num);
+
+	udp_lib_rehash(sk, new_hash);
 }
 
-static void udp_v6_unhash(struct sock *sk)
+static inline int compute_score(struct sock *sk, struct net *net,
+				unsigned short hnum,
+				const struct in6_addr *saddr, __be16 sport,
+				const struct in6_addr *daddr, __be16 dport,
+				int dif)
 {
- 	write_lock_bh(&udp_hash_lock);
-	if (sk_del_node_init(sk)) {
-		inet_sk(sk)->num = 0;
-		sock_prot_dec_use(sk->sk_prot);
+	int score = -1;
+
+	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
+			sk->sk_family == PF_INET6) {
+		struct inet_sock *inet = inet_sk(sk);
+
+		score = 0;
+		if (inet->inet_dport) {
+			if (inet->inet_dport != sport)
+				return -1;
+			score++;
+		}
+		if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+			if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+				return -1;
+			score++;
+		}
+		if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+			if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
+				return -1;
+			score++;
+		}
+		if (sk->sk_bound_dev_if) {
+			if (sk->sk_bound_dev_if != dif)
+				return -1;
+			score++;
+		}
 	}
-	write_unlock_bh(&udp_hash_lock);
+	return score;
 }
 
-static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport,
-				  struct in6_addr *daddr, u16 dport, int dif)
+#define SCORE2_MAX (1 + 1 + 1)
+static inline int compute_score2(struct sock *sk, struct net *net,
+				const struct in6_addr *saddr, __be16 sport,
+				const struct in6_addr *daddr, unsigned short hnum,
+				int dif)
 {
-	struct sock *sk, *result = NULL;
-	struct hlist_node *node;
-	unsigned short hnum = ntohs(dport);
-	int badness = -1;
+	int score = -1;
 
- 	read_lock(&udp_hash_lock);
-	sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
+	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
+			sk->sk_family == PF_INET6) {
 		struct inet_sock *inet = inet_sk(sk);
 
-		if (inet->num == hnum && sk->sk_family == PF_INET6) {
-			struct ipv6_pinfo *np = inet6_sk(sk);
-			int score = 0;
-			if (inet->dport) {
-				if (inet->dport != sport)
-					continue;
-				score++;
-			}
-			if (!ipv6_addr_any(&np->rcv_saddr)) {
-				if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
-					continue;
-				score++;
-			}
-			if (!ipv6_addr_any(&np->daddr)) {
-				if (!ipv6_addr_equal(&np->daddr, saddr))
-					continue;
-				score++;
-			}
-			if (sk->sk_bound_dev_if) {
-				if (sk->sk_bound_dev_if != dif)
-					continue;
-				score++;
-			}
-			if(score == 4) {
-				result = sk;
-				break;
-			} else if(score > badness) {
+		if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+			return -1;
+		score = 0;
+		if (inet->inet_dport) {
+			if (inet->inet_dport != sport)
+				return -1;
+			score++;
+		}
+		if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+			if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
+				return -1;
+			score++;
+		}
+		if (sk->sk_bound_dev_if) {
+			if (sk->sk_bound_dev_if != dif)
+				return -1;
+			score++;
+		}
+	}
+	return score;
+}
+
+
+/* called with read_rcu_lock() */
+static struct sock *udp6_lib_lookup2(struct net *net,
+		const struct in6_addr *saddr, __be16 sport,
+		const struct in6_addr *daddr, unsigned int hnum, int dif,
+		struct udp_hslot *hslot2, unsigned int slot2)
+{
+	struct sock *sk, *result;
+	struct hlist_nulls_node *node;
+	int score, badness, matches = 0, reuseport = 0;
+	u32 hash = 0;
+
+begin:
+	result = NULL;
+	badness = -1;
+	udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
+		score = compute_score2(sk, net, saddr, sport,
+				      daddr, hnum, dif);
+		if (score > badness) {
+			result = sk;
+			badness = score;
+			reuseport = sk->sk_reuseport;
+			if (reuseport) {
+				hash = udp6_ehashfn(net, daddr, hnum,
+						    saddr, sport);
+				matches = 1;
+			} else if (score == SCORE2_MAX)
+				goto exact_match;
+		} else if (score == badness && reuseport) {
+			matches++;
+			if (((u64)hash * matches) >> 32 == 0)
 				result = sk;
-				badness = score;
+			hash = next_pseudo_random32(hash);
+		}
+	}
+	/*
+	 * if the nulls value we got at the end of this lookup is
+	 * not the expected one, we must restart lookup.
+	 * We probably met an item that was moved to another chain.
+	 */
+	if (get_nulls_value(node) != slot2)
+		goto begin;
+
+	if (result) {
+exact_match:
+		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
+			result = NULL;
+		else if (unlikely(compute_score2(result, net, saddr, sport,
+				  daddr, hnum, dif) < badness)) {
+			sock_put(result);
+			goto begin;
+		}
+	}
+	return result;
+}
+
+struct sock *__udp6_lib_lookup(struct net *net,
+				      const struct in6_addr *saddr, __be16 sport,
+				      const struct in6_addr *daddr, __be16 dport,
+				      int dif, struct udp_table *udptable)
+{
+	struct sock *sk, *result;
+	struct hlist_nulls_node *node;
+	unsigned short hnum = ntohs(dport);
+	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
+	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
+	int score, badness, matches = 0, reuseport = 0;
+	u32 hash = 0;
+
+	rcu_read_lock();
+	if (hslot->count > 10) {
+		hash2 = udp6_portaddr_hash(net, daddr, hnum);
+		slot2 = hash2 & udptable->mask;
+		hslot2 = &udptable->hash2[slot2];
+		if (hslot->count < hslot2->count)
+			goto begin;
+
+		result = udp6_lib_lookup2(net, saddr, sport,
+					  daddr, hnum, dif,
+					  hslot2, slot2);
+		if (!result) {
+			hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
+			slot2 = hash2 & udptable->mask;
+			hslot2 = &udptable->hash2[slot2];
+			if (hslot->count < hslot2->count)
+				goto begin;
+
+			result = udp6_lib_lookup2(net, saddr, sport,
+						  &in6addr_any, hnum, dif,
+						  hslot2, slot2);
+		}
+		rcu_read_unlock();
+		return result;
+	}
+begin:
+	result = NULL;
+	badness = -1;
+	sk_nulls_for_each_rcu(sk, node, &hslot->head) {
+		score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
+		if (score > badness) {
+			result = sk;
+			badness = score;
+			reuseport = sk->sk_reuseport;
+			if (reuseport) {
+				hash = udp6_ehashfn(net, daddr, hnum,
+						    saddr, sport);
+				matches = 1;
 			}
+		} else if (score == badness && reuseport) {
+			matches++;
+			if (((u64)hash * matches) >> 32 == 0)
+				result = sk;
+			hash = next_pseudo_random32(hash);
 		}
 	}
-	if (result)
-		sock_hold(result);
- 	read_unlock(&udp_hash_lock);
+	/*
+	 * if the nulls value we got at the end of this lookup is
+	 * not the expected one, we must restart lookup.
+	 * We probably met an item that was moved to another chain.
+	 */
+	if (get_nulls_value(node) != slot)
+		goto begin;
+
+	if (result) {
+		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
+			result = NULL;
+		else if (unlikely(compute_score(result, net, hnum, saddr, sport,
+					daddr, dport, dif) < badness)) {
+			sock_put(result);
+			goto begin;
+		}
+	}
+	rcu_read_unlock();
 	return result;
 }
+EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
 
-/*
- *
- */
+static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
+					  __be16 sport, __be16 dport,
+					  struct udp_table *udptable)
+{
+	struct sock *sk;
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+	if (unlikely(sk = skb_steal_sock(skb)))
+		return sk;
+	return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport,
+				 &iph->daddr, dport, inet6_iif(skb),
+				 udptable);
+}
 
-static void udpv6_close(struct sock *sk, long timeout)
+struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
+			     const struct in6_addr *daddr, __be16 dport, int dif)
 {
-	sk_common_release(sk);
+	return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
 }
+EXPORT_SYMBOL_GPL(udp6_lib_lookup);
+
 
 /*
  * 	This should be easy, if there is something there we
  * 	return it, otherwise we block.
  */
 
-static int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, 
+int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 		  struct msghdr *msg, size_t len,
 		  int noblock, int flags, int *addr_len)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
-  	struct sk_buff *skb;
-	size_t copied;
-  	int err;
+	struct sk_buff *skb;
+	unsigned int ulen, copied;
+	int peeked, off = 0;
+	int err;
+	int is_udplite = IS_UDPLITE(sk);
+	int is_udp4;
+	bool slow;
 
-  	if (addr_len)
-  		*addr_len=sizeof(struct sockaddr_in6);
-  
 	if (flags & MSG_ERRQUEUE)
-		return ipv6_recv_error(sk, msg, len);
+		return ipv6_recv_error(sk, msg, len, addr_len);
+
+	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
+		return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
 
 try_again:
-	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+				  &peeked, &off, &err);
 	if (!skb)
 		goto out;
 
- 	copied = skb->len - sizeof(struct udphdr);
-  	if (copied > len) {
-  		copied = len;
-  		msg->msg_flags |= MSG_TRUNC;
-  	}
-
-	if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
-		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
-					      copied);
-	} else if (msg->msg_flags&MSG_TRUNC) {
-		if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)))
+	ulen = skb->len - sizeof(struct udphdr);
+	copied = len;
+	if (copied > ulen)
+		copied = ulen;
+	else if (copied < ulen)
+		msg->msg_flags |= MSG_TRUNC;
+
+	is_udp4 = (skb->protocol == htons(ETH_P_IP));
+
+	/*
+	 * If checksum is needed at all, try to do it while copying the
+	 * data.  If the data is truncated, or if we only want a partial
+	 * coverage checksum (UDP-Lite), do it before the copy.
+	 */
+
+	if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
+		if (udp_lib_checksum_complete(skb))
 			goto csum_copy_err;
-		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
-					      copied);
-	} else {
+	}
+
+	if (skb_csum_unnecessary(skb))
+		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
+					      msg->msg_iov, copied);
+	else {
 		err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
 		if (err == -EINVAL)
 			goto csum_copy_err;
 	}
-	if (err)
+	if (unlikely(err)) {
+		trace_kfree_skb(skb, udpv6_recvmsg);
+		if (!peeked) {
+			atomic_inc(&sk->sk_drops);
+			if (is_udp4)
+				UDP_INC_STATS_USER(sock_net(sk),
+						   UDP_MIB_INERRORS,
+						   is_udplite);
+			else
+				UDP6_INC_STATS_USER(sock_net(sk),
+						    UDP_MIB_INERRORS,
+						    is_udplite);
+		}
 		goto out_free;
+	}
+	if (!peeked) {
+		if (is_udp4)
+			UDP_INC_STATS_USER(sock_net(sk),
+					UDP_MIB_INDATAGRAMS, is_udplite);
+		else
+			UDP6_INC_STATS_USER(sock_net(sk),
+					UDP_MIB_INDATAGRAMS, is_udplite);
+	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	/* Copy the address. */
 	if (msg->msg_name) {
-		struct sockaddr_in6 *sin6;
-	  
-		sin6 = (struct sockaddr_in6 *) msg->msg_name;
+		DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
 		sin6->sin6_family = AF_INET6;
-		sin6->sin6_port = skb->h.uh->source;
+		sin6->sin6_port = udp_hdr(skb)->source;
 		sin6->sin6_flowinfo = 0;
-		sin6->sin6_scope_id = 0;
-
-		if (skb->protocol == htons(ETH_P_IP))
-			ipv6_addr_set(&sin6->sin6_addr, 0, 0,
-				      htonl(0xffff), skb->nh.iph->saddr);
-		else {
-			ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
-			if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-				sin6->sin6_scope_id = IP6CB(skb)->iif;
-		}
 
+		if (is_udp4) {
+			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
+					       &sin6->sin6_addr);
+			sin6->sin6_scope_id = 0;
+		} else {
+			sin6->sin6_addr = ipv6_hdr(skb)->saddr;
+			sin6->sin6_scope_id =
+				ipv6_iface_scope_id(&sin6->sin6_addr,
+						    IP6CB(skb)->iif);
+		}
+		*addr_len = sizeof(*sin6);
 	}
-	if (skb->protocol == htons(ETH_P_IP)) {
+
+	if (np->rxopt.all)
+		ip6_datagram_recv_common_ctl(sk, msg, skb);
+
+	if (is_udp4) {
 		if (inet->cmsg_flags)
 			ip_cmsg_recv(msg, skb);
 	} else {
 		if (np->rxopt.all)
-			datagram_recv_ctl(sk, msg, skb);
-  	}
+			ip6_datagram_recv_specific_ctl(sk, msg, skb);
+	}
 
 	err = copied;
 	if (flags & MSG_TRUNC)
-		err = skb->len - sizeof(struct udphdr);
+		err = ulen;
 
 out_free:
-	skb_free_datagram(sk, skb);
+	skb_free_datagram_locked(sk, skb);
 out:
 	return err;
 
 csum_copy_err:
-	/* Clear queue. */
-	if (flags&MSG_PEEK) {
-		int clear = 0;
-		spin_lock_bh(&sk->sk_receive_queue.lock);
-		if (skb == skb_peek(&sk->sk_receive_queue)) {
-			__skb_unlink(skb, &sk->sk_receive_queue);
-			clear = 1;
+	slow = lock_sock_fast(sk);
+	if (!skb_kill_datagram(sk, skb, flags)) {
+		if (is_udp4) {
+			UDP_INC_STATS_USER(sock_net(sk),
+					UDP_MIB_CSUMERRORS, is_udplite);
+			UDP_INC_STATS_USER(sock_net(sk),
+					UDP_MIB_INERRORS, is_udplite);
+		} else {
+			UDP6_INC_STATS_USER(sock_net(sk),
+					UDP_MIB_CSUMERRORS, is_udplite);
+			UDP6_INC_STATS_USER(sock_net(sk),
+					UDP_MIB_INERRORS, is_udplite);
 		}
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
-		if (clear)
-			kfree_skb(skb);
 	}
+	unlock_sock_fast(sk, slow);
 
-	skb_free_datagram(sk, skb);
-
-	if (flags & MSG_DONTWAIT) {
-		UDP6_INC_STATS_USER(UDP_MIB_INERRORS);
+	if (noblock)
 		return -EAGAIN;
-	}
+
+	/* starting over for a new packet */
+	msg->msg_flags &= ~MSG_TRUNC;
 	goto try_again;
 }
 
-static void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-	       int type, int code, int offset, __u32 info)
+void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		    u8 type, u8 code, int offset, __be32 info,
+		    struct udp_table *udptable)
 {
 	struct ipv6_pinfo *np;
-	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
-	struct net_device *dev = skb->dev;
-	struct in6_addr *saddr = &hdr->saddr;
-	struct in6_addr *daddr = &hdr->daddr;
+	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
+	const struct in6_addr *saddr = &hdr->saddr;
+	const struct in6_addr *daddr = &hdr->daddr;
 	struct udphdr *uh = (struct udphdr*)(skb->data+offset);
 	struct sock *sk;
 	int err;
 
-	sk = udp_v6_lookup(daddr, uh->dest, saddr, uh->source, dev->ifindex);
-   
+	sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest,
+			       saddr, uh->source, inet6_iif(skb), udptable);
 	if (sk == NULL)
 		return;
 
+	if (type == ICMPV6_PKT_TOOBIG) {
+		if (!ip6_sk_accept_pmtu(sk))
+			goto out;
+		ip6_sk_update_pmtu(skb, sk, info);
+	}
+	if (type == NDISC_REDIRECT) {
+		ip6_sk_redirect(skb, sk);
+		goto out;
+	}
+
 	np = inet6_sk(sk);
 
 	if (!icmpv6_err_convert(type, code, &err) && !np->recverr)
@@ -356,197 +567,387 @@ out:
 	sock_put(sk);
 }
 
-static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
+static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
-	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
+	int rc;
+
+	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+		sock_rps_save_rxhash(sk, skb);
+		sk_mark_napi_id(sk, skb);
+	}
+
+	rc = sock_queue_rcv_skb(sk, skb);
+	if (rc < 0) {
+		int is_udplite = IS_UDPLITE(sk);
+
+		/* Note that an ENOMEM error is charged twice */
+		if (rc == -ENOMEM)
+			UDP6_INC_STATS_BH(sock_net(sk),
+					UDP_MIB_RCVBUFERRORS, is_udplite);
+		UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 		kfree_skb(skb);
 		return -1;
 	}
+	return 0;
+}
+
+static __inline__ void udpv6_err(struct sk_buff *skb,
+				 struct inet6_skb_parm *opt, u8 type,
+				 u8 code, int offset, __be32 info     )
+{
+	__udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
+}
+
+static struct static_key udpv6_encap_needed __read_mostly;
+void udpv6_encap_enable(void)
+{
+	if (!static_key_enabled(&udpv6_encap_needed))
+		static_key_slow_inc(&udpv6_encap_needed);
+}
+EXPORT_SYMBOL(udpv6_encap_enable);
+
+int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	struct udp_sock *up = udp_sk(sk);
+	int rc;
+	int is_udplite = IS_UDPLITE(sk);
+
+	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+		goto drop;
+
+	if (static_key_false(&udpv6_encap_needed) && up->encap_type) {
+		int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
+
+		/*
+		 * This is an encapsulation socket so pass the skb to
+		 * the socket's udp_encap_rcv() hook. Otherwise, just
+		 * fall through and pass this up the UDP socket.
+		 * up->encap_rcv() returns the following value:
+		 * =0 if skb was successfully passed to the encap
+		 *    handler or was discarded by it.
+		 * >0 if skb should be passed on to UDP.
+		 * <0 if skb should be resubmitted as proto -N
+		 */
+
+		/* if we're overly short, let UDP handle it */
+		encap_rcv = ACCESS_ONCE(up->encap_rcv);
+		if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) {
+			int ret;
+
+			/* Verify checksum before giving to encap */
+			if (udp_lib_checksum_complete(skb))
+				goto csum_error;
+
+			ret = encap_rcv(sk, skb);
+			if (ret <= 0) {
+				UDP_INC_STATS_BH(sock_net(sk),
+						 UDP_MIB_INDATAGRAMS,
+						 is_udplite);
+				return -ret;
+			}
+		}
+
+		/* FALLTHROUGH -- it's a UDP Packet */
+	}
+
+	/*
+	 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
+	 */
+	if ((is_udplite & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
 
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
-		if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
-			UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
-			kfree_skb(skb);
-			return 0;
+		if (up->pcrlen == 0) {          /* full coverage was set  */
+			LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage"
+				" %d while full coverage %d requested\n",
+				UDP_SKB_CB(skb)->cscov, skb->len);
+			goto drop;
+		}
+		if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
+			LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d "
+						    "too small, need min %d\n",
+				       UDP_SKB_CB(skb)->cscov, up->pcrlen);
+			goto drop;
 		}
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
-	if (sock_queue_rcv_skb(sk,skb)<0) {
-		UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
-		kfree_skb(skb);
-		return 0;
+	if (rcu_access_pointer(sk->sk_filter)) {
+		if (udp_lib_checksum_complete(skb))
+			goto csum_error;
 	}
-	UDP6_INC_STATS_BH(UDP_MIB_INDATAGRAMS);
-	return 0;
+
+	if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
+		UDP6_INC_STATS_BH(sock_net(sk),
+				  UDP_MIB_RCVBUFERRORS, is_udplite);
+		goto drop;
+	}
+
+	skb_dst_drop(skb);
+
+	bh_lock_sock(sk);
+	rc = 0;
+	if (!sock_owned_by_user(sk))
+		rc = __udpv6_queue_rcv_skb(sk, skb);
+	else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
+		bh_unlock_sock(sk);
+		goto drop;
+	}
+	bh_unlock_sock(sk);
+
+	return rc;
+
+csum_error:
+	UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
+drop:
+	UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+	atomic_inc(&sk->sk_drops);
+	kfree_skb(skb);
+	return -1;
 }
 
-static struct sock *udp_v6_mcast_next(struct sock *sk,
-				      u16 loc_port, struct in6_addr *loc_addr,
-				      u16 rmt_port, struct in6_addr *rmt_addr,
+static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
+				      __be16 loc_port, const struct in6_addr *loc_addr,
+				      __be16 rmt_port, const struct in6_addr *rmt_addr,
 				      int dif)
 {
-	struct hlist_node *node;
-	struct sock *s = sk;
+	struct hlist_nulls_node *node;
 	unsigned short num = ntohs(loc_port);
 
-	sk_for_each_from(s, node) {
-		struct inet_sock *inet = inet_sk(s);
+	sk_nulls_for_each_from(sk, node) {
+		struct inet_sock *inet = inet_sk(sk);
 
-		if (inet->num == num && s->sk_family == PF_INET6) {
-			struct ipv6_pinfo *np = inet6_sk(s);
-			if (inet->dport) {
-				if (inet->dport != rmt_port)
+		if (!net_eq(sock_net(sk), net))
+			continue;
+
+		if (udp_sk(sk)->udp_port_hash == num &&
+		    sk->sk_family == PF_INET6) {
+			if (inet->inet_dport) {
+				if (inet->inet_dport != rmt_port)
 					continue;
 			}
-			if (!ipv6_addr_any(&np->daddr) &&
-			    !ipv6_addr_equal(&np->daddr, rmt_addr))
+			if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+			    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
 				continue;
 
-			if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
+			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
 				continue;
 
-			if (!ipv6_addr_any(&np->rcv_saddr)) {
-				if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+			if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+				if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
 					continue;
 			}
-			if(!inet6_mc_check(s, loc_addr, rmt_addr))
+			if (!inet6_mc_check(sk, loc_addr, rmt_addr))
 				continue;
-			return s;
+			return sk;
 		}
 	}
 	return NULL;
 }
 
+static void flush_stack(struct sock **stack, unsigned int count,
+			struct sk_buff *skb, unsigned int final)
+{
+	struct sk_buff *skb1 = NULL;
+	struct sock *sk;
+	unsigned int i;
+
+	for (i = 0; i < count; i++) {
+		sk = stack[i];
+		if (likely(skb1 == NULL))
+			skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
+		if (!skb1) {
+			atomic_inc(&sk->sk_drops);
+			UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
+					  IS_UDPLITE(sk));
+			UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
+					  IS_UDPLITE(sk));
+		}
+
+		if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0)
+			skb1 = NULL;
+	}
+	if (unlikely(skb1))
+		kfree_skb(skb1);
+}
+
+static void udp6_csum_zero_error(struct sk_buff *skb)
+{
+	/* RFC 2460 section 8.1 says that we SHOULD log
+	 * this error. Well, it is reasonable.
+	 */
+	LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
+		       &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
+		       &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
+}
+
 /*
  * Note: called only from the BH handler context,
  * so we don't need to lock the hashes.
  */
-static void udpv6_mcast_deliver(struct udphdr *uh,
-				struct in6_addr *saddr, struct in6_addr *daddr,
-				struct sk_buff *skb)
+static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
+		const struct in6_addr *saddr, const struct in6_addr *daddr,
+		struct udp_table *udptable)
 {
-	struct sock *sk, *sk2;
+	struct sock *sk, *stack[256 / sizeof(struct sock *)];
+	const struct udphdr *uh = udp_hdr(skb);
+	struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
 	int dif;
+	unsigned int i, count = 0;
+
+	spin_lock(&hslot->lock);
+	sk = sk_nulls_head(&hslot->head);
+	dif = inet6_iif(skb);
+	sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
+	while (sk) {
+		/* If zero checksum and no_check is not on for
+		 * the socket then skip it.
+		 */
+		if (uh->check || udp_sk(sk)->no_check6_rx)
+			stack[count++] = sk;
 
-	read_lock(&udp_hash_lock);
-	sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
-	dif = skb->dev->ifindex;
-	sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
-	if (!sk) {
-		kfree_skb(skb);
-		goto out;
+		sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
+				       uh->source, saddr, dif);
+		if (unlikely(count == ARRAY_SIZE(stack))) {
+			if (!sk)
+				break;
+			flush_stack(stack, count, skb, ~0);
+			count = 0;
+		}
 	}
+	/*
+	 * before releasing the lock, we must take reference on sockets
+	 */
+	for (i = 0; i < count; i++)
+		sock_hold(stack[i]);
+
+	spin_unlock(&hslot->lock);
+
+	if (count) {
+		flush_stack(stack, count, skb, count - 1);
 
-	sk2 = sk;
-	while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr,
-					uh->source, saddr, dif))) {
-		struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC);
-		if (buff)
-			udpv6_queue_rcv_skb(sk2, buff);
+		for (i = 0; i < count; i++)
+			sock_put(stack[i]);
+	} else {
+		kfree_skb(skb);
 	}
-	udpv6_queue_rcv_skb(sk, skb);
-out:
-	read_unlock(&udp_hash_lock);
+	return 0;
 }
 
-static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
+		   int proto)
 {
-	struct sk_buff *skb = *pskb;
+	struct net *net = dev_net(skb->dev);
 	struct sock *sk;
-  	struct udphdr *uh;
-	struct net_device *dev = skb->dev;
-	struct in6_addr *saddr, *daddr;
+	struct udphdr *uh;
+	const struct in6_addr *saddr, *daddr;
 	u32 ulen = 0;
 
 	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
-		goto short_packet;
+		goto discard;
 
-	saddr = &skb->nh.ipv6h->saddr;
-	daddr = &skb->nh.ipv6h->daddr;
-	uh = skb->h.uh;
+	saddr = &ipv6_hdr(skb)->saddr;
+	daddr = &ipv6_hdr(skb)->daddr;
+	uh = udp_hdr(skb);
 
 	ulen = ntohs(uh->len);
-
-	/* Check for jumbo payload */
-	if (ulen == 0)
-		ulen = skb->len;
-
-	if (ulen > skb->len || ulen < sizeof(*uh))
+	if (ulen > skb->len)
 		goto short_packet;
 
-	if (uh->check == 0) {
-		/* RFC 2460 section 8.1 says that we SHOULD log
-		   this error. Well, it is reasonable.
-		 */
-		LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
-		goto discard;
-	}
+	if (proto == IPPROTO_UDP) {
+		/* UDP validates ulen. */
 
-	if (ulen < skb->len) {
-		if (pskb_trim_rcsum(skb, ulen))
-			goto discard;
-		saddr = &skb->nh.ipv6h->saddr;
-		daddr = &skb->nh.ipv6h->daddr;
-		uh = skb->h.uh;
-	}
+		/* Check for jumbo payload */
+		if (ulen == 0)
+			ulen = skb->len;
 
-	if (skb->ip_summed==CHECKSUM_HW) {
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) {
-			LIMIT_NETDEBUG(KERN_DEBUG "udp v6 hw csum failure.\n");
-			skb->ip_summed = CHECKSUM_NONE;
+		if (ulen < sizeof(*uh))
+			goto short_packet;
+
+		if (ulen < skb->len) {
+			if (pskb_trim_rcsum(skb, ulen))
+				goto short_packet;
+			saddr = &ipv6_hdr(skb)->saddr;
+			daddr = &ipv6_hdr(skb)->daddr;
+			uh = udp_hdr(skb);
 		}
 	}
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
-		skb->csum = ~csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, 0);
 
-	/* 
-	 *	Multicast receive code 
+	if (udp6_csum_init(skb, uh, proto))
+		goto csum_error;
+
+	/*
+	 *	Multicast receive code
 	 */
-	if (ipv6_addr_is_multicast(daddr)) {
-		udpv6_mcast_deliver(uh, saddr, daddr, skb);
-		return 0;
-	}
+	if (ipv6_addr_is_multicast(daddr))
+		return __udp6_lib_mcast_deliver(net, skb,
+				saddr, daddr, udptable);
 
 	/* Unicast */
 
-	/* 
+	/*
 	 * check socket cache ... must talk to Alan about his plans
 	 * for sock caches... i'll skip this for now.
 	 */
-	sk = udp_v6_lookup(saddr, uh->source, daddr, uh->dest, dev->ifindex);
+	sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
+	if (sk != NULL) {
+		int ret;
+
+		if (!uh->check && !udp_sk(sk)->no_check6_rx) {
+			sock_put(sk);
+			udp6_csum_zero_error(skb);
+			goto csum_error;
+		}
 
-	if (sk == NULL) {
-		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
-			goto discard;
+		ret = udpv6_queue_rcv_skb(sk, skb);
+		sock_put(sk);
 
-		if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
-		    (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)))
-			goto discard;
-		UDP6_INC_STATS_BH(UDP_MIB_NOPORTS);
+		/* a return value > 0 means to resubmit the input, but
+		 * it wants the return to be -protocol, or 0
+		 */
+		if (ret > 0)
+			return -ret;
 
-		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
+		return 0;
+	}
 
-		kfree_skb(skb);
-		return(0);
+	if (!uh->check) {
+		udp6_csum_zero_error(skb);
+		goto csum_error;
 	}
-	
-	/* deliver */
-	
-	udpv6_queue_rcv_skb(sk, skb);
-	sock_put(sk);
-	return(0);
 
-short_packet:	
-	if (net_ratelimit())
-		printk(KERN_DEBUG "UDP: short packet: %d/%u\n", ulen, skb->len);
+	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+		goto discard;
+
+	if (udp_lib_checksum_complete(skb))
+		goto csum_error;
+
+	UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+	kfree_skb(skb);
+	return 0;
 
+short_packet:
+	LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
+		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
+		       saddr,
+		       ntohs(uh->source),
+		       ulen,
+		       skb->len,
+		       daddr,
+		       ntohs(uh->dest));
+	goto discard;
+csum_error:
+	UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
 discard:
-	UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
+	UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
 	kfree_skb(skb);
-	return(0);	
+	return 0;
+}
+
+static __inline__ int udpv6_rcv(struct sk_buff *skb)
+{
+	return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
 }
+
 /*
  * Throw away all pending data and cancel the corking. Socket is locked.
  */
@@ -554,24 +955,75 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
 {
 	struct udp_sock *up = udp_sk(sk);
 
-	if (up->pending) {
+	if (up->pending == AF_INET)
+		udp_flush_pending_frames(sk);
+	else if (up->pending) {
 		up->len = 0;
 		up->pending = 0;
 		ip6_flush_pending_frames(sk);
-        }
+	}
+}
+
+/**
+ * 	udp6_hwcsum_outgoing  -  handle outgoing HW checksumming
+ * 	@sk: 	socket we are sending on
+ * 	@skb: 	sk_buff containing the filled-in UDP header
+ * 	        (checksum field must be zeroed out)
+ */
+static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
+				 const struct in6_addr *saddr,
+				 const struct in6_addr *daddr, int len)
+{
+	unsigned int offset;
+	struct udphdr *uh = udp_hdr(skb);
+	__wsum csum = 0;
+
+	if (skb_queue_len(&sk->sk_write_queue) == 1) {
+		/* Only one fragment on the socket.  */
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		uh->check = ~csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, 0);
+	} else {
+		/*
+		 * HW-checksum won't work as there are two or more
+		 * fragments on the socket so that all csums of sk_buffs
+		 * should be together
+		 */
+		offset = skb_transport_offset(skb);
+		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+
+		skb->ip_summed = CHECKSUM_NONE;
+
+		skb_queue_walk(&sk->sk_write_queue, skb) {
+			csum = csum_add(csum, skb->csum);
+		}
+
+		uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP,
+					    csum);
+		if (uh->check == 0)
+			uh->check = CSUM_MANGLED_0;
+	}
 }
 
 /*
  *	Sending
  */
 
-static int udp_v6_push_pending_frames(struct sock *sk, struct udp_sock *up)
+static int udp_v6_push_pending_frames(struct sock *sk)
 {
 	struct sk_buff *skb;
 	struct udphdr *uh;
+	struct udp_sock  *up = udp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
-	struct flowi *fl = &inet->cork.fl;
+	struct flowi6 *fl6;
 	int err = 0;
+	int is_udplite = IS_UDPLITE(sk);
+	__wsum csum = 0;
+
+	if (up->pending == AF_INET)
+		return udp_push_pending_frames(sk);
+
+	fl6 = &inet->cork.fl.u.ip6;
 
 	/* Grab the skbuff where UDP header space exists. */
 	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
@@ -580,68 +1032,70 @@ static int udp_v6_push_pending_frames(struct sock *sk, struct udp_sock *up)
 	/*
 	 * Create a UDP header
 	 */
-	uh = skb->h.uh;
-	uh->source = fl->fl_ip_sport;
-	uh->dest = fl->fl_ip_dport;
+	uh = udp_hdr(skb);
+	uh->source = fl6->fl6_sport;
+	uh->dest = fl6->fl6_dport;
 	uh->len = htons(up->len);
 	uh->check = 0;
 
-	if (sk->sk_no_check == UDP_CSUM_NOXMIT) {
+	if (is_udplite)
+		csum = udplite_csum_outgoing(sk, skb);
+	else if (up->no_check6_tx) {   /* UDP csum disabled */
 		skb->ip_summed = CHECKSUM_NONE;
 		goto send;
-	}
-
-	if (skb_queue_len(&sk->sk_write_queue) == 1) {
-		skb->csum = csum_partial((char *)uh,
-				sizeof(struct udphdr), skb->csum);
-		uh->check = csum_ipv6_magic(&fl->fl6_src,
-					    &fl->fl6_dst,
-					    up->len, fl->proto, skb->csum);
-	} else {
-		u32 tmp_csum = 0;
-
-		skb_queue_walk(&sk->sk_write_queue, skb) {
-			tmp_csum = csum_add(tmp_csum, skb->csum);
-		}
-		tmp_csum = csum_partial((char *)uh,
-				sizeof(struct udphdr), tmp_csum);
-                tmp_csum = csum_ipv6_magic(&fl->fl6_src,
-					   &fl->fl6_dst,
-					   up->len, fl->proto, tmp_csum);
-                uh->check = tmp_csum;
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
+		udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr,
+				     up->len);
+		goto send;
+	} else
+		csum = udp_csum_outgoing(sk, skb);
 
-	}
+	/* add protocol-dependent pseudo-header */
+	uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
+				    up->len, fl6->flowi6_proto, csum);
 	if (uh->check == 0)
-		uh->check = -1;
+		uh->check = CSUM_MANGLED_0;
 
 send:
 	err = ip6_push_pending_frames(sk);
+	if (err) {
+		if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
+			UDP6_INC_STATS_USER(sock_net(sk),
+					    UDP_MIB_SNDBUFERRORS, is_udplite);
+			err = 0;
+		}
+	} else
+		UDP6_INC_STATS_USER(sock_net(sk),
+				    UDP_MIB_OUTDATAGRAMS, is_udplite);
 out:
 	up->len = 0;
 	up->pending = 0;
 	return err;
 }
 
-static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, 
+int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		  struct msghdr *msg, size_t len)
 {
 	struct ipv6_txoptions opt_space;
 	struct udp_sock *up = udp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
-	struct in6_addr *daddr, *final_p = NULL, final;
+	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
+	struct in6_addr *daddr, *final_p, final;
 	struct ipv6_txoptions *opt = NULL;
 	struct ip6_flowlabel *flowlabel = NULL;
-	struct flowi *fl = &inet->cork.fl;
+	struct flowi6 fl6;
 	struct dst_entry *dst;
 	int addr_len = msg->msg_namelen;
 	int ulen = len;
 	int hlimit = -1;
 	int tclass = -1;
+	int dontfrag = -1;
 	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
 	int err;
 	int connected = 0;
+	int is_udplite = IS_UDPLITE(sk);
+	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
 
 	/* destination address check */
 	if (sin6) {
@@ -667,15 +1121,15 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	} else if (!up->pending) {
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
-		daddr = &np->daddr;
-	} else 
+		daddr = &sk->sk_v6_daddr;
+	} else
 		daddr = NULL;
 
 	if (daddr) {
-		if (ipv6_addr_type(daddr) == IPV6_ADDR_MAPPED) {
+		if (ipv6_addr_v4mapped(daddr)) {
 			struct sockaddr_in sin;
 			sin.sin_family = AF_INET;
-			sin.sin_port = sin6 ? sin6->sin6_port : inet->dport;
+			sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport;
 			sin.sin_addr.s_addr = daddr->s6_addr32[3];
 			msg->msg_name = &sin;
 			msg->msg_namelen = sizeof(sin);
@@ -690,11 +1144,11 @@ do_udp_sendmsg:
 		return udp_sendmsg(iocb, sk, msg, len);
 
 	/* Rough check on arithmetic overflow,
-	   better check is made in ip6_build_xmit
+	   better check is made in ip6_append_data().
 	   */
 	if (len > INT_MAX - sizeof(struct udphdr))
 		return -EMSGSIZE;
-	
+
 	if (up->pending) {
 		/*
 		 * There are pending frames.
@@ -713,22 +1167,21 @@ do_udp_sendmsg:
 	}
 	ulen += sizeof(struct udphdr);
 
-	memset(fl, 0, sizeof(*fl));
+	memset(&fl6, 0, sizeof(fl6));
 
 	if (sin6) {
 		if (sin6->sin6_port == 0)
 			return -EINVAL;
 
-		fl->fl_ip_dport = sin6->sin6_port;
+		fl6.fl6_dport = sin6->sin6_port;
 		daddr = &sin6->sin6_addr;
 
 		if (np->sndflow) {
-			fl->fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
-			if (fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
-				flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
+			fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+			if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
+				flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
 				if (flowlabel == NULL)
 					return -EINVAL;
-				daddr = &flowlabel->dst;
 			}
 		}
 
@@ -737,38 +1190,44 @@ do_udp_sendmsg:
 		 * sk->sk_dst_cache.
 		 */
 		if (sk->sk_state == TCP_ESTABLISHED &&
-		    ipv6_addr_equal(daddr, &np->daddr))
-			daddr = &np->daddr;
+		    ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
+			daddr = &sk->sk_v6_daddr;
 
 		if (addr_len >= sizeof(struct sockaddr_in6) &&
 		    sin6->sin6_scope_id &&
-		    ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
-			fl->oif = sin6->sin6_scope_id;
+		    __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
+			fl6.flowi6_oif = sin6->sin6_scope_id;
 	} else {
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
 
-		fl->fl_ip_dport = inet->dport;
-		daddr = &np->daddr;
-		fl->fl6_flowlabel = np->flow_label;
+		fl6.fl6_dport = inet->inet_dport;
+		daddr = &sk->sk_v6_daddr;
+		fl6.flowlabel = np->flow_label;
 		connected = 1;
 	}
 
-	if (!fl->oif)
-		fl->oif = sk->sk_bound_dev_if;
+	if (!fl6.flowi6_oif)
+		fl6.flowi6_oif = sk->sk_bound_dev_if;
+
+	if (!fl6.flowi6_oif)
+		fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
+
+	fl6.flowi6_mark = sk->sk_mark;
 
 	if (msg->msg_controllen) {
 		opt = &opt_space;
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(*opt);
 
-		err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass);
+		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+					    &hlimit, &tclass, &dontfrag);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
 		}
-		if ((fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
-			flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
+		if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
 			if (flowlabel == NULL)
 				return -EINVAL;
 		}
@@ -778,53 +1237,43 @@ do_udp_sendmsg:
 	}
 	if (opt == NULL)
 		opt = np->opt;
-	opt = fl6_merge_options(&opt_space, flowlabel, opt);
-
-	fl->proto = IPPROTO_UDP;
-	ipv6_addr_copy(&fl->fl6_dst, daddr);
-	if (ipv6_addr_any(&fl->fl6_src) && !ipv6_addr_any(&np->saddr))
-		ipv6_addr_copy(&fl->fl6_src, &np->saddr);
-	fl->fl_ip_sport = inet->sport;
-	
-	/* merge ip6_build_xmit from ip6_output */
-	if (opt && opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-		ipv6_addr_copy(&final, &fl->fl6_dst);
-		ipv6_addr_copy(&fl->fl6_dst, rt0->addr);
-		final_p = &final;
+	if (flowlabel)
+		opt = fl6_merge_options(&opt_space, flowlabel, opt);
+	opt = ipv6_fixup_options(&opt_space, opt);
+
+	fl6.flowi6_proto = sk->sk_protocol;
+	if (!ipv6_addr_any(daddr))
+		fl6.daddr = *daddr;
+	else
+		fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
+	if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
+		fl6.saddr = np->saddr;
+	fl6.fl6_sport = inet->inet_sport;
+
+	final_p = fl6_update_dst(&fl6, opt, &final);
+	if (final_p)
 		connected = 0;
-	}
 
-	if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) {
-		fl->oif = np->mcast_oif;
+	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) {
+		fl6.flowi6_oif = np->mcast_oif;
 		connected = 0;
-	}
+	} else if (!fl6.flowi6_oif)
+		fl6.flowi6_oif = np->ucast_oif;
 
-	err = ip6_dst_lookup(sk, &dst, fl);
-	if (err)
-		goto out;
-	if (final_p)
-		ipv6_addr_copy(&fl->fl6_dst, final_p);
+	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-	if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0)
+	dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p);
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
+		dst = NULL;
 		goto out;
-
-	if (hlimit < 0) {
-		if (ipv6_addr_is_multicast(&fl->fl6_dst))
-			hlimit = np->mcast_hops;
-		else
-			hlimit = np->hop_limit;
-		if (hlimit < 0)
-			hlimit = dst_metric(dst, RTAX_HOPLIMIT);
-		if (hlimit < 0)
-			hlimit = ipv6_get_hoplimit(dst->dev);
 	}
 
-	if (tclass < 0) {
+	if (hlimit < 0)
+		hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+
+	if (tclass < 0)
 		tclass = np->tclass;
-		if (tclass < 0)
-			tclass = 0;
-	}
 
 	if (msg->msg_flags&MSG_CONFIRM)
 		goto do_confirm;
@@ -844,34 +1293,55 @@ back_from_confirm:
 	up->pending = AF_INET6;
 
 do_append_data:
+	if (dontfrag < 0)
+		dontfrag = np->dontfrag;
 	up->len += ulen;
-	err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen,
-		sizeof(struct udphdr), hlimit, tclass, opt, fl,
+	getfrag  =  is_udplite ?  udplite_getfrag : ip_generic_getfrag;
+	err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
+		sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
 		(struct rt6_info*)dst,
-		corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
+		corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag);
 	if (err)
 		udp_v6_flush_pending_frames(sk);
 	else if (!corkreq)
-		err = udp_v6_push_pending_frames(sk, up);
+		err = udp_v6_push_pending_frames(sk);
+	else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
+		up->pending = 0;
 
 	if (dst) {
 		if (connected) {
 			ip6_dst_store(sk, dst,
-				      ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ?
-				      &np->daddr : NULL);
+				      ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
+				      &sk->sk_v6_daddr : NULL,
+#ifdef CONFIG_IPV6_SUBTREES
+				      ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
+				      &np->saddr :
+#endif
+				      NULL);
 		} else {
 			dst_release(dst);
 		}
+		dst = NULL;
 	}
 
 	if (err > 0)
 		err = np->recverr ? net_xmit_errno(err) : 0;
 	release_sock(sk);
 out:
+	dst_release(dst);
 	fl6_sock_release(flowlabel);
-	if (!err) {
-		UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
+	if (!err)
 		return len;
+	/*
+	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
+	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
+	 * we don't have a good statistic (IpOutDiscards but it can be too many
+	 * things).  We could add another new stat but at least for now that
+	 * seems like overkill.
+	 */
+	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+		UDP6_INC_STATS_USER(sock_net(sk),
+				UDP_MIB_SNDBUFERRORS, is_udplite);
 	}
 	return err;
 
@@ -883,105 +1353,65 @@ do_confirm:
 	goto out;
 }
 
-static int udpv6_destroy_sock(struct sock *sk)
+void udpv6_destroy_sock(struct sock *sk)
 {
+	struct udp_sock *up = udp_sk(sk);
 	lock_sock(sk);
 	udp_v6_flush_pending_frames(sk);
 	release_sock(sk);
 
-	inet6_destroy_sock(sk);
+	if (static_key_false(&udpv6_encap_needed) && up->encap_type) {
+		void (*encap_destroy)(struct sock *sk);
+		encap_destroy = ACCESS_ONCE(up->encap_destroy);
+		if (encap_destroy)
+			encap_destroy(sk);
+	}
 
-	return 0;
+	inet6_destroy_sock(sk);
 }
 
 /*
  *	Socket option code for UDP
  */
-static int udpv6_setsockopt(struct sock *sk, int level, int optname, 
-			  char __user *optval, int optlen)
+int udpv6_setsockopt(struct sock *sk, int level, int optname,
+		     char __user *optval, unsigned int optlen)
 {
-	struct udp_sock *up = udp_sk(sk);
-	int val;
-	int err = 0;
-
-	if (level != SOL_UDP)
-		return ipv6_setsockopt(sk, level, optname, optval, optlen);
-
-	if(optlen<sizeof(int))
-		return -EINVAL;
-
-	if (get_user(val, (int __user *)optval))
-		return -EFAULT;
-
-	switch(optname) {
-	case UDP_CORK:
-		if (val != 0) {
-			up->corkflag = 1;
-		} else {
-			up->corkflag = 0;
-			lock_sock(sk);
-			udp_v6_push_pending_frames(sk, up);
-			release_sock(sk);
-		}
-		break;
-		
-	case UDP_ENCAP:
-		switch (val) {
-		case 0:
-			up->encap_type = val;
-			break;
-		default:
-			err = -ENOPROTOOPT;
-			break;
-		}
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-		break;
-	};
-
-	return err;
+	if (level == SOL_UDP  ||  level == SOL_UDPLITE)
+		return udp_lib_setsockopt(sk, level, optname, optval, optlen,
+					  udp_v6_push_pending_frames);
+	return ipv6_setsockopt(sk, level, optname, optval, optlen);
 }
 
-static int udpv6_getsockopt(struct sock *sk, int level, int optname, 
-			  char __user *optval, int __user *optlen)
+#ifdef CONFIG_COMPAT
+int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
+			    char __user *optval, unsigned int optlen)
 {
-	struct udp_sock *up = udp_sk(sk);
-	int val, len;
-
-	if (level != SOL_UDP)
-		return ipv6_getsockopt(sk, level, optname, optval, optlen);
-
-	if(get_user(len,optlen))
-		return -EFAULT;
-
-	len = min_t(unsigned int, len, sizeof(int));
-	
-	if(len < 0)
-		return -EINVAL;
-
-	switch(optname) {
-	case UDP_CORK:
-		val = up->corkflag;
-		break;
-
-	case UDP_ENCAP:
-		val = up->encap_type;
-		break;
+	if (level == SOL_UDP  ||  level == SOL_UDPLITE)
+		return udp_lib_setsockopt(sk, level, optname, optval, optlen,
+					  udp_v6_push_pending_frames);
+	return compat_ipv6_setsockopt(sk, level, optname, optval, optlen);
+}
+#endif
 
-	default:
-		return -ENOPROTOOPT;
-	};
+int udpv6_getsockopt(struct sock *sk, int level, int optname,
+		     char __user *optval, int __user *optlen)
+{
+	if (level == SOL_UDP  ||  level == SOL_UDPLITE)
+		return udp_lib_getsockopt(sk, level, optname, optval, optlen);
+	return ipv6_getsockopt(sk, level, optname, optval, optlen);
+}
 
-  	if(put_user(len, optlen))
-  		return -EFAULT;
-	if(copy_to_user(optval, &val,len))
-		return -EFAULT;
-  	return 0;
+#ifdef CONFIG_COMPAT
+int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
+			    char __user *optval, int __user *optlen)
+{
+	if (level == SOL_UDP  ||  level == SOL_UDPLITE)
+		return udp_lib_getsockopt(sk, level, optname, optval, optlen);
+	return compat_ipv6_getsockopt(sk, level, optname, optval, optlen);
 }
+#endif
 
-static struct inet6_protocol udpv6_protocol = {
+static const struct inet6_protocol udpv6_protocol = {
 	.handler	=	udpv6_rcv,
 	.err_handler	=	udpv6_err,
 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
@@ -989,87 +1419,90 @@ static struct inet6_protocol udpv6_protocol = {
 
 /* ------------------------------------------------------------------------ */
 #ifdef CONFIG_PROC_FS
-
-static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket)
+int udp6_seq_show(struct seq_file *seq, void *v)
 {
-	struct inet_sock *inet = inet_sk(sp);
-	struct ipv6_pinfo *np = inet6_sk(sp);
-	struct in6_addr *dest, *src;
-	__u16 destp, srcp;
-
-	dest  = &np->daddr;
-	src   = &np->rcv_saddr;
-	destp = ntohs(inet->dport);
-	srcp  = ntohs(inet->sport);
-	seq_printf(seq,
-		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
-		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n",
-		   bucket,
-		   src->s6_addr32[0], src->s6_addr32[1],
-		   src->s6_addr32[2], src->s6_addr32[3], srcp,
-		   dest->s6_addr32[0], dest->s6_addr32[1],
-		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
-		   sp->sk_state, 
-		   atomic_read(&sp->sk_wmem_alloc),
-		   atomic_read(&sp->sk_rmem_alloc),
-		   0, 0L, 0,
-		   sock_i_uid(sp), 0,
-		   sock_i_ino(sp),
-		   atomic_read(&sp->sk_refcnt), sp);
-}
-
-static int udp6_seq_show(struct seq_file *seq, void *v)
-{
-	if (v == SEQ_START_TOKEN)
-		seq_printf(seq,
-			   "  sl  "
-			   "local_address                         "
-			   "remote_address                        "
-			   "st tx_queue rx_queue tr tm->when retrnsmt"
-			   "   uid  timeout inode\n");
-	else
-		udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket);
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
+	} else {
+		int bucket = ((struct udp_iter_state *)seq->private)->bucket;
+		struct inet_sock *inet = inet_sk(v);
+		__u16 srcp = ntohs(inet->inet_sport);
+		__u16 destp = ntohs(inet->inet_dport);
+		ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket);
+	}
 	return 0;
 }
 
-static struct file_operations udp6_seq_fops;
+static const struct file_operations udp6_afinfo_seq_fops = {
+	.owner    = THIS_MODULE,
+	.open     = udp_seq_open,
+	.read     = seq_read,
+	.llseek   = seq_lseek,
+	.release  = seq_release_net
+};
+
 static struct udp_seq_afinfo udp6_seq_afinfo = {
-	.owner		= THIS_MODULE,
 	.name		= "udp6",
 	.family		= AF_INET6,
-	.seq_show	= udp6_seq_show,
-	.seq_fops	= &udp6_seq_fops,
+	.udp_table	= &udp_table,
+	.seq_fops	= &udp6_afinfo_seq_fops,
+	.seq_ops	= {
+		.show		= udp6_seq_show,
+	},
 };
 
-int __init udp6_proc_init(void)
+int __net_init udp6_proc_init(struct net *net)
 {
-	return udp_proc_register(&udp6_seq_afinfo);
+	return udp_proc_register(net, &udp6_seq_afinfo);
 }
 
-void udp6_proc_exit(void) {
-	udp_proc_unregister(&udp6_seq_afinfo);
+void udp6_proc_exit(struct net *net) {
+	udp_proc_unregister(net, &udp6_seq_afinfo);
 }
 #endif /* CONFIG_PROC_FS */
 
+void udp_v6_clear_sk(struct sock *sk, int size)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	/* we do not want to clear pinet6 field, because of RCU lookups */
+	sk_prot_clear_portaddr_nulls(sk, offsetof(struct inet_sock, pinet6));
+
+	size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
+	memset(&inet->pinet6 + 1, 0, size);
+}
+
 /* ------------------------------------------------------------------------ */
 
 struct proto udpv6_prot = {
-	.name =		"UDPv6",
-	.owner =	THIS_MODULE,
-	.close =	udpv6_close,
-	.connect =	ip6_datagram_connect,
-	.disconnect =	udp_disconnect,
-	.ioctl =	udp_ioctl,
-	.destroy =	udpv6_destroy_sock,
-	.setsockopt =	udpv6_setsockopt,
-	.getsockopt =	udpv6_getsockopt,
-	.sendmsg =	udpv6_sendmsg,
-	.recvmsg =	udpv6_recvmsg,
-	.backlog_rcv =	udpv6_queue_rcv_skb,
-	.hash =		udp_v6_hash,
-	.unhash =	udp_v6_unhash,
-	.get_port =	udp_v6_get_port,
-	.obj_size =	sizeof(struct udp6_sock),
+	.name		   = "UDPv6",
+	.owner		   = THIS_MODULE,
+	.close		   = udp_lib_close,
+	.connect	   = ip6_datagram_connect,
+	.disconnect	   = udp_disconnect,
+	.ioctl		   = udp_ioctl,
+	.destroy	   = udpv6_destroy_sock,
+	.setsockopt	   = udpv6_setsockopt,
+	.getsockopt	   = udpv6_getsockopt,
+	.sendmsg	   = udpv6_sendmsg,
+	.recvmsg	   = udpv6_recvmsg,
+	.backlog_rcv	   = __udpv6_queue_rcv_skb,
+	.hash		   = udp_lib_hash,
+	.unhash		   = udp_lib_unhash,
+	.rehash		   = udp_v6_rehash,
+	.get_port	   = udp_v6_get_port,
+	.memory_allocated  = &udp_memory_allocated,
+	.sysctl_mem	   = sysctl_udp_mem,
+	.sysctl_wmem	   = &sysctl_udp_wmem_min,
+	.sysctl_rmem	   = &sysctl_udp_rmem_min,
+	.obj_size	   = sizeof(struct udp6_sock),
+	.slab_flags	   = SLAB_DESTROY_BY_RCU,
+	.h.udp_table	   = &udp_table,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_udpv6_setsockopt,
+	.compat_getsockopt = compat_udpv6_getsockopt,
+#endif
+	.clear_sk	   = udp_v6_clear_sk,
 };
 
 static struct inet_protosw udpv6_protosw = {
@@ -1077,15 +1510,31 @@ static struct inet_protosw udpv6_protosw = {
 	.protocol =  IPPROTO_UDP,
 	.prot =      &udpv6_prot,
 	.ops =       &inet6_dgram_ops,
-	.capability =-1,
-	.no_check =  UDP_CSUM_DEFAULT,
 	.flags =     INET_PROTOSW_PERMANENT,
 };
 
 
-void __init udpv6_init(void)
+int __init udpv6_init(void)
+{
+	int ret;
+
+	ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP);
+	if (ret)
+		goto out;
+
+	ret = inet6_register_protosw(&udpv6_protosw);
+	if (ret)
+		goto out_udpv6_protocol;
+out:
+	return ret;
+
+out_udpv6_protocol:
+	inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
+	goto out;
+}
+
+void udpv6_exit(void)
 {
-	if (inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP) < 0)
-		printk(KERN_ERR "udpv6_init: Could not register protocol\n");
-	inet6_register_protosw(&udpv6_protosw);
+	inet6_unregister_protosw(&udpv6_protosw);
+	inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
 }
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
new file mode 100644
index 00000000000..c779c3c90b9
--- /dev/null
+++ b/net/ipv6/udp_impl.h
@@ -0,0 +1,38 @@
+#ifndef _UDP6_IMPL_H
+#define _UDP6_IMPL_H
+#include <net/udp.h>
+#include <net/udplite.h>
+#include <net/protocol.h>
+#include <net/addrconf.h>
+#include <net/inet_common.h>
+#include <net/transp_v6.h>
+
+int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int);
+void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int,
+		    __be32, struct udp_table *);
+
+int udp_v6_get_port(struct sock *sk, unsigned short snum);
+
+int udpv6_getsockopt(struct sock *sk, int level, int optname,
+		     char __user *optval, int __user *optlen);
+int udpv6_setsockopt(struct sock *sk, int level, int optname,
+		     char __user *optval, unsigned int optlen);
+#ifdef CONFIG_COMPAT
+int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
+			    char __user *optval, unsigned int optlen);
+int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
+			    char __user *optval, int __user *optlen);
+#endif
+int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		  size_t len);
+int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		  size_t len, int noblock, int flags, int *addr_len);
+int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+void udpv6_destroy_sock(struct sock *sk);
+
+void udp_v6_clear_sk(struct sock *sk, int size);
+
+#ifdef CONFIG_PROC_FS
+int udp6_seq_show(struct seq_file *seq, void *v);
+#endif
+#endif	/* _UDP6_IMPL_H */
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
new file mode 100644
index 00000000000..0ae3d98f83e
--- /dev/null
+++ b/net/ipv6/udp_offload.c
@@ -0,0 +1,140 @@
+/*
+ *	IPV6 GSO/GRO offload support
+ *	Linux INET6 implementation
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *      UDPv6 GSO support
+ */
+#include <linux/skbuff.h>
+#include <net/protocol.h>
+#include <net/ipv6.h>
+#include <net/udp.h>
+#include <net/ip6_checksum.h>
+#include "ip6_offload.h"
+
+static int udp6_ufo_send_check(struct sk_buff *skb)
+{
+	const struct ipv6hdr *ipv6h;
+	struct udphdr *uh;
+
+	if (!pskb_may_pull(skb, sizeof(*uh)))
+		return -EINVAL;
+
+	if (likely(!skb->encapsulation)) {
+		ipv6h = ipv6_hdr(skb);
+		uh = udp_hdr(skb);
+
+		uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
+					     IPPROTO_UDP, 0);
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		skb->ip_summed = CHECKSUM_PARTIAL;
+	}
+
+	return 0;
+}
+
+static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
+					 netdev_features_t features)
+{
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	unsigned int mss;
+	unsigned int unfrag_ip6hlen, unfrag_len;
+	struct frag_hdr *fptr;
+	u8 *packet_start, *prevhdr;
+	u8 nexthdr;
+	u8 frag_hdr_sz = sizeof(struct frag_hdr);
+	int offset;
+	__wsum csum;
+	int tnl_hlen;
+
+	mss = skb_shinfo(skb)->gso_size;
+	if (unlikely(skb->len <= mss))
+		goto out;
+
+	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
+		/* Packet is from an untrusted source, reset gso_segs. */
+		int type = skb_shinfo(skb)->gso_type;
+
+		if (unlikely(type & ~(SKB_GSO_UDP |
+				      SKB_GSO_DODGY |
+				      SKB_GSO_UDP_TUNNEL |
+				      SKB_GSO_UDP_TUNNEL_CSUM |
+				      SKB_GSO_GRE |
+				      SKB_GSO_GRE_CSUM |
+				      SKB_GSO_IPIP |
+				      SKB_GSO_SIT |
+				      SKB_GSO_MPLS) ||
+			     !(type & (SKB_GSO_UDP))))
+			goto out;
+
+		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
+
+		segs = NULL;
+		goto out;
+	}
+
+	if (skb->encapsulation && skb_shinfo(skb)->gso_type &
+	    (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
+		segs = skb_udp_tunnel_segment(skb, features);
+	else {
+		/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
+		 * do checksum of UDP packets sent as multiple IP fragments.
+		 */
+		offset = skb_checksum_start_offset(skb);
+		csum = skb_checksum(skb, offset, skb->len - offset, 0);
+		offset += skb->csum_offset;
+		*(__sum16 *)(skb->data + offset) = csum_fold(csum);
+		skb->ip_summed = CHECKSUM_NONE;
+
+		/* Check if there is enough headroom to insert fragment header. */
+		tnl_hlen = skb_tnl_header_len(skb);
+		if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
+			if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
+				goto out;
+		}
+
+		/* Find the unfragmentable header and shift it left by frag_hdr_sz
+		 * bytes to insert fragment header.
+		 */
+		unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+		nexthdr = *prevhdr;
+		*prevhdr = NEXTHDR_FRAGMENT;
+		unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
+			     unfrag_ip6hlen + tnl_hlen;
+		packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
+		memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
+
+		SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
+		skb->mac_header -= frag_hdr_sz;
+		skb->network_header -= frag_hdr_sz;
+
+		fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
+		fptr->nexthdr = nexthdr;
+		fptr->reserved = 0;
+		fptr->identification = skb_shinfo(skb)->ip6_frag_id;
+
+		/* Fragment the skb. ipv6 header and the remaining fields of the
+		 * fragment header are updated in ipv6_gso_segment()
+		 */
+		segs = skb_segment(skb, features);
+	}
+
+out:
+	return segs;
+}
+static const struct net_offload udpv6_offload = {
+	.callbacks = {
+		.gso_send_check =	udp6_ufo_send_check,
+		.gso_segment	=	udp6_ufo_fragment,
+	},
+};
+
+int __init udp_offload_init(void)
+{
+	return inet6_add_offload(&udpv6_offload, IPPROTO_UDP);
+}
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
new file mode 100644
index 00000000000..9cf097e206e
--- /dev/null
+++ b/net/ipv6/udplite.c
@@ -0,0 +1,139 @@
+/*
+ *  UDPLITEv6   An implementation of the UDP-Lite protocol over IPv6.
+ *              See also net/ipv4/udplite.c
+ *
+ *  Authors:    Gerrit Renker       <gerrit@erg.abdn.ac.uk>
+ *
+ *  Changes:
+ *  Fixes:
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+#include <linux/export.h>
+#include "udp_impl.h"
+
+static int udplitev6_rcv(struct sk_buff *skb)
+{
+	return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
+}
+
+static void udplitev6_err(struct sk_buff *skb,
+			  struct inet6_skb_parm *opt,
+			  u8 type, u8 code, int offset, __be32 info)
+{
+	__udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table);
+}
+
+static const struct inet6_protocol udplitev6_protocol = {
+	.handler	=	udplitev6_rcv,
+	.err_handler	=	udplitev6_err,
+	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+struct proto udplitev6_prot = {
+	.name		   = "UDPLITEv6",
+	.owner		   = THIS_MODULE,
+	.close		   = udp_lib_close,
+	.connect	   = ip6_datagram_connect,
+	.disconnect	   = udp_disconnect,
+	.ioctl		   = udp_ioctl,
+	.init		   = udplite_sk_init,
+	.destroy	   = udpv6_destroy_sock,
+	.setsockopt	   = udpv6_setsockopt,
+	.getsockopt	   = udpv6_getsockopt,
+	.sendmsg	   = udpv6_sendmsg,
+	.recvmsg	   = udpv6_recvmsg,
+	.backlog_rcv	   = udpv6_queue_rcv_skb,
+	.hash		   = udp_lib_hash,
+	.unhash		   = udp_lib_unhash,
+	.get_port	   = udp_v6_get_port,
+	.obj_size	   = sizeof(struct udp6_sock),
+	.slab_flags	   = SLAB_DESTROY_BY_RCU,
+	.h.udp_table	   = &udplite_table,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_udpv6_setsockopt,
+	.compat_getsockopt = compat_udpv6_getsockopt,
+#endif
+	.clear_sk	   = udp_v6_clear_sk,
+};
+
+static struct inet_protosw udplite6_protosw = {
+	.type		= SOCK_DGRAM,
+	.protocol	= IPPROTO_UDPLITE,
+	.prot		= &udplitev6_prot,
+	.ops		= &inet6_dgram_ops,
+	.flags		= INET_PROTOSW_PERMANENT,
+};
+
+int __init udplitev6_init(void)
+{
+	int ret;
+
+	ret = inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
+	if (ret)
+		goto out;
+
+	ret = inet6_register_protosw(&udplite6_protosw);
+	if (ret)
+		goto out_udplitev6_protocol;
+out:
+	return ret;
+
+out_udplitev6_protocol:
+	inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
+	goto out;
+}
+
+void udplitev6_exit(void)
+{
+	inet6_unregister_protosw(&udplite6_protosw);
+	inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
+}
+
+#ifdef CONFIG_PROC_FS
+
+static const struct file_operations udplite6_afinfo_seq_fops = {
+	.owner    = THIS_MODULE,
+	.open     = udp_seq_open,
+	.read     = seq_read,
+	.llseek   = seq_lseek,
+	.release  = seq_release_net
+};
+
+static struct udp_seq_afinfo udplite6_seq_afinfo = {
+	.name		= "udplite6",
+	.family		= AF_INET6,
+	.udp_table	= &udplite_table,
+	.seq_fops	= &udplite6_afinfo_seq_fops,
+	.seq_ops	= {
+		.show		= udp6_seq_show,
+	},
+};
+
+static int __net_init udplite6_proc_init_net(struct net *net)
+{
+	return udp_proc_register(net, &udplite6_seq_afinfo);
+}
+
+static void __net_exit udplite6_proc_exit_net(struct net *net)
+{
+	udp_proc_unregister(net, &udplite6_seq_afinfo);
+}
+
+static struct pernet_operations udplite6_net_ops = {
+	.init = udplite6_proc_init_net,
+	.exit = udplite6_proc_exit_net,
+};
+
+int __init udplite6_proc_init(void)
+{
+	return register_pernet_subsys(&udplite6_net_ops);
+}
+
+void udplite6_proc_exit(void)
+{
+	unregister_pernet_subsys(&udplite6_net_ops);
+}
+#endif
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 28c29d78338..f8c3cf842f5 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -11,140 +11,136 @@
 
 #include <linux/module.h>
 #include <linux/string.h>
-#include <net/dsfield.h>
-#include <net/inet_ecn.h>
-#include <net/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
 #include <net/ipv6.h>
 #include <net/xfrm.h>
 
-static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
+int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct ipv6hdr *outer_iph = skb->nh.ipv6h;
-	struct ipv6hdr *inner_iph = skb->h.ipv6h;
+	return xfrm6_extract_header(skb);
+}
 
-	if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
-		IP6_ECN_set_ce(inner_iph);
+int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
+{
+	XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
+	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
+	return xfrm_input(skb, nexthdr, spi, 0);
 }
+EXPORT_SYMBOL(xfrm6_rcv_spi);
 
-int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
+int xfrm6_transport_finish(struct sk_buff *skb, int async)
 {
-	struct sk_buff *skb = *pskb;
-	int err;
-	u32 seq;
-	struct sec_decap_state xfrm_vec[XFRM_MAX_DEPTH];
-	struct xfrm_state *x;
-	int xfrm_nr = 0;
-	int decaps = 0;
-	int nexthdr;
-	unsigned int nhoff;
-
-	nhoff = *nhoffp;
-	nexthdr = skb->nh.raw[nhoff];
-
-	seq = 0;
-	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
-		goto drop;
-	
-	do {
-		struct ipv6hdr *iph = skb->nh.ipv6h;
+	skb_network_header(skb)[IP6CB(skb)->nhoff] =
+		XFRM_MODE_SKB_CB(skb)->protocol;
 
-		if (xfrm_nr == XFRM_MAX_DEPTH)
-			goto drop;
+#ifndef CONFIG_NETFILTER
+	if (!async)
+		return 1;
+#endif
 
-		x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, nexthdr, AF_INET6);
-		if (x == NULL)
-			goto drop;
-		spin_lock(&x->lock);
-		if (unlikely(x->km.state != XFRM_STATE_VALID))
-			goto drop_unlock;
-
-		if (x->props.replay_window && xfrm_replay_check(x, seq))
-			goto drop_unlock;
-
-		if (xfrm_state_check_expire(x))
-			goto drop_unlock;
-
-		nexthdr = x->type->input(x, &(xfrm_vec[xfrm_nr].decap), skb);
-		if (nexthdr <= 0)
-			goto drop_unlock;
-
-		skb->nh.raw[nhoff] = nexthdr;
-
-		if (x->props.replay_window)
-			xfrm_replay_advance(x, seq);
-
-		x->curlft.bytes += skb->len;
-		x->curlft.packets++;
-
-		spin_unlock(&x->lock);
-
-		xfrm_vec[xfrm_nr++].xvec = x;
-
-		if (x->props.mode) { /* XXX */
-			if (nexthdr != IPPROTO_IPV6)
-				goto drop;
-			if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
-				goto drop;
-			if (skb_cloned(skb) &&
-			    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-				goto drop;
-			if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-				ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h);
-			if (!(x->props.flags & XFRM_STATE_NOECN))
-				ipip6_ecn_decapsulate(skb);
-			skb->mac.raw = memmove(skb->data - skb->mac_len,
-					       skb->mac.raw, skb->mac_len);
-			skb->nh.raw = skb->data;
-			decaps = 1;
-			break;
-		}
+	ipv6_hdr(skb)->payload_len = htons(skb->len);
+	__skb_push(skb, skb->data - skb_network_header(skb));
 
-		if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) < 0)
-			goto drop;
-	} while (!err);
+	NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
+		ip6_rcv_finish);
+	return -1;
+}
+
+int xfrm6_rcv(struct sk_buff *skb)
+{
+	return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
+			     0);
+}
+
+EXPORT_SYMBOL(xfrm6_rcv);
+
+int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
+		     xfrm_address_t *saddr, u8 proto)
+{
+	struct net *net = dev_net(skb->dev);
+	struct xfrm_state *x = NULL;
+	int i = 0;
 
 	/* Allocate new secpath or COW existing one. */
 	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
 		struct sec_path *sp;
+
 		sp = secpath_dup(skb->sp);
-		if (!sp)
+		if (!sp) {
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
 			goto drop;
+		}
 		if (skb->sp)
 			secpath_put(skb->sp);
 		skb->sp = sp;
 	}
 
-	if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
+	if (1 + skb->sp->len == XFRM_MAX_DEPTH) {
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
 		goto drop;
+	}
 
-	memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
-	skb->sp->len += xfrm_nr;
-	skb->ip_summed = CHECKSUM_NONE;
+	for (i = 0; i < 3; i++) {
+		xfrm_address_t *dst, *src;
 
-	if (decaps) {
-		if (!(skb->dev->flags&IFF_LOOPBACK)) {
-			dst_release(skb->dst);
-			skb->dst = NULL;
+		switch (i) {
+		case 0:
+			dst = daddr;
+			src = saddr;
+			break;
+		case 1:
+			/* lookup state with wild-card source address */
+			dst = daddr;
+			src = (xfrm_address_t *)&in6addr_any;
+			break;
+		default:
+			/* lookup state with wild-card addresses */
+			dst = (xfrm_address_t *)&in6addr_any;
+			src = (xfrm_address_t *)&in6addr_any;
+			break;
 		}
-		netif_rx(skb);
-		return -1;
-	} else {
-		return 1;
+
+		x = xfrm_state_lookup_byaddr(net, skb->mark, dst, src, proto, AF_INET6);
+		if (!x)
+			continue;
+
+		spin_lock(&x->lock);
+
+		if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) &&
+		    likely(x->km.state == XFRM_STATE_VALID) &&
+		    !xfrm_state_check_expire(x)) {
+			spin_unlock(&x->lock);
+			if (x->type->input(x, skb) > 0) {
+				/* found a valid state */
+				break;
+			}
+		} else
+			spin_unlock(&x->lock);
+
+		xfrm_state_put(x);
+		x = NULL;
 	}
 
-drop_unlock:
+	if (!x) {
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
+		xfrm_audit_state_notfound_simple(skb, AF_INET6);
+		goto drop;
+	}
+
+	skb->sp->xvec[skb->sp->len++] = x;
+
+	spin_lock(&x->lock);
+
+	x->curlft.bytes += skb->len;
+	x->curlft.packets++;
+
 	spin_unlock(&x->lock);
-	xfrm_state_put(x);
+
+	return 1;
+
 drop:
-	while (--xfrm_nr >= 0)
-		xfrm_state_put(xfrm_vec[xfrm_nr].xvec);
-	kfree_skb(skb);
 	return -1;
 }
 
-EXPORT_SYMBOL(xfrm6_rcv_spi);
-
-int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
-{
-	return xfrm6_rcv_spi(pskb, nhoffp, 0);
-}
+EXPORT_SYMBOL(xfrm6_input_addr);
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
new file mode 100644
index 00000000000..9949a356d62
--- /dev/null
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -0,0 +1,131 @@
+/*
+ * xfrm6_mode_beet.c - BEET mode encapsulation for IPv6.
+ *
+ * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
+ *                    Miika Komu     <miika@iki.fi>
+ *                    Herbert Xu     <herbert@gondor.apana.org.au>
+ *                    Abhinav Pathak <abhinav.pathak@hiit.fi>
+ *                    Jeff Ahrenholz <ahrenholz@gmail.com>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/stringify.h>
+#include <net/dsfield.h>
+#include <net/dst.h>
+#include <net/inet_ecn.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+static void xfrm6_beet_make_header(struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+
+	iph->version = 6;
+
+	memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
+	       sizeof(iph->flow_lbl));
+	iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol;
+
+	ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos);
+	iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl;
+}
+
+/* Add encapsulation header.
+ *
+ * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
+ */
+static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *top_iph;
+	struct ip_beet_phdr *ph;
+	int optlen, hdr_len;
+
+	hdr_len = 0;
+	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
+	if (unlikely(optlen))
+		hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);
+
+	skb_set_network_header(skb, -x->props.header_len - hdr_len);
+	if (x->sel.family != AF_INET6)
+		skb->network_header += IPV4_BEET_PHMAXLEN;
+	skb->mac_header = skb->network_header +
+			  offsetof(struct ipv6hdr, nexthdr);
+	skb->transport_header = skb->network_header + sizeof(*top_iph);
+	ph = (struct ip_beet_phdr *)__skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl-hdr_len);
+
+	xfrm6_beet_make_header(skb);
+
+	top_iph = ipv6_hdr(skb);
+	if (unlikely(optlen)) {
+
+		BUG_ON(optlen < 0);
+
+		ph->padlen = 4 - (optlen & 4);
+		ph->hdrlen = optlen / 8;
+		ph->nexthdr = top_iph->nexthdr;
+		if (ph->padlen)
+			memset(ph + 1, IPOPT_NOP, ph->padlen);
+
+		top_iph->nexthdr = IPPROTO_BEETPH;
+	}
+
+	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
+	return 0;
+}
+
+static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *ip6h;
+	int size = sizeof(struct ipv6hdr);
+	int err;
+
+	err = skb_cow_head(skb, size + skb->mac_len);
+	if (err)
+		goto out;
+
+	__skb_push(skb, size);
+	skb_reset_network_header(skb);
+	skb_mac_header_rebuild(skb);
+
+	xfrm6_beet_make_header(skb);
+
+	ip6h = ipv6_hdr(skb);
+	ip6h->payload_len = htons(skb->len - size);
+	ip6h->daddr = *(struct in6_addr *)&x->sel.daddr.a6;
+	ip6h->saddr = *(struct in6_addr *)&x->sel.saddr.a6;
+	err = 0;
+out:
+	return err;
+}
+
+static struct xfrm_mode xfrm6_beet_mode = {
+	.input2 = xfrm6_beet_input,
+	.input = xfrm_prepare_input,
+	.output2 = xfrm6_beet_output,
+	.output = xfrm6_prepare_output,
+	.owner = THIS_MODULE,
+	.encap = XFRM_MODE_BEET,
+	.flags = XFRM_MODE_FLAG_TUNNEL,
+};
+
+static int __init xfrm6_beet_init(void)
+{
+	return xfrm_register_mode(&xfrm6_beet_mode, AF_INET6);
+}
+
+static void __exit xfrm6_beet_exit(void)
+{
+	int err;
+
+	err = xfrm_unregister_mode(&xfrm6_beet_mode, AF_INET6);
+	BUG_ON(err);
+}
+
+module_init(xfrm6_beet_init);
+module_exit(xfrm6_beet_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_BEET);
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
new file mode 100644
index 00000000000..0e015906f9c
--- /dev/null
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -0,0 +1,83 @@
+/*
+ * xfrm6_mode_ro.c - Route optimization mode for IPv6.
+ *
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Authors:
+ *	Noriaki TAKAMIYA @USAGI
+ *	Masahide NAKAMURA @USAGI
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/stringify.h>
+#include <linux/time.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+/* Add route optimization header space.
+ *
+ * The IP header and mutable extension headers will be moved forward to make
+ * space for the route optimization header.
+ */
+static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph;
+	u8 *prevhdr;
+	int hdr_len;
+
+	iph = ipv6_hdr(skb);
+
+	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+	skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
+	skb_set_network_header(skb, -x->props.header_len);
+	skb->transport_header = skb->network_header + hdr_len;
+	__skb_pull(skb, hdr_len);
+	memmove(ipv6_hdr(skb), iph, hdr_len);
+
+	x->lastused = get_seconds();
+
+	return 0;
+}
+
+static struct xfrm_mode xfrm6_ro_mode = {
+	.output = xfrm6_ro_output,
+	.owner = THIS_MODULE,
+	.encap = XFRM_MODE_ROUTEOPTIMIZATION,
+};
+
+static int __init xfrm6_ro_init(void)
+{
+	return xfrm_register_mode(&xfrm6_ro_mode, AF_INET6);
+}
+
+static void __exit xfrm6_ro_exit(void)
+{
+	int err;
+
+	err = xfrm_unregister_mode(&xfrm6_ro_mode, AF_INET6);
+	BUG_ON(err);
+}
+
+module_init(xfrm6_ro_init);
+module_exit(xfrm6_ro_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_ROUTEOPTIMIZATION);
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
new file mode 100644
index 00000000000..4e344105b3f
--- /dev/null
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -0,0 +1,85 @@
+/*
+ * xfrm6_mode_transport.c - Transport mode encapsulation for IPv6.
+ *
+ * Copyright (C) 2002 USAGI/WIDE Project
+ * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/stringify.h>
+#include <net/dst.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+/* Add encapsulation header.
+ *
+ * The IP header and mutable extension headers will be moved forward to make
+ * space for the encapsulation header.
+ */
+static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph;
+	u8 *prevhdr;
+	int hdr_len;
+
+	iph = ipv6_hdr(skb);
+
+	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+	skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
+	skb_set_network_header(skb, -x->props.header_len);
+	skb->transport_header = skb->network_header + hdr_len;
+	__skb_pull(skb, hdr_len);
+	memmove(ipv6_hdr(skb), iph, hdr_len);
+	return 0;
+}
+
+/* Remove encapsulation header.
+ *
+ * The IP header will be moved over the top of the encapsulation header.
+ *
+ * On entry, skb->h shall point to where the IP header should be and skb->nh
+ * shall be set to where the IP header currently is.  skb->data shall point
+ * to the start of the payload.
+ */
+static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int ihl = skb->data - skb_transport_header(skb);
+
+	if (skb->transport_header != skb->network_header) {
+		memmove(skb_transport_header(skb),
+			skb_network_header(skb), ihl);
+		skb->network_header = skb->transport_header;
+	}
+	ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
+					   sizeof(struct ipv6hdr));
+	skb_reset_transport_header(skb);
+	return 0;
+}
+
+static struct xfrm_mode xfrm6_transport_mode = {
+	.input = xfrm6_transport_input,
+	.output = xfrm6_transport_output,
+	.owner = THIS_MODULE,
+	.encap = XFRM_MODE_TRANSPORT,
+};
+
+static int __init xfrm6_transport_init(void)
+{
+	return xfrm_register_mode(&xfrm6_transport_mode, AF_INET6);
+}
+
+static void __exit xfrm6_transport_exit(void)
+{
+	int err;
+
+	err = xfrm_unregister_mode(&xfrm6_transport_mode, AF_INET6);
+	BUG_ON(err);
+}
+
+module_init(xfrm6_transport_init);
+module_exit(xfrm6_transport_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TRANSPORT);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
new file mode 100644
index 00000000000..901ef6f8add
--- /dev/null
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -0,0 +1,126 @@
+/*
+ * xfrm6_mode_tunnel.c - Tunnel mode encapsulation for IPv6.
+ *
+ * Copyright (C) 2002 USAGI/WIDE Project
+ * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
+ */
+
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/stringify.h>
+#include <net/dsfield.h>
+#include <net/dst.h>
+#include <net/inet_ecn.h>
+#include <net/ip6_route.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
+{
+	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
+	struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
+
+	if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
+		IP6_ECN_set_ce(inner_iph);
+}
+
+/* Add encapsulation header.
+ *
+ * The top IP header will be constructed per RFC 2401.
+ */
+static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct ipv6hdr *top_iph;
+	int dsfield;
+
+	skb_set_network_header(skb, -x->props.header_len);
+	skb->mac_header = skb->network_header +
+			  offsetof(struct ipv6hdr, nexthdr);
+	skb->transport_header = skb->network_header + sizeof(*top_iph);
+	top_iph = ipv6_hdr(skb);
+
+	top_iph->version = 6;
+
+	memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
+	       sizeof(top_iph->flow_lbl));
+	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
+
+	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
+		dsfield = 0;
+	else
+		dsfield = XFRM_MODE_SKB_CB(skb)->tos;
+	dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
+	if (x->props.flags & XFRM_STATE_NOECN)
+		dsfield &= ~INET_ECN_MASK;
+	ipv6_change_dsfield(top_iph, 0, dsfield);
+	top_iph->hop_limit = ip6_dst_hoplimit(dst->child);
+	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
+	return 0;
+}
+
+#define for_each_input_rcu(head, handler)	\
+	for (handler = rcu_dereference(head);	\
+	     handler != NULL;			\
+	     handler = rcu_dereference(handler->next))
+
+
+static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err = -EINVAL;
+
+	if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
+		goto out;
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+		goto out;
+
+	err = skb_unclone(skb, GFP_ATOMIC);
+	if (err)
+		goto out;
+
+	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+		ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
+			       ipipv6_hdr(skb));
+	if (!(x->props.flags & XFRM_STATE_NOECN))
+		ipip6_ecn_decapsulate(skb);
+
+	skb_reset_network_header(skb);
+	skb_mac_header_rebuild(skb);
+
+	err = 0;
+
+out:
+	return err;
+}
+
+static struct xfrm_mode xfrm6_tunnel_mode = {
+	.input2 = xfrm6_mode_tunnel_input,
+	.input = xfrm_prepare_input,
+	.output2 = xfrm6_mode_tunnel_output,
+	.output = xfrm6_prepare_output,
+	.owner = THIS_MODULE,
+	.encap = XFRM_MODE_TUNNEL,
+	.flags = XFRM_MODE_FLAG_TUNNEL,
+};
+
+static int __init xfrm6_mode_tunnel_init(void)
+{
+	return xfrm_register_mode(&xfrm6_tunnel_mode, AF_INET6);
+}
+
+static void __exit xfrm6_mode_tunnel_exit(void)
+{
+	int err;
+
+	err = xfrm_unregister_mode(&xfrm6_tunnel_mode, AF_INET6);
+	BUG_ON(err);
+}
+
+module_init(xfrm6_mode_tunnel_init);
+module_exit(xfrm6_mode_tunnel_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6b9867717d1..433672d07d0 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -2,143 +2,173 @@
  * xfrm6_output.c - Common IPsec encapsulation code for IPv6.
  * Copyright (C) 2002 USAGI/WIDE Project
  * Copyright (c) 2004 Herbert Xu <herbert@gondor.apana.org.au>
- * 
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/if_ether.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/skbuff.h>
-#include <linux/spinlock.h>
 #include <linux/icmpv6.h>
-#include <net/dsfield.h>
-#include <net/inet_ecn.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/dst.h>
 #include <net/ipv6.h>
+#include <net/ip6_route.h>
 #include <net/xfrm.h>
 
-/* Add encapsulation header.
- *
- * In transport mode, the IP header and mutable extension headers will be moved
- * forward to make space for the encapsulation header.
- *
- * In tunnel mode, the top IP header will be constructed per RFC 2401.
- * The following fields in it shall be filled in by x->type->output:
- *	payload_len
- *
- * On exit, skb->h will be set to the start of the encapsulation header to be
- * filled in by x->type->output and skb->nh will be set to the nextheader field
- * of the extension header directly preceding the encapsulation header, or in
- * its absence, that of the top IP header.  The value of skb->data will always
- * point to the top IP header.
- */
-static void xfrm6_encap(struct sk_buff *skb)
+int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
+			  u8 **prevhdr)
 {
-	struct dst_entry *dst = skb->dst;
-	struct xfrm_state *x = dst->xfrm;
-	struct ipv6hdr *iph, *top_iph;
-	int dsfield;
+	return ip6_find_1stfragopt(skb, prevhdr);
+}
+
+EXPORT_SYMBOL(xfrm6_find_1stfragopt);
 
-	skb_push(skb, x->props.header_len);
-	iph = skb->nh.ipv6h;
+static int xfrm6_local_dontfrag(struct sk_buff *skb)
+{
+	int proto;
+	struct sock *sk = skb->sk;
 
-	if (!x->props.mode) {
-		u8 *prevhdr;
-		int hdr_len;
+	if (sk) {
+		if (sk->sk_family != AF_INET6)
+			return 0;
 
-		hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
-		skb->nh.raw = prevhdr - x->props.header_len;
-		skb->h.raw = skb->data + hdr_len;
-		memmove(skb->data, iph, hdr_len);
-		return;
+		proto = sk->sk_protocol;
+		if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
+			return inet6_sk(sk)->dontfrag;
 	}
 
-	skb->nh.raw = skb->data;
-	top_iph = skb->nh.ipv6h;
-	skb->nh.raw = &top_iph->nexthdr;
-	skb->h.ipv6h = top_iph + 1;
-
-	top_iph->version = 6;
-	top_iph->priority = iph->priority;
-	top_iph->flow_lbl[0] = iph->flow_lbl[0];
-	top_iph->flow_lbl[1] = iph->flow_lbl[1];
-	top_iph->flow_lbl[2] = iph->flow_lbl[2];
-	dsfield = ipv6_get_dsfield(top_iph);
-	dsfield = INET_ECN_encapsulate(dsfield, dsfield);
-	if (x->props.flags & XFRM_STATE_NOECN)
-		dsfield &= ~INET_ECN_MASK;
-	ipv6_change_dsfield(top_iph, 0, dsfield);
-	top_iph->nexthdr = IPPROTO_IPV6; 
-	top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT);
-	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
-	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
+	return 0;
+}
+
+static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
+{
+	struct flowi6 fl6;
+	struct sock *sk = skb->sk;
+
+	fl6.flowi6_oif = sk->sk_bound_dev_if;
+	fl6.daddr = ipv6_hdr(skb)->daddr;
+
+	ipv6_local_rxpmtu(sk, &fl6, mtu);
+}
+
+void xfrm6_local_error(struct sk_buff *skb, u32 mtu)
+{
+	struct flowi6 fl6;
+	const struct ipv6hdr *hdr;
+	struct sock *sk = skb->sk;
+
+	hdr = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
+	fl6.fl6_dport = inet_sk(sk)->inet_dport;
+	fl6.daddr = hdr->daddr;
+
+	ipv6_local_error(sk, EMSGSIZE, &fl6, mtu);
 }
 
 static int xfrm6_tunnel_check_size(struct sk_buff *skb)
 {
 	int mtu, ret = 0;
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 
 	mtu = dst_mtu(dst);
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
 
-	if (skb->len > mtu) {
+	if (!skb->ignore_df && skb->len > mtu) {
 		skb->dev = dst->dev;
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+
+		if (xfrm6_local_dontfrag(skb))
+			xfrm6_local_rxpmtu(skb, mtu);
+		else if (skb->sk)
+			xfrm_local_error(skb, mtu);
+		else
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		ret = -EMSGSIZE;
 	}
 
 	return ret;
 }
 
-int xfrm6_output(struct sk_buff *skb)
+int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
-	struct xfrm_state *x = dst->xfrm;
 	int err;
-	
-	if (skb->ip_summed == CHECKSUM_HW) {
-		err = skb_checksum_help(skb, 0);
-		if (err)
-			goto error_nolock;
-	}
 
-	if (x->props.mode) {
-		err = xfrm6_tunnel_check_size(skb);
-		if (err)
-			goto error_nolock;
-	}
-
-	spin_lock_bh(&x->lock);
-	err = xfrm_state_check(x, skb);
+	err = xfrm6_tunnel_check_size(skb);
 	if (err)
-		goto error;
+		return err;
+
+	XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;
 
-	xfrm6_encap(skb);
+	return xfrm6_extract_header(skb);
+}
 
-	err = x->type->output(x, skb);
+int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+
+	err = xfrm_inner_extract_output(x, skb);
 	if (err)
-		goto error;
+		return err;
+
+	skb->ignore_df = 1;
+
+	return x->outer_mode->output2(x, skb);
+}
+EXPORT_SYMBOL(xfrm6_prepare_output);
+
+int xfrm6_output_finish(struct sk_buff *skb)
+{
+	memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+	skb->protocol = htons(ETH_P_IPV6);
+
+#ifdef CONFIG_NETFILTER
+	IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
+#endif
 
-	x->curlft.bytes += skb->len;
-	x->curlft.packets++;
+	return xfrm_output(skb);
+}
 
-	spin_unlock_bh(&x->lock);
+static int __xfrm6_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct xfrm_state *x = dst->xfrm;
+	int mtu;
 
-	skb->nh.raw = skb->data;
-	
-	if (!(skb->dst = dst_pop(dst))) {
-		err = -EHOSTUNREACH;
-		goto error_nolock;
+#ifdef CONFIG_NETFILTER
+	if (!x) {
+		IP6CB(skb)->flags |= IP6SKB_REROUTED;
+		return dst_output(skb);
+	}
+#endif
+
+	if (skb->protocol == htons(ETH_P_IPV6))
+		mtu = ip6_skb_dst_mtu(skb);
+	else
+		mtu = dst_mtu(skb_dst(skb));
+
+	if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
+		xfrm6_local_rxpmtu(skb, mtu);
+		return -EMSGSIZE;
+	} else if (!skb->ignore_df && skb->len > mtu && skb->sk) {
+		xfrm_local_error(skb, mtu);
+		return -EMSGSIZE;
 	}
-	err = NET_XMIT_BYPASS;
-
-out_exit:
-	return err;
-error:
-	spin_unlock_bh(&x->lock);
-error_nolock:
-	kfree_skb(skb);
-	goto out_exit;
+
+	if (x->props.mode == XFRM_MODE_TUNNEL &&
+	    ((skb->len > mtu && !skb_is_gso(skb)) ||
+		dst_allfrag(skb_dst(skb)))) {
+			return ip6_fragment(skb, x->outer_mode->afinfo->output_finish);
+	}
+	return x->outer_mode->afinfo->output_finish(skb);
+}
+
+int xfrm6_output(struct sock *sk, struct sk_buff *skb)
+{
+	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb,
+			    NULL, skb_dst(skb)->dev, __xfrm6_output,
+			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index cf1d91e74c8..2a0bbda2c76 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -8,257 +8,234 @@
  * 		IPv6 support
  * 	YOSHIFUJI Hideaki
  * 		Split up af-specific portion
- * 
+ *
  */
 
-#include <asm/bug.h>
-#include <linux/compiler.h>
-#include <linux/config.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <net/addrconf.h>
+#include <net/dst.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+#include <net/mip6.h>
+#endif
 
-static struct dst_ops xfrm6_dst_ops;
 static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
 
-static struct xfrm_type_map xfrm6_type_map = { .lock = RW_LOCK_UNLOCKED };
-
-static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
+static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos,
+					  const xfrm_address_t *saddr,
+					  const xfrm_address_t *daddr)
 {
-	int err = 0;
-	*dst = (struct xfrm_dst*)ip6_route_output(NULL, fl);
-	if (!*dst)
-		err = -ENETUNREACH;
-	return err;
+	struct flowi6 fl6;
+	struct dst_entry *dst;
+	int err;
+
+	memset(&fl6, 0, sizeof(fl6));
+	memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
+	if (saddr)
+		memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr));
+
+	dst = ip6_route_output(net, NULL, &fl6);
+
+	err = dst->error;
+	if (dst->error) {
+		dst_release(dst);
+		dst = ERR_PTR(err);
+	}
+
+	return dst;
 }
 
-static struct dst_entry *
-__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
+static int xfrm6_get_saddr(struct net *net,
+			   xfrm_address_t *saddr, xfrm_address_t *daddr)
 {
 	struct dst_entry *dst;
+	struct net_device *dev;
 
-	/* Still not clear if we should set fl->fl6_{src,dst}... */
-	read_lock_bh(&policy->lock);
-	for (dst = policy->bundles; dst; dst = dst->next) {
-		struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
-		struct in6_addr fl_dst_prefix, fl_src_prefix;
-
-		ipv6_addr_prefix(&fl_dst_prefix,
-				 &fl->fl6_dst,
-				 xdst->u.rt6.rt6i_dst.plen);
-		ipv6_addr_prefix(&fl_src_prefix,
-				 &fl->fl6_src,
-				 xdst->u.rt6.rt6i_src.plen);
-		if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) &&
-		    ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) &&
-		    xfrm_bundle_ok(xdst, fl, AF_INET6)) {
-			dst_clone(dst);
-			break;
-		}
-	}
-	read_unlock_bh(&policy->lock);
-	return dst;
+	dst = xfrm6_dst_lookup(net, 0, NULL, daddr);
+	if (IS_ERR(dst))
+		return -EHOSTUNREACH;
+
+	dev = ip6_dst_idev(dst)->dev;
+	ipv6_dev_get_saddr(dev_net(dev), dev,
+			   (struct in6_addr *)&daddr->a6, 0,
+			   (struct in6_addr *)&saddr->a6);
+	dst_release(dst);
+	return 0;
 }
 
-/* Allocate chain of dst_entry's, attach known xfrm's, calculate
- * all the metrics... Shortly, bundle a bundle.
- */
+static int xfrm6_get_tos(const struct flowi *fl)
+{
+	return 0;
+}
 
-static int
-__xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
-		      struct flowi *fl, struct dst_entry **dst_p)
+static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst)
 {
-	struct dst_entry *dst, *dst_prev;
-	struct rt6_info *rt0 = (struct rt6_info*)(*dst_p);
-	struct rt6_info *rt  = rt0;
-	struct in6_addr *remote = &fl->fl6_dst;
-	struct in6_addr *local  = &fl->fl6_src;
-	struct flowi fl_tunnel = {
-		.nl_u = {
-			.ip6_u = {
-				.saddr = *local,
-				.daddr = *remote
-			}
-		}
-	};
-	int i;
-	int err = 0;
-	int header_len = 0;
-	int trailer_len = 0;
-
-	dst = dst_prev = NULL;
-	dst_hold(&rt->u.dst);
-
-	for (i = 0; i < nx; i++) {
-		struct dst_entry *dst1 = dst_alloc(&xfrm6_dst_ops);
-		struct xfrm_dst *xdst;
-		int tunnel = 0;
-
-		if (unlikely(dst1 == NULL)) {
-			err = -ENOBUFS;
-			dst_release(&rt->u.dst);
-			goto error;
-		}
+	struct rt6_info *rt = (struct rt6_info *)xdst;
 
-		if (!dst)
-			dst = dst1;
-		else {
-			dst_prev->child = dst1;
-			dst1->flags |= DST_NOHASH;
-			dst_clone(dst1);
-		}
+	rt6_init_peer(rt, net->ipv6.peers);
+}
 
-		xdst = (struct xfrm_dst *)dst1;
-		xdst->route = &rt->u.dst;
+static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
+			   int nfheader_len)
+{
+	if (dst->ops->family == AF_INET6) {
+		struct rt6_info *rt = (struct rt6_info*)dst;
 		if (rt->rt6i_node)
-			xdst->route_cookie = rt->rt6i_node->fn_sernum;
-
-		dst1->next = dst_prev;
-		dst_prev = dst1;
-		if (xfrm[i]->props.mode) {
-			remote = (struct in6_addr*)&xfrm[i]->id.daddr;
-			local  = (struct in6_addr*)&xfrm[i]->props.saddr;
-			tunnel = 1;
-		}
-		header_len += xfrm[i]->props.header_len;
-		trailer_len += xfrm[i]->props.trailer_len;
-
-		if (tunnel) {
-			ipv6_addr_copy(&fl_tunnel.fl6_dst, remote);
-			ipv6_addr_copy(&fl_tunnel.fl6_src, local);
-			err = xfrm_dst_lookup((struct xfrm_dst **) &rt,
-					      &fl_tunnel, AF_INET6);
-			if (err)
-				goto error;
-		} else
-			dst_hold(&rt->u.dst);
+			path->path_cookie = rt->rt6i_node->fn_sernum;
 	}
 
-	dst_prev->child = &rt->u.dst;
-	dst->path = &rt->u.dst;
-	if (rt->rt6i_node)
-		((struct xfrm_dst *)dst)->path_cookie = rt->rt6i_node->fn_sernum;
-
-	*dst_p = dst;
-	dst = dst_prev;
-
-	dst_prev = *dst_p;
-	i = 0;
-	for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
-		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
-
-		dst_prev->xfrm = xfrm[i++];
-		dst_prev->dev = rt->u.dst.dev;
-		if (rt->u.dst.dev)
-			dev_hold(rt->u.dst.dev);
-		dst_prev->obsolete	= -1;
-		dst_prev->flags	       |= DST_HOST;
-		dst_prev->lastuse	= jiffies;
-		dst_prev->header_len	= header_len;
-		dst_prev->trailer_len	= trailer_len;
-		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
-
-		/* Copy neighbour for reachability confirmation */
-		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
-		dst_prev->input		= rt->u.dst.input;
-		dst_prev->output	= xfrm6_output;
-		/* Sheit... I remember I did this right. Apparently,
-		 * it was magically lost, so this code needs audit */
-		x->u.rt6.rt6i_flags    = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
-		x->u.rt6.rt6i_metric   = rt0->rt6i_metric;
-		x->u.rt6.rt6i_node     = rt0->rt6i_node;
-		x->u.rt6.rt6i_gateway  = rt0->rt6i_gateway;
-		memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway)); 
-		x->u.rt6.rt6i_dst      = rt0->rt6i_dst;
-		x->u.rt6.rt6i_src      = rt0->rt6i_src;	
-		x->u.rt6.rt6i_idev     = rt0->rt6i_idev;
-		in6_dev_hold(rt0->rt6i_idev);
-		header_len -= x->u.dst.xfrm->props.header_len;
-		trailer_len -= x->u.dst.xfrm->props.trailer_len;
-	}
+	path->u.rt6.rt6i_nfheader_len = nfheader_len;
 
-	xfrm_init_pmtu(dst);
 	return 0;
+}
 
-error:
-	if (dst)
-		dst_free(dst);
-	return err;
+static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+			  const struct flowi *fl)
+{
+	struct rt6_info *rt = (struct rt6_info*)xdst->route;
+
+	xdst->u.dst.dev = dev;
+	dev_hold(dev);
+
+	xdst->u.rt6.rt6i_idev = in6_dev_get(dev);
+	if (!xdst->u.rt6.rt6i_idev) {
+		dev_put(dev);
+		return -ENODEV;
+	}
+
+	rt6_transfer_peer(&xdst->u.rt6, rt);
+
+	/* Sheit... I remember I did this right. Apparently,
+	 * it was magically lost, so this code needs audit */
+	xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
+						   RTF_LOCAL);
+	xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
+	xdst->u.rt6.rt6i_node = rt->rt6i_node;
+	if (rt->rt6i_node)
+		xdst->route_cookie = rt->rt6i_node->fn_sernum;
+	xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
+	xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
+	xdst->u.rt6.rt6i_src = rt->rt6i_src;
+
+	return 0;
 }
 
 static inline void
-_decode_session6(struct sk_buff *skb, struct flowi *fl)
+_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 {
-	u16 offset = sizeof(struct ipv6hdr);
-	struct ipv6hdr *hdr = skb->nh.ipv6h;
-	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
-	u8 nexthdr = skb->nh.ipv6h->nexthdr;
-
-	memset(fl, 0, sizeof(struct flowi));
-	ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr);
-	ipv6_addr_copy(&fl->fl6_src, &hdr->saddr);
+	struct flowi6 *fl6 = &fl->u.ip6;
+	int onlyproto = 0;
+	u16 offset = skb_network_header_len(skb);
+	const struct ipv6hdr *hdr = ipv6_hdr(skb);
+	struct ipv6_opt_hdr *exthdr;
+	const unsigned char *nh = skb_network_header(skb);
+	u8 nexthdr = nh[IP6CB(skb)->nhoff];
+	int oif = 0;
+
+	if (skb_dst(skb))
+		oif = skb_dst(skb)->dev->ifindex;
+
+	memset(fl6, 0, sizeof(struct flowi6));
+	fl6->flowi6_mark = skb->mark;
+	fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
+
+	fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
+	fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
+
+	while (nh + offset + 1 < skb->data ||
+	       pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
+		nh = skb_network_header(skb);
+		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 
-	while (pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) {
 		switch (nexthdr) {
+		case NEXTHDR_FRAGMENT:
+			onlyproto = 1;
 		case NEXTHDR_ROUTING:
 		case NEXTHDR_HOP:
 		case NEXTHDR_DEST:
 			offset += ipv6_optlen(exthdr);
 			nexthdr = exthdr->nexthdr;
-			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+			exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 			break;
 
 		case IPPROTO_UDP:
+		case IPPROTO_UDPLITE:
 		case IPPROTO_TCP:
 		case IPPROTO_SCTP:
-			if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) {
-				u16 *ports = (u16 *)exthdr;
+		case IPPROTO_DCCP:
+			if (!onlyproto && (nh + offset + 4 < skb->data ||
+			     pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
+				__be16 *ports = (__be16 *)exthdr;
 
-				fl->fl_ip_sport = ports[0];
-				fl->fl_ip_dport = ports[1];
+				fl6->fl6_sport = ports[!!reverse];
+				fl6->fl6_dport = ports[!reverse];
 			}
-			fl->proto = nexthdr;
+			fl6->flowi6_proto = nexthdr;
 			return;
 
 		case IPPROTO_ICMPV6:
-			if (pskb_may_pull(skb, skb->nh.raw + offset + 2 - skb->data)) {
+			if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
 				u8 *icmp = (u8 *)exthdr;
 
-				fl->fl_icmp_type = icmp[0];
-				fl->fl_icmp_code = icmp[1];
+				fl6->fl6_icmp_type = icmp[0];
+				fl6->fl6_icmp_code = icmp[1];
 			}
-			fl->proto = nexthdr;
+			fl6->flowi6_proto = nexthdr;
 			return;
 
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+		case IPPROTO_MH:
+			if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
+				struct ip6_mh *mh;
+				mh = (struct ip6_mh *)exthdr;
+
+				fl6->fl6_mh_type = mh->ip6mh_type;
+			}
+			fl6->flowi6_proto = nexthdr;
+			return;
+#endif
+
 		/* XXX Why are there these headers? */
 		case IPPROTO_AH:
 		case IPPROTO_ESP:
 		case IPPROTO_COMP:
 		default:
-			fl->fl_ipsec_spi = 0;
-			fl->proto = nexthdr;
+			fl6->fl6_ipsec_spi = 0;
+			fl6->flowi6_proto = nexthdr;
 			return;
-		};
+		}
 	}
 }
 
-static inline int xfrm6_garbage_collect(void)
+static inline int xfrm6_garbage_collect(struct dst_ops *ops)
 {
-	read_lock(&xfrm6_policy_afinfo.lock);
-	xfrm6_policy_afinfo.garbage_collect();
-	read_unlock(&xfrm6_policy_afinfo.lock);
-	return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2);
+	struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
+
+	xfrm6_policy_afinfo.garbage_collect(net);
+	return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
 }
 
-static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			      struct sk_buff *skb, u32 mtu)
 {
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 	struct dst_entry *path = xdst->route;
 
-	path->ops->update_pmtu(path, mtu);
+	path->ops->update_pmtu(path, sk, skb, mtu);
+}
+
+static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
+			   struct sk_buff *skb)
+{
+	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+	struct dst_entry *path = xdst->route;
+
+	path->ops->redirect(path, sk, skb);
 }
 
 static void xfrm6_dst_destroy(struct dst_entry *dst)
@@ -267,6 +244,11 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
 
 	if (likely(xdst->u.rt6.rt6i_idev))
 		in6_dev_put(xdst->u.rt6.rt6i_idev);
+	dst_destroy_metrics_generic(dst);
+	if (rt6_has_peer(&xdst->u.rt6)) {
+		struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6);
+		inet_putpeer(peer);
+	}
 	xfrm_dst_destroy(xdst);
 }
 
@@ -280,7 +262,8 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 	xdst = (struct xfrm_dst *)dst;
 	if (xdst->u.rt6.rt6i_idev->dev == dev) {
-		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
+		struct inet6_dev *loopback_idev =
+			in6_dev_get(dev_net(dev)->loopback_dev);
 		BUG_ON(!loopback_idev);
 
 		do {
@@ -298,29 +281,33 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 static struct dst_ops xfrm6_dst_ops = {
 	.family =		AF_INET6,
-	.protocol =		__constant_htons(ETH_P_IPV6),
+	.protocol =		cpu_to_be16(ETH_P_IPV6),
 	.gc =			xfrm6_garbage_collect,
 	.update_pmtu =		xfrm6_update_pmtu,
+	.redirect =		xfrm6_redirect,
+	.cow_metrics =		dst_cow_metrics_generic,
 	.destroy =		xfrm6_dst_destroy,
 	.ifdown =		xfrm6_dst_ifdown,
-	.gc_thresh =		1024,
-	.entry_size =		sizeof(struct xfrm_dst),
+	.local_out =		__ip6_local_out,
+	.gc_thresh =		32768,
 };
 
 static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
 	.family =		AF_INET6,
-	.lock = 		RW_LOCK_UNLOCKED,
-	.type_map = 		&xfrm6_type_map,
 	.dst_ops =		&xfrm6_dst_ops,
 	.dst_lookup =		xfrm6_dst_lookup,
-	.find_bundle =		__xfrm6_find_bundle,
-	.bundle_create =	__xfrm6_bundle_create,
+	.get_saddr = 		xfrm6_get_saddr,
 	.decode_session =	_decode_session6,
+	.get_tos =		xfrm6_get_tos,
+	.init_dst =		xfrm6_init_dst,
+	.init_path =		xfrm6_init_path,
+	.fill_dst =		xfrm6_fill_dst,
+	.blackhole_route =	ip6_blackhole_route,
 };
 
-static void __init xfrm6_policy_init(void)
+static int __init xfrm6_policy_init(void)
 {
-	xfrm_policy_register_afinfo(&xfrm6_policy_afinfo);
+	return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo);
 }
 
 static void xfrm6_policy_fini(void)
@@ -328,15 +315,103 @@ static void xfrm6_policy_fini(void)
 	xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo);
 }
 
-void __init xfrm6_init(void)
+#ifdef CONFIG_SYSCTL
+static struct ctl_table xfrm6_policy_table[] = {
+	{
+		.procname       = "xfrm6_gc_thresh",
+		.data	   	= &init_net.xfrm.xfrm6_dst_ops.gc_thresh,
+		.maxlen	 	= sizeof(int),
+		.mode	   	= 0644,
+		.proc_handler   = proc_dointvec,
+	},
+	{ }
+};
+
+static int __net_init xfrm6_net_init(struct net *net)
+{
+	struct ctl_table *table;
+	struct ctl_table_header *hdr;
+
+	table = xfrm6_policy_table;
+	if (!net_eq(net, &init_net)) {
+		table = kmemdup(table, sizeof(xfrm6_policy_table), GFP_KERNEL);
+		if (!table)
+			goto err_alloc;
+
+		table[0].data = &net->xfrm.xfrm6_dst_ops.gc_thresh;
+	}
+
+	hdr = register_net_sysctl(net, "net/ipv6", table);
+	if (!hdr)
+		goto err_reg;
+
+	net->ipv6.sysctl.xfrm6_hdr = hdr;
+	return 0;
+
+err_reg:
+	if (!net_eq(net, &init_net))
+		kfree(table);
+err_alloc:
+	return -ENOMEM;
+}
+
+static void __net_exit xfrm6_net_exit(struct net *net)
 {
-	xfrm6_policy_init();
-	xfrm6_state_init();
+	struct ctl_table *table;
+
+	if (net->ipv6.sysctl.xfrm6_hdr == NULL)
+		return;
+
+	table = net->ipv6.sysctl.xfrm6_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->ipv6.sysctl.xfrm6_hdr);
+	if (!net_eq(net, &init_net))
+		kfree(table);
+}
+
+static struct pernet_operations xfrm6_net_ops = {
+	.init	= xfrm6_net_init,
+	.exit	= xfrm6_net_exit,
+};
+#endif
+
+int __init xfrm6_init(void)
+{
+	int ret;
+
+	dst_entries_init(&xfrm6_dst_ops);
+
+	ret = xfrm6_policy_init();
+	if (ret) {
+		dst_entries_destroy(&xfrm6_dst_ops);
+		goto out;
+	}
+	ret = xfrm6_state_init();
+	if (ret)
+		goto out_policy;
+
+	ret = xfrm6_protocol_init();
+	if (ret)
+		goto out_state;
+
+#ifdef CONFIG_SYSCTL
+	register_pernet_subsys(&xfrm6_net_ops);
+#endif
+out:
+	return ret;
+out_state:
+	xfrm6_state_fini();
+out_policy:
+	xfrm6_policy_fini();
+	goto out;
 }
 
 void xfrm6_fini(void)
 {
-	//xfrm6_input_fini();
+#ifdef CONFIG_SYSCTL
+	unregister_pernet_subsys(&xfrm6_net_ops);
+#endif
+	xfrm6_protocol_fini();
 	xfrm6_policy_fini();
 	xfrm6_state_fini();
+	dst_entries_destroy(&xfrm6_dst_ops);
 }
diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c
new file mode 100644
index 00000000000..54d13f8dbba
--- /dev/null
+++ b/net/ipv6/xfrm6_protocol.c
@@ -0,0 +1,279 @@
+/* xfrm6_protocol.c - Generic xfrm protocol multiplexer for ipv6.
+ *
+ * Copyright (C) 2013 secunet Security Networks AG
+ *
+ * Author:
+ * Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * Based on:
+ * net/ipv4/xfrm4_protocol.c
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/skbuff.h>
+#include <linux/icmpv6.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+
+static struct xfrm6_protocol __rcu *esp6_handlers __read_mostly;
+static struct xfrm6_protocol __rcu *ah6_handlers __read_mostly;
+static struct xfrm6_protocol __rcu *ipcomp6_handlers __read_mostly;
+static DEFINE_MUTEX(xfrm6_protocol_mutex);
+
+static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol)
+{
+	switch (protocol) {
+	case IPPROTO_ESP:
+		return &esp6_handlers;
+	case IPPROTO_AH:
+		return &ah6_handlers;
+	case IPPROTO_COMP:
+		return &ipcomp6_handlers;
+	}
+
+	return NULL;
+}
+
+#define for_each_protocol_rcu(head, handler)		\
+	for (handler = rcu_dereference(head);		\
+	     handler != NULL;				\
+	     handler = rcu_dereference(handler->next))	\
+
+int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
+{
+	int ret;
+	struct xfrm6_protocol *handler;
+	struct xfrm6_protocol __rcu **head = proto_handlers(protocol);
+
+	if (!head)
+		return 0;
+
+	for_each_protocol_rcu(*proto_handlers(protocol), handler)
+		if ((ret = handler->cb_handler(skb, err)) <= 0)
+			return ret;
+
+	return 0;
+}
+EXPORT_SYMBOL(xfrm6_rcv_cb);
+
+static int xfrm6_esp_rcv(struct sk_buff *skb)
+{
+	int ret;
+	struct xfrm6_protocol *handler;
+
+	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+	for_each_protocol_rcu(esp6_handlers, handler)
+		if ((ret = handler->handler(skb)) != -EINVAL)
+			return ret;
+
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+	kfree_skb(skb);
+	return 0;
+}
+
+static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+			  u8 type, u8 code, int offset, __be32 info)
+{
+	struct xfrm6_protocol *handler;
+
+	for_each_protocol_rcu(esp6_handlers, handler)
+		if (!handler->err_handler(skb, opt, type, code, offset, info))
+			break;
+}
+
+static int xfrm6_ah_rcv(struct sk_buff *skb)
+{
+	int ret;
+	struct xfrm6_protocol *handler;
+
+	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+	for_each_protocol_rcu(ah6_handlers, handler)
+		if ((ret = handler->handler(skb)) != -EINVAL)
+			return ret;
+
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+	kfree_skb(skb);
+	return 0;
+}
+
+static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+			 u8 type, u8 code, int offset, __be32 info)
+{
+	struct xfrm6_protocol *handler;
+
+	for_each_protocol_rcu(ah6_handlers, handler)
+		if (!handler->err_handler(skb, opt, type, code, offset, info))
+			break;
+}
+
+static int xfrm6_ipcomp_rcv(struct sk_buff *skb)
+{
+	int ret;
+	struct xfrm6_protocol *handler;
+
+	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+	for_each_protocol_rcu(ipcomp6_handlers, handler)
+		if ((ret = handler->handler(skb)) != -EINVAL)
+			return ret;
+
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+	kfree_skb(skb);
+	return 0;
+}
+
+static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+			     u8 type, u8 code, int offset, __be32 info)
+{
+	struct xfrm6_protocol *handler;
+
+	for_each_protocol_rcu(ipcomp6_handlers, handler)
+		if (!handler->err_handler(skb, opt, type, code, offset, info))
+			break;
+}
+
+static const struct inet6_protocol esp6_protocol = {
+	.handler	=	xfrm6_esp_rcv,
+	.err_handler	=	xfrm6_esp_err,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
+static const struct inet6_protocol ah6_protocol = {
+	.handler	=	xfrm6_ah_rcv,
+	.err_handler	=	xfrm6_ah_err,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
+static const struct inet6_protocol ipcomp6_protocol = {
+	.handler	=	xfrm6_ipcomp_rcv,
+	.err_handler	=	xfrm6_ipcomp_err,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
+static struct xfrm_input_afinfo xfrm6_input_afinfo = {
+	.family		=	AF_INET6,
+	.owner		=	THIS_MODULE,
+	.callback	=	xfrm6_rcv_cb,
+};
+
+static inline const struct inet6_protocol *netproto(unsigned char protocol)
+{
+	switch (protocol) {
+	case IPPROTO_ESP:
+		return &esp6_protocol;
+	case IPPROTO_AH:
+		return &ah6_protocol;
+	case IPPROTO_COMP:
+		return &ipcomp6_protocol;
+	}
+
+	return NULL;
+}
+
+int xfrm6_protocol_register(struct xfrm6_protocol *handler,
+			    unsigned char protocol)
+{
+	struct xfrm6_protocol __rcu **pprev;
+	struct xfrm6_protocol *t;
+	bool add_netproto = false;
+	int ret = -EEXIST;
+	int priority = handler->priority;
+
+	if (!proto_handlers(protocol) || !netproto(protocol))
+		return -EINVAL;
+
+	mutex_lock(&xfrm6_protocol_mutex);
+
+	if (!rcu_dereference_protected(*proto_handlers(protocol),
+				       lockdep_is_held(&xfrm6_protocol_mutex)))
+		add_netproto = true;
+
+	for (pprev = proto_handlers(protocol);
+	     (t = rcu_dereference_protected(*pprev,
+			lockdep_is_held(&xfrm6_protocol_mutex))) != NULL;
+	     pprev = &t->next) {
+		if (t->priority < priority)
+			break;
+		if (t->priority == priority)
+			goto err;
+	}
+
+	handler->next = *pprev;
+	rcu_assign_pointer(*pprev, handler);
+
+	ret = 0;
+
+err:
+	mutex_unlock(&xfrm6_protocol_mutex);
+
+	if (add_netproto) {
+		if (inet6_add_protocol(netproto(protocol), protocol)) {
+			pr_err("%s: can't add protocol\n", __func__);
+			ret = -EAGAIN;
+		}
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(xfrm6_protocol_register);
+
+int xfrm6_protocol_deregister(struct xfrm6_protocol *handler,
+			      unsigned char protocol)
+{
+	struct xfrm6_protocol __rcu **pprev;
+	struct xfrm6_protocol *t;
+	int ret = -ENOENT;
+
+	if (!proto_handlers(protocol) || !netproto(protocol))
+		return -EINVAL;
+
+	mutex_lock(&xfrm6_protocol_mutex);
+
+	for (pprev = proto_handlers(protocol);
+	     (t = rcu_dereference_protected(*pprev,
+			lockdep_is_held(&xfrm6_protocol_mutex))) != NULL;
+	     pprev = &t->next) {
+		if (t == handler) {
+			*pprev = handler->next;
+			ret = 0;
+			break;
+		}
+	}
+
+	if (!rcu_dereference_protected(*proto_handlers(protocol),
+				       lockdep_is_held(&xfrm6_protocol_mutex))) {
+		if (inet6_del_protocol(netproto(protocol), protocol) < 0) {
+			pr_err("%s: can't remove protocol\n", __func__);
+			ret = -EAGAIN;
+		}
+	}
+
+	mutex_unlock(&xfrm6_protocol_mutex);
+
+	synchronize_net();
+
+	return ret;
+}
+EXPORT_SYMBOL(xfrm6_protocol_deregister);
+
+int __init xfrm6_protocol_init(void)
+{
+	return xfrm_input_register_afinfo(&xfrm6_input_afinfo);
+}
+
+void xfrm6_protocol_fini(void)
+{
+	xfrm_input_unregister_afinfo(&xfrm6_input_afinfo);
+}
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index bf0d0abc387..3fc970135fc 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -8,33 +8,42 @@
  * 		IPv6 support
  * 	YOSHIFUJI Hideaki @USAGI
  * 		Split up af-specific portion
- * 	
+ *
  */
 
 #include <net/xfrm.h>
 #include <linux/pfkeyv2.h>
 #include <linux/ipsec.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/export.h>
+#include <net/dsfield.h>
 #include <net/ipv6.h>
-
-static struct xfrm_state_afinfo xfrm6_state_afinfo;
+#include <net/addrconf.h>
 
 static void
-__xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
-		     struct xfrm_tmpl *tmpl,
-		     xfrm_address_t *daddr, xfrm_address_t *saddr)
+__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
 {
+	const struct flowi6 *fl6 = &fl->u.ip6;
+
 	/* Initialize temporary selector matching only
 	 * to current session. */
-	ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst);
-	ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src);
-	x->sel.dport = xfrm_flowi_dport(fl);
-	x->sel.dport_mask = ~0;
-	x->sel.sport = xfrm_flowi_sport(fl);
-	x->sel.sport_mask = ~0;
-	x->sel.prefixlen_d = 128;
-	x->sel.prefixlen_s = 128;
-	x->sel.proto = fl->proto;
-	x->sel.ifindex = fl->oif;
+	*(struct in6_addr *)&sel->daddr = fl6->daddr;
+	*(struct in6_addr *)&sel->saddr = fl6->saddr;
+	sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
+	sel->dport_mask = htons(0xffff);
+	sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
+	sel->sport_mask = htons(0xffff);
+	sel->family = AF_INET6;
+	sel->prefixlen_d = 128;
+	sel->prefixlen_s = 128;
+	sel->proto = fl6->flowi6_proto;
+	sel->ifindex = fl6->flowi6_oif;
+}
+
+static void
+xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
+		   const xfrm_address_t *daddr, const xfrm_address_t *saddr)
+{
 	x->id = tmpl->id;
 	if (ipv6_addr_any((struct in6_addr*)&x->id.daddr))
 		memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
@@ -46,87 +55,140 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	x->props.family = AF_INET6;
 }
 
-static struct xfrm_state *
-__xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
+/* distribution counting sort function for xfrm_state and xfrm_tmpl */
+static int
+__xfrm6_sort(void **dst, void **src, int n, int (*cmp)(void *p), int maxclass)
 {
-	unsigned h = __xfrm6_spi_hash(daddr, spi, proto);
-	struct xfrm_state *x;
-
-	list_for_each_entry(x, xfrm6_state_afinfo.state_byspi+h, byspi) {
-		if (x->props.family == AF_INET6 &&
-		    spi == x->id.spi &&
-		    ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
-		    proto == x->id.proto) {
-			xfrm_state_hold(x);
-			return x;
-		}
+	int i;
+	int class[XFRM_MAX_DEPTH];
+	int count[maxclass];
+
+	memset(count, 0, sizeof(count));
+
+	for (i = 0; i < n; i++) {
+		int c;
+		class[i] = c = cmp(src[i]);
+		count[c]++;
 	}
-	return NULL;
+
+	for (i = 2; i < maxclass; i++)
+		count[i] += count[i - 1];
+
+	for (i = 0; i < n; i++) {
+		dst[count[class[i] - 1]++] = src[i];
+		src[i] = NULL;
+	}
+
+	return 0;
 }
 
-static struct xfrm_state *
-__xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, 
-		 xfrm_address_t *daddr, xfrm_address_t *saddr, 
-		 int create)
+/*
+ * Rule for xfrm_state:
+ *
+ * rule 1: select IPsec transport except AH
+ * rule 2: select MIPv6 RO or inbound trigger
+ * rule 3: select IPsec transport AH
+ * rule 4: select IPsec tunnel
+ * rule 5: others
+ */
+static int __xfrm6_state_sort_cmp(void *p)
 {
-	struct xfrm_state *x, *x0;
-	unsigned h = __xfrm6_dst_hash(daddr);
-
-	x0 = NULL;
-
-	list_for_each_entry(x, xfrm6_state_afinfo.state_bydst+h, bydst) {
-		if (x->props.family == AF_INET6 &&
-		    ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
-		    mode == x->props.mode &&
-		    proto == x->id.proto &&
-		    ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) &&
-		    reqid == x->props.reqid &&
-		    x->km.state == XFRM_STATE_ACQ &&
-		    !x->id.spi) {
-			    x0 = x;
-			    break;
-		    }
+	struct xfrm_state *v = p;
+
+	switch (v->props.mode) {
+	case XFRM_MODE_TRANSPORT:
+		if (v->id.proto != IPPROTO_AH)
+			return 1;
+		else
+			return 3;
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+	case XFRM_MODE_ROUTEOPTIMIZATION:
+	case XFRM_MODE_IN_TRIGGER:
+		return 2;
+#endif
+	case XFRM_MODE_TUNNEL:
+	case XFRM_MODE_BEET:
+		return 4;
 	}
-	if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) {
-		ipv6_addr_copy((struct in6_addr *)x0->sel.daddr.a6,
-			       (struct in6_addr *)daddr);
-		ipv6_addr_copy((struct in6_addr *)x0->sel.saddr.a6,
-			       (struct in6_addr *)saddr);
-		x0->sel.prefixlen_d = 128;
-		x0->sel.prefixlen_s = 128;
-		ipv6_addr_copy((struct in6_addr *)x0->props.saddr.a6,
-			       (struct in6_addr *)saddr);
-		x0->km.state = XFRM_STATE_ACQ;
-		ipv6_addr_copy((struct in6_addr *)x0->id.daddr.a6,
-			       (struct in6_addr *)daddr);
-		x0->id.proto = proto;
-		x0->props.family = AF_INET6;
-		x0->props.mode = mode;
-		x0->props.reqid = reqid;
-		x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
-		xfrm_state_hold(x0);
-		x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
-		add_timer(&x0->timer);
-		xfrm_state_hold(x0);
-		list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h);
-		wake_up(&km_waitq);
+	return 5;
+}
+
+static int
+__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n)
+{
+	return __xfrm6_sort((void **)dst, (void **)src, n,
+			    __xfrm6_state_sort_cmp, 6);
+}
+
+/*
+ * Rule for xfrm_tmpl:
+ *
+ * rule 1: select IPsec transport
+ * rule 2: select MIPv6 RO or inbound trigger
+ * rule 3: select IPsec tunnel
+ * rule 4: others
+ */
+static int __xfrm6_tmpl_sort_cmp(void *p)
+{
+	struct xfrm_tmpl *v = p;
+	switch (v->mode) {
+	case XFRM_MODE_TRANSPORT:
+		return 1;
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+	case XFRM_MODE_ROUTEOPTIMIZATION:
+	case XFRM_MODE_IN_TRIGGER:
+		return 2;
+#endif
+	case XFRM_MODE_TUNNEL:
+	case XFRM_MODE_BEET:
+		return 3;
 	}
-	if (x0)
-		xfrm_state_hold(x0);
-	return x0;
+	return 4;
+}
+
+static int
+__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
+{
+	return __xfrm6_sort((void **)dst, (void **)src, n,
+			    __xfrm6_tmpl_sort_cmp, 5);
+}
+
+int xfrm6_extract_header(struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+
+	XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
+	XFRM_MODE_SKB_CB(skb)->id = 0;
+	XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF);
+	XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph);
+	XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit;
+	XFRM_MODE_SKB_CB(skb)->optlen = 0;
+	memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl,
+	       sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
+
+	return 0;
 }
 
 static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.family			= AF_INET6,
-	.lock			= RW_LOCK_UNLOCKED,
+	.proto			= IPPROTO_IPV6,
+	.eth_proto		= htons(ETH_P_IPV6),
+	.owner			= THIS_MODULE,
 	.init_tempsel		= __xfrm6_init_tempsel,
-	.state_lookup		= __xfrm6_state_lookup,
-	.find_acq		= __xfrm6_find_acq,
+	.init_temprop		= xfrm6_init_temprop,
+	.tmpl_sort		= __xfrm6_tmpl_sort,
+	.state_sort		= __xfrm6_state_sort,
+	.output			= xfrm6_output,
+	.output_finish		= xfrm6_output_finish,
+	.extract_input		= xfrm6_extract_input,
+	.extract_output		= xfrm6_extract_output,
+	.transport_finish	= xfrm6_transport_finish,
+	.local_error		= xfrm6_local_error,
 };
 
-void __init xfrm6_state_init(void)
+int __init xfrm6_state_init(void)
 {
-	xfrm_state_register_afinfo(&xfrm6_state_afinfo);
+	return xfrm_state_register_afinfo(&xfrm6_state_afinfo);
 }
 
 void xfrm6_state_fini(void)
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index fbef7826a74..1c66465a42d 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -5,15 +5,14 @@
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
- * 
+ *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
  *
  * Authors	Mitsuru KANDA  <mk@linux-ipv6.org>
  * 		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
@@ -21,435 +20,251 @@
  * Based on net/ipv4/xfrm4_tunnel.c
  *
  */
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/xfrm.h>
-#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/rculist.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 #include <net/ipv6.h>
-#include <net/protocol.h>
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
+#include <linux/mutex.h>
+#include <net/netns/generic.h>
 
-#ifdef CONFIG_IPV6_XFRM6_TUNNEL_DEBUG
-# define X6TDEBUG	3
-#else
-# define X6TDEBUG	1
-#endif
+#define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256
+#define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256
 
-#define X6TPRINTK(fmt, args...)		printk(fmt, ## args)
-#define X6TNOPRINTK(fmt, args...)	do { ; } while(0)
+#define XFRM6_TUNNEL_SPI_MIN	1
+#define XFRM6_TUNNEL_SPI_MAX	0xffffffff
 
-#if X6TDEBUG >= 1
-# define X6TPRINTK1	X6TPRINTK
-#else
-# define X6TPRINTK1	X6TNOPRINTK
-#endif
+struct xfrm6_tunnel_net {
+	struct hlist_head spi_byaddr[XFRM6_TUNNEL_SPI_BYADDR_HSIZE];
+	struct hlist_head spi_byspi[XFRM6_TUNNEL_SPI_BYSPI_HSIZE];
+	u32 spi;
+};
 
-#if X6TDEBUG >= 3
-# define X6TPRINTK3	X6TPRINTK
-#else
-# define X6TPRINTK3	X6TNOPRINTK
-#endif
+static int xfrm6_tunnel_net_id __read_mostly;
+static inline struct xfrm6_tunnel_net *xfrm6_tunnel_pernet(struct net *net)
+{
+	return net_generic(net, xfrm6_tunnel_net_id);
+}
 
 /*
- * xfrm_tunnel_spi things are for allocating unique id ("spi") 
+ * xfrm_tunnel_spi things are for allocating unique id ("spi")
  * per xfrm_address_t.
  */
 struct xfrm6_tunnel_spi {
-	struct hlist_node list_byaddr;
-	struct hlist_node list_byspi;
-	xfrm_address_t addr;
-	u32 spi;
-	atomic_t refcnt;
-#ifdef XFRM6_TUNNEL_SPI_MAGIC
-	u32 magic;
-#endif
+	struct hlist_node	list_byaddr;
+	struct hlist_node	list_byspi;
+	xfrm_address_t		addr;
+	u32			spi;
+	atomic_t		refcnt;
+	struct rcu_head		rcu_head;
 };
 
-#ifdef CONFIG_IPV6_XFRM6_TUNNEL_DEBUG
-# define XFRM6_TUNNEL_SPI_MAGIC 0xdeadbeef
-#endif
-
-static DEFINE_RWLOCK(xfrm6_tunnel_spi_lock);
-
-static u32 xfrm6_tunnel_spi;
+static DEFINE_SPINLOCK(xfrm6_tunnel_spi_lock);
 
-#define XFRM6_TUNNEL_SPI_MIN	1
-#define XFRM6_TUNNEL_SPI_MAX	0xffffffff
+static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly;
 
-static kmem_cache_t *xfrm6_tunnel_spi_kmem __read_mostly;
-
-#define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256
-#define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256
-
-static struct hlist_head xfrm6_tunnel_spi_byaddr[XFRM6_TUNNEL_SPI_BYADDR_HSIZE];
-static struct hlist_head xfrm6_tunnel_spi_byspi[XFRM6_TUNNEL_SPI_BYSPI_HSIZE];
-
-#ifdef XFRM6_TUNNEL_SPI_MAGIC
-static int x6spi_check_magic(const struct xfrm6_tunnel_spi *x6spi,
-			     const char *name)
-{
-	if (unlikely(x6spi->magic != XFRM6_TUNNEL_SPI_MAGIC)) {
-		X6TPRINTK3(KERN_DEBUG "%s(): x6spi object "
-				      "at %p has corrupted magic %08x "
-				      "(should be %08x)\n",
-			   name, x6spi, x6spi->magic, XFRM6_TUNNEL_SPI_MAGIC);
-		return -1;
-	}
-	return 0;
-}
-#else
-static int inline x6spi_check_magic(const struct xfrm6_tunnel_spi *x6spi,
-				    const char *name)
-{
-	return 0;
-}
-#endif
-
-#define X6SPI_CHECK_MAGIC(x6spi) x6spi_check_magic((x6spi), __FUNCTION__)
-
-
-static unsigned inline xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr)
+static inline unsigned int xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *addr)
 {
-	unsigned h;
-
-	X6TPRINTK3(KERN_DEBUG "%s(addr=%p)\n", __FUNCTION__, addr);
+	unsigned int h;
 
-	h = addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3];
+	h = ipv6_addr_hash((const struct in6_addr *)addr);
 	h ^= h >> 16;
 	h ^= h >> 8;
 	h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1;
 
-	X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, h);
-
 	return h;
 }
 
-static unsigned inline xfrm6_tunnel_spi_hash_byspi(u32 spi)
+static inline unsigned int xfrm6_tunnel_spi_hash_byspi(u32 spi)
 {
 	return spi % XFRM6_TUNNEL_SPI_BYSPI_HSIZE;
 }
 
-
-static int xfrm6_tunnel_spi_init(void)
-{
-	int i;
-
-	X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
-
-	xfrm6_tunnel_spi = 0;
-	xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi",
-						  sizeof(struct xfrm6_tunnel_spi),
-						  0, SLAB_HWCACHE_ALIGN,
-						  NULL, NULL);
-	if (!xfrm6_tunnel_spi_kmem) {
-		X6TPRINTK1(KERN_ERR
-			   "%s(): failed to allocate xfrm6_tunnel_spi_kmem\n",
-		           __FUNCTION__);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
-		INIT_HLIST_HEAD(&xfrm6_tunnel_spi_byaddr[i]);
-	for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++)
-		INIT_HLIST_HEAD(&xfrm6_tunnel_spi_byspi[i]);
-	return 0;
-}
-
-static void xfrm6_tunnel_spi_fini(void)
-{
-	int i;
-
-	X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
-
-	for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) {
-		if (!hlist_empty(&xfrm6_tunnel_spi_byaddr[i]))
-			goto err;
-	}
-	for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) {
-		if (!hlist_empty(&xfrm6_tunnel_spi_byspi[i]))
-			goto err;
-	}
-	kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
-	xfrm6_tunnel_spi_kmem = NULL;
-	return;
-err:
-	X6TPRINTK1(KERN_ERR "%s(): table is not empty\n", __FUNCTION__);
-	return;
-}
-
-static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
+static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr)
 {
+	struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
 	struct xfrm6_tunnel_spi *x6spi;
-	struct hlist_node *pos;
-
-	X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
 
-	hlist_for_each_entry(x6spi, pos,
-			     &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
+	hlist_for_each_entry_rcu(x6spi,
+			     &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
 			     list_byaddr) {
-		if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
-			X6SPI_CHECK_MAGIC(x6spi);
-			X6TPRINTK3(KERN_DEBUG "%s() = %p(%u)\n", __FUNCTION__, x6spi, x6spi->spi);
+		if (xfrm6_addr_equal(&x6spi->addr, saddr))
 			return x6spi;
-		}
 	}
 
-	X6TPRINTK3(KERN_DEBUG "%s() = NULL(0)\n", __FUNCTION__);
 	return NULL;
 }
 
-u32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
+__be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr)
 {
 	struct xfrm6_tunnel_spi *x6spi;
 	u32 spi;
 
-	X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
-
-	read_lock_bh(&xfrm6_tunnel_spi_lock);
-	x6spi = __xfrm6_tunnel_spi_lookup(saddr);
+	rcu_read_lock_bh();
+	x6spi = __xfrm6_tunnel_spi_lookup(net, saddr);
 	spi = x6spi ? x6spi->spi : 0;
-	read_unlock_bh(&xfrm6_tunnel_spi_lock);
-	return spi;
+	rcu_read_unlock_bh();
+	return htonl(spi);
 }
 
 EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup);
 
-static u32 __xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
+static int __xfrm6_tunnel_spi_check(struct net *net, u32 spi)
 {
-	u32 spi;
+	struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
 	struct xfrm6_tunnel_spi *x6spi;
-	struct hlist_node *pos;
-	unsigned index;
+	int index = xfrm6_tunnel_spi_hash_byspi(spi);
+
+	hlist_for_each_entry(x6spi,
+			     &xfrm6_tn->spi_byspi[index],
+			     list_byspi) {
+		if (x6spi->spi == spi)
+			return -1;
+	}
+	return index;
+}
 
-	X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
+static u32 __xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr)
+{
+	struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
+	u32 spi;
+	struct xfrm6_tunnel_spi *x6spi;
+	int index;
 
-	if (xfrm6_tunnel_spi < XFRM6_TUNNEL_SPI_MIN ||
-	    xfrm6_tunnel_spi >= XFRM6_TUNNEL_SPI_MAX)
-		xfrm6_tunnel_spi = XFRM6_TUNNEL_SPI_MIN;
+	if (xfrm6_tn->spi < XFRM6_TUNNEL_SPI_MIN ||
+	    xfrm6_tn->spi >= XFRM6_TUNNEL_SPI_MAX)
+		xfrm6_tn->spi = XFRM6_TUNNEL_SPI_MIN;
 	else
-		xfrm6_tunnel_spi++;
-
-	for (spi = xfrm6_tunnel_spi; spi <= XFRM6_TUNNEL_SPI_MAX; spi++) {
-		index = xfrm6_tunnel_spi_hash_byspi(spi);
-		hlist_for_each_entry(x6spi, pos, 
-				     &xfrm6_tunnel_spi_byspi[index], 
-				     list_byspi) {
-			if (x6spi->spi == spi)
-				goto try_next_1;
-		}
-		xfrm6_tunnel_spi = spi;
-		goto alloc_spi;
-try_next_1:;
+		xfrm6_tn->spi++;
+
+	for (spi = xfrm6_tn->spi; spi <= XFRM6_TUNNEL_SPI_MAX; spi++) {
+		index = __xfrm6_tunnel_spi_check(net, spi);
+		if (index >= 0)
+			goto alloc_spi;
 	}
-	for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tunnel_spi; spi++) {
-		index = xfrm6_tunnel_spi_hash_byspi(spi);
-		hlist_for_each_entry(x6spi, pos, 
-				     &xfrm6_tunnel_spi_byspi[index], 
-				     list_byspi) {
-			if (x6spi->spi == spi)
-				goto try_next_2;
-		}
-		xfrm6_tunnel_spi = spi;
-		goto alloc_spi;
-try_next_2:;
+	for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tn->spi; spi++) {
+		index = __xfrm6_tunnel_spi_check(net, spi);
+		if (index >= 0)
+			goto alloc_spi;
 	}
 	spi = 0;
 	goto out;
 alloc_spi:
-	X6TPRINTK3(KERN_DEBUG "%s(): allocate new spi for "
-			      "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", 
-			      __FUNCTION__, 
-			      NIP6(*(struct in6_addr *)saddr));
-	x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, SLAB_ATOMIC);
-	if (!x6spi) {
-		X6TPRINTK1(KERN_ERR "%s(): kmem_cache_alloc() failed\n", 
-			   __FUNCTION__);
+	xfrm6_tn->spi = spi;
+	x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, GFP_ATOMIC);
+	if (!x6spi)
 		goto out;
-	}
-#ifdef XFRM6_TUNNEL_SPI_MAGIC
-	x6spi->magic = XFRM6_TUNNEL_SPI_MAGIC;
-#endif
+
 	memcpy(&x6spi->addr, saddr, sizeof(x6spi->addr));
 	x6spi->spi = spi;
 	atomic_set(&x6spi->refcnt, 1);
 
-	hlist_add_head(&x6spi->list_byspi, &xfrm6_tunnel_spi_byspi[index]);
+	hlist_add_head_rcu(&x6spi->list_byspi, &xfrm6_tn->spi_byspi[index]);
 
 	index = xfrm6_tunnel_spi_hash_byaddr(saddr);
-	hlist_add_head(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]);
-	X6SPI_CHECK_MAGIC(x6spi);
+	hlist_add_head_rcu(&x6spi->list_byaddr, &xfrm6_tn->spi_byaddr[index]);
 out:
-	X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, spi);
 	return spi;
 }
 
-u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
+__be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr)
 {
 	struct xfrm6_tunnel_spi *x6spi;
 	u32 spi;
 
-	X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
-
-	write_lock_bh(&xfrm6_tunnel_spi_lock);
-	x6spi = __xfrm6_tunnel_spi_lookup(saddr);
+	spin_lock_bh(&xfrm6_tunnel_spi_lock);
+	x6spi = __xfrm6_tunnel_spi_lookup(net, saddr);
 	if (x6spi) {
 		atomic_inc(&x6spi->refcnt);
 		spi = x6spi->spi;
 	} else
-		spi = __xfrm6_tunnel_alloc_spi(saddr);
-	write_unlock_bh(&xfrm6_tunnel_spi_lock);
-
-	X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, spi);
+		spi = __xfrm6_tunnel_alloc_spi(net, saddr);
+	spin_unlock_bh(&xfrm6_tunnel_spi_lock);
 
-	return spi;
+	return htonl(spi);
 }
 
 EXPORT_SYMBOL(xfrm6_tunnel_alloc_spi);
 
-void xfrm6_tunnel_free_spi(xfrm_address_t *saddr)
+static void x6spi_destroy_rcu(struct rcu_head *head)
 {
-	struct xfrm6_tunnel_spi *x6spi;
-	struct hlist_node *pos, *n;
+	kmem_cache_free(xfrm6_tunnel_spi_kmem,
+			container_of(head, struct xfrm6_tunnel_spi, rcu_head));
+}
 
-	X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
+static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr)
+{
+	struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
+	struct xfrm6_tunnel_spi *x6spi;
+	struct hlist_node *n;
 
-	write_lock_bh(&xfrm6_tunnel_spi_lock);
+	spin_lock_bh(&xfrm6_tunnel_spi_lock);
 
-	hlist_for_each_entry_safe(x6spi, pos, n, 
-				  &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
+	hlist_for_each_entry_safe(x6spi, n,
+				  &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
 				  list_byaddr)
 	{
-		if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
-			X6TPRINTK3(KERN_DEBUG "%s(): x6spi object "
-					      "for %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
-					      "found at %p\n",
-				   __FUNCTION__, 
-				   NIP6(*(struct in6_addr *)saddr),
-				   x6spi);
-			X6SPI_CHECK_MAGIC(x6spi);
+		if (xfrm6_addr_equal(&x6spi->addr, saddr)) {
 			if (atomic_dec_and_test(&x6spi->refcnt)) {
-				hlist_del(&x6spi->list_byaddr);
-				hlist_del(&x6spi->list_byspi);
-				kmem_cache_free(xfrm6_tunnel_spi_kmem, x6spi);
+				hlist_del_rcu(&x6spi->list_byaddr);
+				hlist_del_rcu(&x6spi->list_byspi);
+				call_rcu(&x6spi->rcu_head, x6spi_destroy_rcu);
 				break;
 			}
 		}
 	}
-	write_unlock_bh(&xfrm6_tunnel_spi_lock);
+	spin_unlock_bh(&xfrm6_tunnel_spi_lock);
 }
 
-EXPORT_SYMBOL(xfrm6_tunnel_free_spi);
-
 static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct ipv6hdr *top_iph;
-
-	top_iph = (struct ipv6hdr *)skb->data;
-	top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
-
+	skb_push(skb, -skb_network_offset(skb));
 	return 0;
 }
 
-static int xfrm6_tunnel_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	return 0;
+	return skb_network_header(skb)[IP6CB(skb)->nhoff];
 }
 
-static struct xfrm6_tunnel *xfrm6_tunnel_handler;
-static DECLARE_MUTEX(xfrm6_tunnel_sem);
-
-int xfrm6_tunnel_register(struct xfrm6_tunnel *handler)
+static int xfrm6_tunnel_rcv(struct sk_buff *skb)
 {
-	int ret;
+	struct net *net = dev_net(skb->dev);
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	__be32 spi;
 
-	down(&xfrm6_tunnel_sem);
-	ret = 0;
-	if (xfrm6_tunnel_handler != NULL)
-		ret = -EINVAL;
-	if (!ret)
-		xfrm6_tunnel_handler = handler;
-	up(&xfrm6_tunnel_sem);
-
-	return ret;
-}
-
-EXPORT_SYMBOL(xfrm6_tunnel_register);
-
-int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler)
-{
-	int ret;
-
-	down(&xfrm6_tunnel_sem);
-	ret = 0;
-	if (xfrm6_tunnel_handler != handler)
-		ret = -EINVAL;
-	if (!ret)
-		xfrm6_tunnel_handler = NULL;
-	up(&xfrm6_tunnel_sem);
-
-	synchronize_net();
-
-	return ret;
+	spi = xfrm6_tunnel_spi_lookup(net, (const xfrm_address_t *)&iph->saddr);
+	return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi);
 }
 
-EXPORT_SYMBOL(xfrm6_tunnel_deregister);
-
-static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
-{
-	struct sk_buff *skb = *pskb;
-	struct xfrm6_tunnel *handler = xfrm6_tunnel_handler;
-	struct ipv6hdr *iph = skb->nh.ipv6h;
-	u32 spi;
-
-	/* device-like_ip6ip6_handler() */
-	if (handler && handler->handler(pskb, nhoffp) == 0)
-		return 0;
-
-	spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
-	return xfrm6_rcv_spi(pskb, nhoffp, spi);
-}
-
-static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-			     int type, int code, int offset, __u32 info)
+static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+			    u8 type, u8 code, int offset, __be32 info)
 {
-	struct xfrm6_tunnel *handler = xfrm6_tunnel_handler;
-
-	/* call here first for device-like ip6ip6 err handling */
-	if (handler) {
-		handler->err_handler(skb, opt, type, code, offset, info);
-		return;
-	}
-
 	/* xfrm6_tunnel native err handling */
 	switch (type) {
-	case ICMPV6_DEST_UNREACH: 
+	case ICMPV6_DEST_UNREACH:
 		switch (code) {
-		case ICMPV6_NOROUTE: 
+		case ICMPV6_NOROUTE:
 		case ICMPV6_ADM_PROHIBITED:
 		case ICMPV6_NOT_NEIGHBOUR:
 		case ICMPV6_ADDR_UNREACH:
 		case ICMPV6_PORT_UNREACH:
 		default:
-			X6TPRINTK3(KERN_DEBUG
-				   "xfrm6_tunnel: Destination Unreach.\n");
 			break;
 		}
 		break;
 	case ICMPV6_PKT_TOOBIG:
-			X6TPRINTK3(KERN_DEBUG 
-				   "xfrm6_tunnel: Packet Too Big.\n");
 		break;
 	case ICMPV6_TIME_EXCEED:
 		switch (code) {
 		case ICMPV6_EXC_HOPLIMIT:
-			X6TPRINTK3(KERN_DEBUG
-				   "xfrm6_tunnel: Too small Hoplimit.\n");
 			break;
 		case ICMPV6_EXC_FRAGTIME:
-		default: 
+		default:
 			break;
 		}
 		break;
@@ -463,12 +278,13 @@ static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	default:
 		break;
 	}
-	return;
+
+	return 0;
 }
 
 static int xfrm6_tunnel_init_state(struct xfrm_state *x)
 {
-	if (!x->props.mode)
+	if (x->props.mode != XFRM_MODE_TUNNEL)
 		return -EINVAL;
 
 	if (x->encap)
@@ -481,10 +297,12 @@ static int xfrm6_tunnel_init_state(struct xfrm_state *x)
 
 static void xfrm6_tunnel_destroy(struct xfrm_state *x)
 {
-	xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr);
+	struct net *net = xs_net(x);
+
+	xfrm6_tunnel_free_spi(net, (xfrm_address_t *)&x->props.saddr);
 }
 
-static struct xfrm_type xfrm6_tunnel_type = {
+static const struct xfrm_type xfrm6_tunnel_type = {
 	.description	= "IP6IP6",
 	.owner          = THIS_MODULE,
 	.proto		= IPPROTO_IPV6,
@@ -494,50 +312,88 @@ static struct xfrm_type xfrm6_tunnel_type = {
 	.output		= xfrm6_tunnel_output,
 };
 
-static struct inet6_protocol xfrm6_tunnel_protocol = {
+static struct xfrm6_tunnel xfrm6_tunnel_handler __read_mostly = {
+	.handler	= xfrm6_tunnel_rcv,
+	.err_handler	= xfrm6_tunnel_err,
+	.priority	= 2,
+};
+
+static struct xfrm6_tunnel xfrm46_tunnel_handler __read_mostly = {
 	.handler	= xfrm6_tunnel_rcv,
-	.err_handler	= xfrm6_tunnel_err, 
-	.flags          = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+	.err_handler	= xfrm6_tunnel_err,
+	.priority	= 2,
+};
+
+static int __net_init xfrm6_tunnel_net_init(struct net *net)
+{
+	struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
+	unsigned int i;
+
+	for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&xfrm6_tn->spi_byaddr[i]);
+	for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++)
+		INIT_HLIST_HEAD(&xfrm6_tn->spi_byspi[i]);
+	xfrm6_tn->spi = 0;
+
+	return 0;
+}
+
+static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
+{
+}
+
+static struct pernet_operations xfrm6_tunnel_net_ops = {
+	.init	= xfrm6_tunnel_net_init,
+	.exit	= xfrm6_tunnel_net_exit,
+	.id	= &xfrm6_tunnel_net_id,
+	.size	= sizeof(struct xfrm6_tunnel_net),
 };
 
 static int __init xfrm6_tunnel_init(void)
 {
-	X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
+	int rv;
 
-	if (xfrm_register_type(&xfrm6_tunnel_type, AF_INET6) < 0) {
-		X6TPRINTK1(KERN_ERR
-			   "xfrm6_tunnel init: can't add xfrm type\n");
-		return -EAGAIN;
-	}
-	if (inet6_add_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6) < 0) {
-		X6TPRINTK1(KERN_ERR
-			   "xfrm6_tunnel init(): can't add protocol\n");
-		xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
-		return -EAGAIN;
-	}
-	if (xfrm6_tunnel_spi_init() < 0) {
-		X6TPRINTK1(KERN_ERR
-			   "xfrm6_tunnel init: failed to initialize spi\n");
-		inet6_del_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6);
-		xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
-		return -EAGAIN;
-	}
+	xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi",
+						  sizeof(struct xfrm6_tunnel_spi),
+						  0, SLAB_HWCACHE_ALIGN,
+						  NULL);
+	if (!xfrm6_tunnel_spi_kmem)
+		return -ENOMEM;
+	rv = register_pernet_subsys(&xfrm6_tunnel_net_ops);
+	if (rv < 0)
+		goto out_pernet;
+	rv = xfrm_register_type(&xfrm6_tunnel_type, AF_INET6);
+	if (rv < 0)
+		goto out_type;
+	rv = xfrm6_tunnel_register(&xfrm6_tunnel_handler, AF_INET6);
+	if (rv < 0)
+		goto out_xfrm6;
+	rv = xfrm6_tunnel_register(&xfrm46_tunnel_handler, AF_INET);
+	if (rv < 0)
+		goto out_xfrm46;
 	return 0;
+
+out_xfrm46:
+	xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6);
+out_xfrm6:
+	xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
+out_type:
+	unregister_pernet_subsys(&xfrm6_tunnel_net_ops);
+out_pernet:
+	kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
+	return rv;
 }
 
 static void __exit xfrm6_tunnel_fini(void)
 {
-	X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
-
-	xfrm6_tunnel_spi_fini();
-	if (inet6_del_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6) < 0)
-		X6TPRINTK1(KERN_ERR 
-			   "xfrm6_tunnel close: can't remove protocol\n");
-	if (xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6) < 0)
-		X6TPRINTK1(KERN_ERR
-			   "xfrm6_tunnel close: can't remove xfrm type\n");
+	xfrm6_tunnel_deregister(&xfrm46_tunnel_handler, AF_INET);
+	xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6);
+	xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
+	unregister_pernet_subsys(&xfrm6_tunnel_net_ops);
+	kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
 }
 
 module_init(xfrm6_tunnel_init);
 module_exit(xfrm6_tunnel_fini);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_IPV6);