Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1075 commits) myri10ge: update driver version number to 1.4.3-1.369 r8169: add shutdown handler r8169: preliminary 8168d support r8169: support additional 8168cp chipset r8169: change default behavior for mildly identified 8168c chipsets r8169: add a new 8168cp flavor r8169: add a new 8168c flavor (bis) r8169: add a new 8168c flavor r8169: sync existing 8168 device hardware start sequences with vendor driver r8169: 8168b Tx performance tweak r8169: make room for more specific 8168 hardware start procedure r8169: shuffle some registers handling around (8168 operation only) r8169: new phy init parameters for the 8168b r8169: update phy init parameters r8169: wake up the PHY of the 8168 af_key: fix SADB_X_SPDDELETE response ath9k: Fix return code when ath9k_hw_setpower() fails on reset ath9k: remove nasty FAIL macro from ath9k_hw_reset() gre: minor cleanups in netlink interface gre: fix copy and paste error ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2008-10-11 09:33:18 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-10-11 09:33:18 -0700
commit: 4dd9ec4946b4651a295d3bc8df9c15ac692a8f4e (patch)
tree: afb300c752de7175bb2df4722d5c857e070c75d9 /net/netfilter
parent: 86ed5a93b8b56e4e0877b914af0e10883a196384 (diff)
parent: 6861ff35ec5b60fafaf8651754c9a75142bfa9a4 (diff)
101 files changed, 16898 insertions, 1944 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index ee898e74808..78892cf2b02 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -38,10 +38,11 @@ config NF_CONNTRACK
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+if NF_CONNTRACK
+
 config NF_CT_ACCT
 	bool "Connection tracking flow accounting"
 	depends on NETFILTER_ADVANCED
-	depends on NF_CONNTRACK
 	help
 	  If this option is enabled, the connection tracking code will
 	  keep per-flow packet and byte counters.
@@ -63,7 +64,6 @@ config NF_CT_ACCT
 config NF_CONNTRACK_MARK
 	bool  'Connection mark tracking support'
 	depends on NETFILTER_ADVANCED
-	depends on NF_CONNTRACK
 	help
 	  This option enables support for connection marks, used by the
 	  `CONNMARK' target and `connmark' match. Similar to the mark value
@@ -72,7 +72,7 @@ config NF_CONNTRACK_MARK
 
 config NF_CONNTRACK_SECMARK
 	bool  'Connection tracking security mark support'
-	depends on NF_CONNTRACK && NETWORK_SECMARK
+	depends on NETWORK_SECMARK
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option enables security markings to be applied to
@@ -85,7 +85,6 @@ config NF_CONNTRACK_SECMARK
 
 config NF_CONNTRACK_EVENTS
 	bool "Connection tracking events"
-	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	help
 	  If this option is enabled, the connection tracking code will
@@ -96,7 +95,7 @@ config NF_CONNTRACK_EVENTS
 
 config NF_CT_PROTO_DCCP
 	tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'
-	depends on EXPERIMENTAL && NF_CONNTRACK
+	depends on EXPERIMENTAL
 	depends on NETFILTER_ADVANCED
 	default IP_DCCP
 	help
@@ -107,11 +106,10 @@ config NF_CT_PROTO_DCCP
 
 config NF_CT_PROTO_GRE
 	tristate
-	depends on NF_CONNTRACK
 
 config NF_CT_PROTO_SCTP
 	tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
-	depends on EXPERIMENTAL && NF_CONNTRACK
+	depends on EXPERIMENTAL
 	depends on NETFILTER_ADVANCED
 	default IP_SCTP
 	help
@@ -123,7 +121,6 @@ config NF_CT_PROTO_SCTP
 
 config NF_CT_PROTO_UDPLITE
 	tristate 'UDP-Lite protocol connection tracking support'
-	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	help
 	  With this option enabled, the layer 3 independent connection
@@ -134,7 +131,6 @@ config NF_CT_PROTO_UDPLITE
 
 config NF_CONNTRACK_AMANDA
 	tristate "Amanda backup protocol support"
-	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	select TEXTSEARCH
 	select TEXTSEARCH_KMP
@@ -150,7 +146,6 @@ config NF_CONNTRACK_AMANDA
 
 config NF_CONNTRACK_FTP
 	tristate "FTP protocol support"
-	depends on NF_CONNTRACK
 	default m if NETFILTER_ADVANCED=n
 	help
 	  Tracking FTP connections is problematic: special helpers are
@@ -165,7 +160,7 @@ config NF_CONNTRACK_FTP
 
 config NF_CONNTRACK_H323
 	tristate "H.323 protocol support"
-	depends on NF_CONNTRACK && (IPV6 || IPV6=n)
+	depends on (IPV6 || IPV6=n)
 	depends on NETFILTER_ADVANCED
 	help
 	  H.323 is a VoIP signalling protocol from ITU-T. As one of the most
@@ -185,7 +180,6 @@ config NF_CONNTRACK_H323
 
 config NF_CONNTRACK_IRC
 	tristate "IRC protocol support"
-	depends on NF_CONNTRACK
 	default m if NETFILTER_ADVANCED=n
 	help
 	  There is a commonly-used extension to IRC called
@@ -201,7 +195,6 @@ config NF_CONNTRACK_IRC
 
 config NF_CONNTRACK_NETBIOS_NS
 	tristate "NetBIOS name service protocol support"
-	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	help
 	  NetBIOS name service requests are sent as broadcast messages from an
@@ -221,7 +214,6 @@ config NF_CONNTRACK_NETBIOS_NS
 
 config NF_CONNTRACK_PPTP
 	tristate "PPtP protocol support"
-	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	select NF_CT_PROTO_GRE
 	help
@@ -241,7 +233,7 @@ config NF_CONNTRACK_PPTP
 
 config NF_CONNTRACK_SANE
 	tristate "SANE protocol support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && NF_CONNTRACK
+	depends on EXPERIMENTAL
 	depends on NETFILTER_ADVANCED
 	help
 	  SANE is a protocol for remote access to scanners as implemented
@@ -255,7 +247,6 @@ config NF_CONNTRACK_SANE
 
 config NF_CONNTRACK_SIP
 	tristate "SIP protocol support"
-	depends on NF_CONNTRACK
 	default m if NETFILTER_ADVANCED=n
 	help
 	  SIP is an application-layer control protocol that can establish,
@@ -268,7 +259,6 @@ config NF_CONNTRACK_SIP
 
 config NF_CONNTRACK_TFTP
 	tristate "TFTP protocol support"
-	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	help
 	  TFTP connection tracking helper, this is required depending
@@ -280,13 +270,29 @@ config NF_CONNTRACK_TFTP
 
 config NF_CT_NETLINK
 	tristate 'Connection tracking netlink interface'
-	depends on NF_CONNTRACK
 	select NETFILTER_NETLINK
 	depends on NF_NAT=n || NF_NAT
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option enables support for a netlink-based userspace interface
 
+# transparent proxy support
+config NETFILTER_TPROXY
+	tristate "Transparent proxying support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	depends on IP_NF_MANGLE
+	depends on NETFILTER_ADVANCED
+	help
+	  This option enables transparent proxying support, that is,
+	  support for handling non-locally bound IPv4 TCP and UDP sockets.
+	  For it to work you will have to configure certain iptables rules
+	  and use policy routing. For more information on how to set it up
+	  see Documentation/networking/tproxy.txt.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+endif # NF_CONNTRACK
+
 config NETFILTER_XTABLES
 	tristate "Netfilter Xtables support (required for ip_tables)"
 	default m if NETFILTER_ADVANCED=n
@@ -294,11 +300,12 @@ config NETFILTER_XTABLES
 	  This is required if you intend to use any of ip_tables,
 	  ip6_tables or arp_tables.
 
+if NETFILTER_XTABLES
+
 # alphabetically ordered list of targets
 
 config NETFILTER_XT_TARGET_CLASSIFY
 	tristate '"CLASSIFY" target support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `CLASSIFY' target, which enables the user to set
@@ -311,8 +318,6 @@ config NETFILTER_XT_TARGET_CLASSIFY
 
 config NETFILTER_XT_TARGET_CONNMARK
 	tristate  '"CONNMARK" target support'
-	depends on NETFILTER_XTABLES
-	depends on IP_NF_MANGLE || IP6_NF_MANGLE
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	select NF_CONNTRACK_MARK
@@ -325,9 +330,20 @@ config NETFILTER_XT_TARGET_CONNMARK
 	  <file:Documentation/kbuild/modules.txt>.  The module will be called
 	  ipt_CONNMARK.ko.  If unsure, say `N'.
 
+config NETFILTER_XT_TARGET_CONNSECMARK
+	tristate '"CONNSECMARK" target support'
+	depends on NF_CONNTRACK && NF_CONNTRACK_SECMARK
+	default m if NETFILTER_ADVANCED=n
+	help
+	  The CONNSECMARK target copies security markings from packets
+	  to connections, and restores security markings from connections
+	  to packets (if the packets are not already marked).  This would
+	  normally be used in conjunction with the SECMARK target.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_DSCP
 	tristate '"DSCP" and "TOS" target support'
-	depends on NETFILTER_XTABLES
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
 	depends on NETFILTER_ADVANCED
 	help
@@ -344,7 +360,6 @@ config NETFILTER_XT_TARGET_DSCP
 
 config NETFILTER_XT_TARGET_MARK
 	tristate '"MARK" target support'
-	depends on NETFILTER_XTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option adds a `MARK' target, which allows you to create rules
@@ -356,21 +371,8 @@ config NETFILTER_XT_TARGET_MARK
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config NETFILTER_XT_TARGET_NFQUEUE
-	tristate '"NFQUEUE" target Support'
-	depends on NETFILTER_XTABLES
-	depends on NETFILTER_ADVANCED
-	help
-	  This target replaced the old obsolete QUEUE target.
-
-	  As opposed to QUEUE, it supports 65535 different queues,
-	  not just one.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config NETFILTER_XT_TARGET_NFLOG
 	tristate '"NFLOG" target support'
-	depends on NETFILTER_XTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option enables the NFLOG target, which allows to LOG
@@ -380,9 +382,19 @@ config NETFILTER_XT_TARGET_NFLOG
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_NFQUEUE
+	tristate '"NFQUEUE" target Support'
+	depends on NETFILTER_ADVANCED
+	help
+	  This target replaced the old obsolete QUEUE target.
+
+	  As opposed to QUEUE, it supports 65535 different queues,
+	  not just one.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_NOTRACK
 	tristate  '"NOTRACK" target support'
-	depends on NETFILTER_XTABLES
 	depends on IP_NF_RAW || IP6_NF_RAW
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
@@ -397,7 +409,6 @@ config NETFILTER_XT_TARGET_NOTRACK
 
 config NETFILTER_XT_TARGET_RATEEST
 	tristate '"RATEEST" target support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `RATEEST' target, which allows to measure
@@ -406,9 +417,23 @@ config NETFILTER_XT_TARGET_RATEEST
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_TPROXY
+	tristate '"TPROXY" target support (EXPERIMENTAL)'
+	depends on EXPERIMENTAL
+	depends on NETFILTER_TPROXY
+	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
+	select NF_DEFRAG_IPV4
+	help
+	  This option adds a `TPROXY' target, which is somewhat similar to
+	  REDIRECT.  It can only be used in the mangle table and is useful
+	  to redirect traffic to a transparent proxy.  It does _not_ depend
+	  on Netfilter connection tracking and NAT, unlike REDIRECT.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_TRACE
 	tristate  '"TRACE" target support'
-	depends on NETFILTER_XTABLES
 	depends on IP_NF_RAW || IP6_NF_RAW
 	depends on NETFILTER_ADVANCED
 	help
@@ -421,7 +446,7 @@ config NETFILTER_XT_TARGET_TRACE
 
 config NETFILTER_XT_TARGET_SECMARK
 	tristate '"SECMARK" target support'
-	depends on NETFILTER_XTABLES && NETWORK_SECMARK
+	depends on NETWORK_SECMARK
 	default m if NETFILTER_ADVANCED=n
 	help
 	  The SECMARK target allows security marking of network
@@ -429,21 +454,9 @@ config NETFILTER_XT_TARGET_SECMARK
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config NETFILTER_XT_TARGET_CONNSECMARK
-	tristate '"CONNSECMARK" target support'
-	depends on NETFILTER_XTABLES && NF_CONNTRACK && NF_CONNTRACK_SECMARK
-	default m if NETFILTER_ADVANCED=n
-	help
-	  The CONNSECMARK target copies security markings from packets
-	  to connections, and restores security markings from connections
-	  to packets (if the packets are not already marked).  This would
-	  normally be used in conjunction with the SECMARK target.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config NETFILTER_XT_TARGET_TCPMSS
 	tristate '"TCPMSS" target support'
-	depends on NETFILTER_XTABLES && (IPV6 || IPV6=n)
+	depends on (IPV6 || IPV6=n)
 	default m if NETFILTER_ADVANCED=n
 	---help---
 	  This option adds a `TCPMSS' target, which allows you to alter the
@@ -470,7 +483,7 @@ config NETFILTER_XT_TARGET_TCPMSS
 
 config NETFILTER_XT_TARGET_TCPOPTSTRIP
 	tristate '"TCPOPTSTRIP" target support (EXPERIMENTAL)'
-	depends on EXPERIMENTAL && NETFILTER_XTABLES
+	depends on EXPERIMENTAL
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
 	depends on NETFILTER_ADVANCED
 	help
@@ -479,7 +492,6 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP
 
 config NETFILTER_XT_MATCH_COMMENT
 	tristate  '"comment" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `comment' dummy-match, which allows you to put
@@ -490,7 +502,6 @@ config NETFILTER_XT_MATCH_COMMENT
 
 config NETFILTER_XT_MATCH_CONNBYTES
 	tristate  '"connbytes" per-connection counter match support'
-	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	select NF_CT_ACCT
@@ -503,7 +514,6 @@ config NETFILTER_XT_MATCH_CONNBYTES
 
 config NETFILTER_XT_MATCH_CONNLIMIT
 	tristate '"connlimit" match support"'
-	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	---help---
@@ -512,7 +522,6 @@ config NETFILTER_XT_MATCH_CONNLIMIT
 
 config NETFILTER_XT_MATCH_CONNMARK
 	tristate  '"connmark" connection mark match support'
-	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	select NF_CONNTRACK_MARK
@@ -526,7 +535,6 @@ config NETFILTER_XT_MATCH_CONNMARK
 
 config NETFILTER_XT_MATCH_CONNTRACK
 	tristate '"conntrack" connection tracking match support'
-	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
 	default m if NETFILTER_ADVANCED=n
 	help
@@ -540,7 +548,6 @@ config NETFILTER_XT_MATCH_CONNTRACK
 
 config NETFILTER_XT_MATCH_DCCP
 	tristate '"dccp" protocol match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	default IP_DCCP
 	help
@@ -553,7 +560,6 @@ config NETFILTER_XT_MATCH_DCCP
 
 config NETFILTER_XT_MATCH_DSCP
 	tristate '"dscp" and "tos" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `DSCP' match, which allows you to match against
@@ -569,7 +575,6 @@ config NETFILTER_XT_MATCH_DSCP
 
 config NETFILTER_XT_MATCH_ESP
 	tristate '"esp" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This match extension allows you to match a range of SPIs
@@ -577,9 +582,23 @@ config NETFILTER_XT_MATCH_ESP
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_MATCH_HASHLIMIT
+	tristate '"hashlimit" match support'
+	depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
+	depends on NETFILTER_ADVANCED
+	help
+	  This option adds a `hashlimit' match.
+
+	  As opposed to `limit', this match dynamically creates a hash table
+	  of limit buckets, based on your selection of source/destination
+	  addresses and/or ports.
+
+	  It enables you to express policies like `10kpps for any given
+	  destination address' or `500pps from any given source address'
+	  with a single rule.
+
 config NETFILTER_XT_MATCH_HELPER
 	tristate '"helper" match support'
-	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	help
@@ -590,7 +609,6 @@ config NETFILTER_XT_MATCH_HELPER
 
 config NETFILTER_XT_MATCH_IPRANGE
 	tristate '"iprange" address range match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	---help---
 	This option adds a "iprange" match, which allows you to match based on
@@ -601,7 +619,6 @@ config NETFILTER_XT_MATCH_IPRANGE
 
 config NETFILTER_XT_MATCH_LENGTH
 	tristate '"length" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option allows you to match the length of a packet against a
@@ -611,7 +628,6 @@ config NETFILTER_XT_MATCH_LENGTH
 
 config NETFILTER_XT_MATCH_LIMIT
 	tristate '"limit" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  limit matching allows you to control the rate at which a rule can be
@@ -622,7 +638,6 @@ config NETFILTER_XT_MATCH_LIMIT
 
 config NETFILTER_XT_MATCH_MAC
 	tristate '"mac" address match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  MAC matching allows you to match packets based on the source
@@ -632,7 +647,6 @@ config NETFILTER_XT_MATCH_MAC
 
 config NETFILTER_XT_MATCH_MARK
 	tristate '"mark" match support'
-	depends on NETFILTER_XTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
 	  Netfilter mark matching allows you to match packets based on the
@@ -641,9 +655,18 @@ config NETFILTER_XT_MATCH_MARK
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_MATCH_MULTIPORT
+	tristate '"multiport" Multiple port match support'
+	depends on NETFILTER_ADVANCED
+	help
+	  Multiport matching allows you to match TCP or UDP packets based on
+	  a series of source or destination ports: normally a rule can only
+	  match a single range of ports.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_OWNER
 	tristate '"owner" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	---help---
 	Socket owner matching allows you to match locally-generated packets
@@ -652,7 +675,7 @@ config NETFILTER_XT_MATCH_OWNER
 
 config NETFILTER_XT_MATCH_POLICY
 	tristate 'IPsec "policy" match support'
-	depends on NETFILTER_XTABLES && XFRM
+	depends on XFRM
 	default m if NETFILTER_ADVANCED=n
 	help
 	  Policy matching allows you to match packets based on the
@@ -661,20 +684,9 @@ config NETFILTER_XT_MATCH_POLICY
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config NETFILTER_XT_MATCH_MULTIPORT
-	tristate '"multiport" Multiple port match support'
-	depends on NETFILTER_XTABLES
-	depends on NETFILTER_ADVANCED
-	help
-	  Multiport matching allows you to match TCP or UDP packets based on
-	  a series of source or destination ports: normally a rule can only
-	  match a single range of ports.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config NETFILTER_XT_MATCH_PHYSDEV
 	tristate '"physdev" match support'
-	depends on NETFILTER_XTABLES && BRIDGE && BRIDGE_NETFILTER
+	depends on BRIDGE && BRIDGE_NETFILTER
 	depends on NETFILTER_ADVANCED
 	help
 	  Physdev packet matching matches against the physical bridge ports
@@ -684,7 +696,6 @@ config NETFILTER_XT_MATCH_PHYSDEV
 
 config NETFILTER_XT_MATCH_PKTTYPE
 	tristate '"pkttype" packet type match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  Packet type matching allows you to match a packet by
@@ -697,7 +708,6 @@ config NETFILTER_XT_MATCH_PKTTYPE
 
 config NETFILTER_XT_MATCH_QUOTA
 	tristate '"quota" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `quota' match, which allows to match on a
@@ -708,7 +718,6 @@ config NETFILTER_XT_MATCH_QUOTA
 
 config NETFILTER_XT_MATCH_RATEEST
 	tristate '"rateest" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	select NETFILTER_XT_TARGET_RATEEST
 	help
@@ -719,7 +728,6 @@ config NETFILTER_XT_MATCH_RATEEST
 
 config NETFILTER_XT_MATCH_REALM
 	tristate  '"realm" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	select NET_CLS_ROUTE
 	help
@@ -732,9 +740,26 @@ config NETFILTER_XT_MATCH_REALM
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_RECENT
+	tristate '"recent" match support'
+	depends on NETFILTER_ADVANCED
+	---help---
+	This match is used for creating one or many lists of recently
+	used addresses and then matching against that/those list(s).
+
+	Short options are available by using 'iptables -m recent -h'
+	Official Website: <http://snowman.net/projects/ipt_recent/>
+
+config NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+	bool 'Enable obsolete /proc/net/ipt_recent'
+	depends on NETFILTER_XT_MATCH_RECENT && PROC_FS
+	---help---
+	This option enables the old /proc/net/ipt_recent interface,
+	which has been obsoleted by /proc/net/xt_recent.
+
 config NETFILTER_XT_MATCH_SCTP
 	tristate  '"sctp" protocol match support (EXPERIMENTAL)'
-	depends on NETFILTER_XTABLES && EXPERIMENTAL
+	depends on EXPERIMENTAL
 	depends on NETFILTER_ADVANCED
 	default IP_SCTP
 	help
@@ -745,9 +770,23 @@ config NETFILTER_XT_MATCH_SCTP
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_SOCKET
+	tristate '"socket" match support (EXPERIMENTAL)'
+	depends on EXPERIMENTAL
+	depends on NETFILTER_TPROXY
+	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
+	select NF_DEFRAG_IPV4
+	help
+	  This option adds a `socket' match, which can be used to match
+	  packets for which a TCP or UDP socket lookup finds a valid socket.
+	  It can be used in combination with the MARK target and policy
+	  routing to implement full featured non-locally bound sockets.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_STATE
 	tristate '"state" match support'
-	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
 	default m if NETFILTER_ADVANCED=n
 	help
@@ -759,7 +798,6 @@ config NETFILTER_XT_MATCH_STATE
 
 config NETFILTER_XT_MATCH_STATISTIC
 	tristate '"statistic" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `statistic' match, which allows you to match
@@ -769,7 +807,6 @@ config NETFILTER_XT_MATCH_STATISTIC
 
 config NETFILTER_XT_MATCH_STRING
 	tristate  '"string" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	select TEXTSEARCH
 	select TEXTSEARCH_KMP
@@ -783,7 +820,6 @@ config NETFILTER_XT_MATCH_STRING
 
 config NETFILTER_XT_MATCH_TCPMSS
 	tristate '"tcpmss" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `tcpmss' match, which allows you to examine the
@@ -794,7 +830,6 @@ config NETFILTER_XT_MATCH_TCPMSS
 
 config NETFILTER_XT_MATCH_TIME
 	tristate '"time" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	---help---
 	  This option adds a "time" match, which allows you to match based on
@@ -809,7 +844,6 @@ config NETFILTER_XT_MATCH_TIME
 
 config NETFILTER_XT_MATCH_U32
 	tristate '"u32" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	---help---
 	  u32 allows you to extract quantities of up to 4 bytes from a packet,
@@ -821,20 +855,8 @@ config NETFILTER_XT_MATCH_U32
 
 	  Details and examples are in the kernel module source.
 
-config NETFILTER_XT_MATCH_HASHLIMIT
-	tristate '"hashlimit" match support'
-	depends on NETFILTER_XTABLES && (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
-	depends on NETFILTER_ADVANCED
-	help
-	  This option adds a `hashlimit' match.
-
-	  As opposed to `limit', this match dynamically creates a hash table
-	  of limit buckets, based on your selection of source/destination
-	  addresses and/or ports.
-
-	  It enables you to express policies like `10kpps for any given
-	  destination address' or `500pps from any given source address'
-	  with a single rule.
+endif # NETFILTER_XTABLES
 
 endmenu
 
+source "net/netfilter/ipvs/Kconfig"
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 3bd2cc556ae..da3d909e053 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -34,6 +34,9 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
 obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
 obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
 
+# transparent proxy support
+obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
+
 # generic X tables 
 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 
@@ -48,6 +51,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
@@ -76,10 +80,15 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o
+
+# IPVS
+obj-$(CONFIG_IP_VS) += ipvs/
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 292fa28146f..a90ac83c591 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -26,7 +26,7 @@
 
 static DEFINE_MUTEX(afinfo_mutex);
 
-const struct nf_afinfo *nf_afinfo[NPROTO] __read_mostly;
+const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
 EXPORT_SYMBOL(nf_afinfo);
 
 int nf_register_afinfo(const struct nf_afinfo *afinfo)
@@ -51,7 +51,7 @@ void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
 }
 EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
 
-struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS] __read_mostly;
+struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
 EXPORT_SYMBOL(nf_hooks);
 static DEFINE_MUTEX(nf_hook_mutex);
 
@@ -113,7 +113,7 @@ EXPORT_SYMBOL(nf_unregister_hooks);
 
 unsigned int nf_iterate(struct list_head *head,
 			struct sk_buff *skb,
-			int hook,
+			unsigned int hook,
 			const struct net_device *indev,
 			const struct net_device *outdev,
 			struct list_head **i,
@@ -155,7 +155,7 @@ unsigned int nf_iterate(struct list_head *head,
 
 /* Returns 1 if okfn() needs to be executed by the caller,
  * -EPERM for NF_DROP, 0 otherwise. */
-int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
+int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
 		 struct net_device *indev,
 		 struct net_device *outdev,
 		 int (*okfn)(struct sk_buff *),
@@ -165,14 +165,6 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
 	unsigned int verdict;
 	int ret = 0;
 
-#ifdef CONFIG_NET_NS
-	struct net *net;
-
-	net = indev == NULL ? dev_net(outdev) : dev_net(indev);
-	if (net != &init_net)
-		return 1;
-#endif
-
 	/* We may already have this, but read-locks nest anyway */
 	rcu_read_lock();
 
@@ -264,7 +256,7 @@ EXPORT_SYMBOL(proc_net_netfilter);
 void __init netfilter_init(void)
 {
 	int i, h;
-	for (i = 0; i < NPROTO; i++) {
+	for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) {
 		for (h = 0; h < NF_MAX_HOOKS; h++)
 			INIT_LIST_HEAD(&nf_hooks[i][h]);
 	}
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
new file mode 100644
index 00000000000..de6004de80b
--- /dev/null
+++ b/net/netfilter/ipvs/Kconfig
@@ -0,0 +1,239 @@
+#
+# IP Virtual Server configuration
+#
+menuconfig IP_VS
+	tristate "IP virtual server support (EXPERIMENTAL)"
+	depends on NETFILTER
+	---help---
+	  IP Virtual Server support will let you build a high-performance
+	  virtual server based on cluster of two or more real servers. This
+	  option must be enabled for at least one of the clustered computers
+	  that will take care of intercepting incoming connections to a
+	  single IP address and scheduling them to real servers.
+
+	  Three request dispatching techniques are implemented, they are
+	  virtual server via NAT, virtual server via tunneling and virtual
+	  server via direct routing. The several scheduling algorithms can
+	  be used to choose which server the connection is directed to,
+	  thus load balancing can be achieved among the servers.  For more
+	  information and its administration program, please visit the
+	  following URL: <http://www.linuxvirtualserver.org/>.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+if IP_VS
+
+config	IP_VS_IPV6
+	bool "IPv6 support for IPVS (DANGEROUS)"
+	depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
+	---help---
+	  Add IPv6 support to IPVS. This is incomplete and might be dangerous.
+
+	  Say N if unsure.
+
+config	IP_VS_DEBUG
+	bool "IP virtual server debugging"
+	---help---
+	  Say Y here if you want to get additional messages useful in
+	  debugging the IP virtual server code. You can change the debug
+	  level in /proc/sys/net/ipv4/vs/debug_level
+
+config	IP_VS_TAB_BITS
+	int "IPVS connection table size (the Nth power of 2)"
+	range 8 20
+	default 12
+	---help---
+	  The IPVS connection hash table uses the chaining scheme to handle
+	  hash collisions. Using a big IPVS connection hash table will greatly
+	  reduce conflicts when there are hundreds of thousands of connections
+	  in the hash table.
+
+	  Note the table size must be power of 2. The table size will be the
+	  value of 2 to the your input number power. The number to choose is
+	  from 8 to 20, the default number is 12, which means the table size
+	  is 4096. Don't input the number too small, otherwise you will lose
+	  performance on it. You can adapt the table size yourself, according
+	  to your virtual server application. It is good to set the table size
+	  not far less than the number of connections per second multiplying
+	  average lasting time of connection in the table.  For example, your
+	  virtual server gets 200 connections per second, the connection lasts
+	  for 200 seconds in average in the connection table, the table size
+	  should be not far less than 200x200, it is good to set the table
+	  size 32768 (2**15).
+
+	  Another note that each connection occupies 128 bytes effectively and
+	  each hash entry uses 8 bytes, so you can estimate how much memory is
+	  needed for your box.
+
+comment "IPVS transport protocol load balancing support"
+
+config	IP_VS_PROTO_TCP
+	bool "TCP load balancing support"
+	---help---
+	  This option enables support for load balancing TCP transport
+	  protocol. Say Y if unsure.
+
+config	IP_VS_PROTO_UDP
+	bool "UDP load balancing support"
+	---help---
+	  This option enables support for load balancing UDP transport
+	  protocol. Say Y if unsure.
+
+config	IP_VS_PROTO_AH_ESP
+	bool
+	depends on UNDEFINED
+
+config	IP_VS_PROTO_ESP
+	bool "ESP load balancing support"
+	select IP_VS_PROTO_AH_ESP
+	---help---
+	  This option enables support for load balancing ESP (Encapsulation
+	  Security Payload) transport protocol. Say Y if unsure.
+
+config	IP_VS_PROTO_AH
+	bool "AH load balancing support"
+	select IP_VS_PROTO_AH_ESP
+	---help---
+	  This option enables support for load balancing AH (Authentication
+	  Header) transport protocol. Say Y if unsure.
+
+comment "IPVS scheduler"
+
+config	IP_VS_RR
+	tristate "round-robin scheduling"
+	---help---
+	  The robin-robin scheduling algorithm simply directs network
+	  connections to different real servers in a round-robin manner.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+ 
+config	IP_VS_WRR
+        tristate "weighted round-robin scheduling" 
+	---help---
+	  The weighted robin-robin scheduling algorithm directs network
+	  connections to different real servers based on server weights
+	  in a round-robin manner. Servers with higher weights receive
+	  new connections first than those with less weights, and servers
+	  with higher weights get more connections than those with less
+	  weights and servers with equal weights get equal connections.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_LC
+        tristate "least-connection scheduling"
+	---help---
+	  The least-connection scheduling algorithm directs network
+	  connections to the server with the least number of active 
+	  connections.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_WLC
+        tristate "weighted least-connection scheduling"
+	---help---
+	  The weighted least-connection scheduling algorithm directs network
+	  connections to the server with the least active connections
+	  normalized by the server weight.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_LBLC
+	tristate "locality-based least-connection scheduling"
+	---help---
+	  The locality-based least-connection scheduling algorithm is for
+	  destination IP load balancing. It is usually used in cache cluster.
+	  This algorithm usually directs packet destined for an IP address to
+	  its server if the server is alive and under load. If the server is
+	  overloaded (its active connection numbers is larger than its weight)
+	  and there is a server in its half load, then allocate the weighted
+	  least-connection server to this IP address.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config  IP_VS_LBLCR
+	tristate "locality-based least-connection with replication scheduling"
+	---help---
+	  The locality-based least-connection with replication scheduling
+	  algorithm is also for destination IP load balancing. It is 
+	  usually used in cache cluster. It differs from the LBLC scheduling
+	  as follows: the load balancer maintains mappings from a target
+	  to a set of server nodes that can serve the target. Requests for
+	  a target are assigned to the least-connection node in the target's
+	  server set. If all the node in the server set are over loaded,
+	  it picks up a least-connection node in the cluster and adds it
+	  in the sever set for the target. If the server set has not been
+	  modified for the specified time, the most loaded node is removed
+	  from the server set, in order to avoid high degree of replication.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_DH
+	tristate "destination hashing scheduling"
+	---help---
+	  The destination hashing scheduling algorithm assigns network
+	  connections to the servers through looking up a statically assigned
+	  hash table by their destination IP addresses.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_SH
+	tristate "source hashing scheduling"
+	---help---
+	  The source hashing scheduling algorithm assigns network
+	  connections to the servers through looking up a statically assigned
+	  hash table by their source IP addresses.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_SED
+	tristate "shortest expected delay scheduling"
+	---help---
+	  The shortest expected delay scheduling algorithm assigns network
+	  connections to the server with the shortest expected delay. The 
+	  expected delay that the job will experience is (Ci + 1) / Ui if 
+	  sent to the ith server, in which Ci is the number of connections
+	  on the ith server and Ui is the fixed service rate (weight)
+	  of the ith server.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_NQ
+	tristate "never queue scheduling"
+	---help---
+	  The never queue scheduling algorithm adopts a two-speed model.
+	  When there is an idle server available, the job will be sent to
+	  the idle server, instead of waiting for a fast one. When there
+	  is no idle server available, the job will be sent to the server
+	  that minimize its expected delay (The Shortest Expected Delay
+	  scheduling algorithm).
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+comment 'IPVS application helper'
+
+config	IP_VS_FTP
+  	tristate "FTP protocol helper"
+        depends on IP_VS_PROTO_TCP
+	---help---
+	  FTP is a protocol that transfers IP address and/or port number in
+	  the payload. In the virtual server via Network Address Translation,
+	  the IP address and port number of real servers cannot be sent to
+	  clients in ftp connections directly, so FTP protocol helper is
+	  required for tracking the connection and mangling it back to that of
+	  virtual service.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+endif # IP_VS
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
new file mode 100644
index 00000000000..73a46fe1fe4
--- /dev/null
+++ b/net/netfilter/ipvs/Makefile
@@ -0,0 +1,33 @@
+#
+# Makefile for the IPVS modules on top of IPv4.
+#
+
+# IPVS transport protocol load balancing support
+ip_vs_proto-objs-y :=
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
+
+ip_vs-objs :=	ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o	   \
+		ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o	   		   \
+		ip_vs_est.o ip_vs_proto.o 				   \
+		$(ip_vs_proto-objs-y)
+
+
+# IPVS core
+obj-$(CONFIG_IP_VS) += ip_vs.o
+
+# IPVS schedulers
+obj-$(CONFIG_IP_VS_RR) += ip_vs_rr.o
+obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
+obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
+obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
+obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
+obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
+obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
+obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
+obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
+obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
+
+# IPVS application helpers
+obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
new file mode 100644
index 00000000000..201b8ea3020
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -0,0 +1,622 @@
+/*
+ * ip_vs_app.c: Application module support for IPVS
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
+ * is that ip_vs_app module handles the reverse direction (incoming requests
+ * and outgoing responses).
+ *
+ *		IP_MASQ_APP application masquerading module
+ *
+ * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <net/net_namespace.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <asm/system.h>
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/mutex.h>
+
+#include <net/ip_vs.h>
+
+EXPORT_SYMBOL(register_ip_vs_app);
+EXPORT_SYMBOL(unregister_ip_vs_app);
+EXPORT_SYMBOL(register_ip_vs_app_inc);
+
+/* ipvs application list head */
+static LIST_HEAD(ip_vs_app_list);
+static DEFINE_MUTEX(__ip_vs_app_mutex);
+
+
+/*
+ *	Get an ip_vs_app object
+ */
+static inline int ip_vs_app_get(struct ip_vs_app *app)
+{
+	return try_module_get(app->module);
+}
+
+
+static inline void ip_vs_app_put(struct ip_vs_app *app)
+{
+	module_put(app->module);
+}
+
+
+/*
+ *	Allocate/initialize app incarnation and register it in proto apps.
+ */
+static int
+ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
+{
+	struct ip_vs_protocol *pp;
+	struct ip_vs_app *inc;
+	int ret;
+
+	if (!(pp = ip_vs_proto_get(proto)))
+		return -EPROTONOSUPPORT;
+
+	if (!pp->unregister_app)
+		return -EOPNOTSUPP;
+
+	inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
+	if (!inc)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&inc->p_list);
+	INIT_LIST_HEAD(&inc->incs_list);
+	inc->app = app;
+	inc->port = htons(port);
+	atomic_set(&inc->usecnt, 0);
+
+	if (app->timeouts) {
+		inc->timeout_table =
+			ip_vs_create_timeout_table(app->timeouts,
+						   app->timeouts_size);
+		if (!inc->timeout_table) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	ret = pp->register_app(inc);
+	if (ret)
+		goto out;
+
+	list_add(&inc->a_list, &app->incs_list);
+	IP_VS_DBG(9, "%s application %s:%u registered\n",
+		  pp->name, inc->name, inc->port);
+
+	return 0;
+
+  out:
+	kfree(inc->timeout_table);
+	kfree(inc);
+	return ret;
+}
+
+
+/*
+ *	Release app incarnation
+ */
+static void
+ip_vs_app_inc_release(struct ip_vs_app *inc)
+{
+	struct ip_vs_protocol *pp;
+
+	if (!(pp = ip_vs_proto_get(inc->protocol)))
+		return;
+
+	if (pp->unregister_app)
+		pp->unregister_app(inc);
+
+	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
+		  pp->name, inc->name, inc->port);
+
+	list_del(&inc->a_list);
+
+	kfree(inc->timeout_table);
+	kfree(inc);
+}
+
+
+/*
+ *	Get reference to app inc (only called from softirq)
+ *
+ */
+int ip_vs_app_inc_get(struct ip_vs_app *inc)
+{
+	int result;
+
+	atomic_inc(&inc->usecnt);
+	if (unlikely((result = ip_vs_app_get(inc->app)) != 1))
+		atomic_dec(&inc->usecnt);
+	return result;
+}
+
+
+/*
+ *	Put the app inc (only called from timer or net softirq)
+ */
+void ip_vs_app_inc_put(struct ip_vs_app *inc)
+{
+	ip_vs_app_put(inc->app);
+	atomic_dec(&inc->usecnt);
+}
+
+
+/*
+ *	Register an application incarnation in protocol applications
+ */
+int
+register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
+{
+	int result;
+
+	mutex_lock(&__ip_vs_app_mutex);
+
+	result = ip_vs_app_inc_new(app, proto, port);
+
+	mutex_unlock(&__ip_vs_app_mutex);
+
+	return result;
+}
+
+
+/*
+ *	ip_vs_app registration routine
+ */
+int register_ip_vs_app(struct ip_vs_app *app)
+{
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	mutex_lock(&__ip_vs_app_mutex);
+
+	list_add(&app->a_list, &ip_vs_app_list);
+
+	mutex_unlock(&__ip_vs_app_mutex);
+
+	return 0;
+}
+
+
+/*
+ *	ip_vs_app unregistration routine
+ *	We are sure there are no app incarnations attached to services
+ */
+void unregister_ip_vs_app(struct ip_vs_app *app)
+{
+	struct ip_vs_app *inc, *nxt;
+
+	mutex_lock(&__ip_vs_app_mutex);
+
+	list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
+		ip_vs_app_inc_release(inc);
+	}
+
+	list_del(&app->a_list);
+
+	mutex_unlock(&__ip_vs_app_mutex);
+
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+}
+
+
+/*
+ *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
+ */
+int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
+{
+	return pp->app_conn_bind(cp);
+}
+
+
+/*
+ *	Unbind cp from application incarnation (called by cp destructor)
+ */
+void ip_vs_unbind_app(struct ip_vs_conn *cp)
+{
+	struct ip_vs_app *inc = cp->app;
+
+	if (!inc)
+		return;
+
+	if (inc->unbind_conn)
+		inc->unbind_conn(inc, cp);
+	if (inc->done_conn)
+		inc->done_conn(inc, cp);
+	ip_vs_app_inc_put(inc);
+	cp->app = NULL;
+}
+
+
+/*
+ *	Fixes th->seq based on ip_vs_seq info.
+ */
+static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
+{
+	__u32 seq = ntohl(th->seq);
+
+	/*
+	 *	Adjust seq with delta-offset for all packets after
+	 *	the most recent resized pkt seq and with previous_delta offset
+	 *	for all packets	before most recent resized pkt seq.
+	 */
+	if (vseq->delta || vseq->previous_delta) {
+		if(after(seq, vseq->init_seq)) {
+			th->seq = htonl(seq + vseq->delta);
+			IP_VS_DBG(9, "vs_fix_seq(): added delta (%d) to seq\n",
+				  vseq->delta);
+		} else {
+			th->seq = htonl(seq + vseq->previous_delta);
+			IP_VS_DBG(9, "vs_fix_seq(): added previous_delta "
+				  "(%d) to seq\n", vseq->previous_delta);
+		}
+	}
+}
+
+
+/*
+ *	Fixes th->ack_seq based on ip_vs_seq info.
+ */
+static inline void
+vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
+{
+	__u32 ack_seq = ntohl(th->ack_seq);
+
+	/*
+	 * Adjust ack_seq with delta-offset for
+	 * the packets AFTER most recent resized pkt has caused a shift
+	 * for packets before most recent resized pkt, use previous_delta
+	 */
+	if (vseq->delta || vseq->previous_delta) {
+		/* since ack_seq is the number of octet that is expected
+		   to receive next, so compare it with init_seq+delta */
+		if(after(ack_seq, vseq->init_seq+vseq->delta)) {
+			th->ack_seq = htonl(ack_seq - vseq->delta);
+			IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted delta "
+				  "(%d) from ack_seq\n", vseq->delta);
+
+		} else {
+			th->ack_seq = htonl(ack_seq - vseq->previous_delta);
+			IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted "
+				  "previous_delta (%d) from ack_seq\n",
+				  vseq->previous_delta);
+		}
+	}
+}
+
+
+/*
+ *	Updates ip_vs_seq if pkt has been resized
+ *	Assumes already checked proto==IPPROTO_TCP and diff!=0.
+ */
+static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
+				 unsigned flag, __u32 seq, int diff)
+{
+	/* spinlock is to keep updating cp->flags atomic */
+	spin_lock(&cp->lock);
+	if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
+		vseq->previous_delta = vseq->delta;
+		vseq->delta += diff;
+		vseq->init_seq = seq;
+		cp->flags |= flag;
+	}
+	spin_unlock(&cp->lock);
+}
+
+static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
+				  struct ip_vs_app *app)
+{
+	int diff;
+	const unsigned int tcp_offset = ip_hdrlen(skb);
+	struct tcphdr *th;
+	__u32 seq;
+
+	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
+		return 0;
+
+	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
+
+	/*
+	 *	Remember seq number in case this pkt gets resized
+	 */
+	seq = ntohl(th->seq);
+
+	/*
+	 *	Fix seq stuff if flagged as so.
+	 */
+	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
+		vs_fix_seq(&cp->out_seq, th);
+	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
+		vs_fix_ack_seq(&cp->in_seq, th);
+
+	/*
+	 *	Call private output hook function
+	 */
+	if (app->pkt_out == NULL)
+		return 1;
+
+	if (!app->pkt_out(app, cp, skb, &diff))
+		return 0;
+
+	/*
+	 *	Update ip_vs seq stuff if len has changed.
+	 */
+	if (diff != 0)
+		vs_seq_update(cp, &cp->out_seq,
+			      IP_VS_CONN_F_OUT_SEQ, seq, diff);
+
+	return 1;
+}
+
+/*
+ *	Output pkt hook. Will call bound ip_vs_app specific function
+ *	called by ipvs packet handler, assumes previously checked cp!=NULL
+ *	returns false if it can't handle packet (oom)
+ */
+int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_app *app;
+
+	/*
+	 *	check if application module is bound to
+	 *	this ip_vs_conn.
+	 */
+	if ((app = cp->app) == NULL)
+		return 1;
+
+	/* TCP is complicated */
+	if (cp->protocol == IPPROTO_TCP)
+		return app_tcp_pkt_out(cp, skb, app);
+
+	/*
+	 *	Call private output hook function
+	 */
+	if (app->pkt_out == NULL)
+		return 1;
+
+	return app->pkt_out(app, cp, skb, NULL);
+}
+
+
+static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
+				 struct ip_vs_app *app)
+{
+	int diff;
+	const unsigned int tcp_offset = ip_hdrlen(skb);
+	struct tcphdr *th;
+	__u32 seq;
+
+	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
+		return 0;
+
+	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
+
+	/*
+	 *	Remember seq number in case this pkt gets resized
+	 */
+	seq = ntohl(th->seq);
+
+	/*
+	 *	Fix seq stuff if flagged as so.
+	 */
+	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
+		vs_fix_seq(&cp->in_seq, th);
+	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
+		vs_fix_ack_seq(&cp->out_seq, th);
+
+	/*
+	 *	Call private input hook function
+	 */
+	if (app->pkt_in == NULL)
+		return 1;
+
+	if (!app->pkt_in(app, cp, skb, &diff))
+		return 0;
+
+	/*
+	 *	Update ip_vs seq stuff if len has changed.
+	 */
+	if (diff != 0)
+		vs_seq_update(cp, &cp->in_seq,
+			      IP_VS_CONN_F_IN_SEQ, seq, diff);
+
+	return 1;
+}
+
+/*
+ *	Input pkt hook. Will call bound ip_vs_app specific function
+ *	called by ipvs packet handler, assumes previously checked cp!=NULL.
+ *	returns false if can't handle packet (oom).
+ */
+int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_app *app;
+
+	/*
+	 *	check if application module is bound to
+	 *	this ip_vs_conn.
+	 */
+	if ((app = cp->app) == NULL)
+		return 1;
+
+	/* TCP is complicated */
+	if (cp->protocol == IPPROTO_TCP)
+		return app_tcp_pkt_in(cp, skb, app);
+
+	/*
+	 *	Call private input hook function
+	 */
+	if (app->pkt_in == NULL)
+		return 1;
+
+	return app->pkt_in(app, cp, skb, NULL);
+}
+
+
+#ifdef CONFIG_PROC_FS
+/*
+ *	/proc/net/ip_vs_app entry function
+ */
+
+static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
+{
+	struct ip_vs_app *app, *inc;
+
+	list_for_each_entry(app, &ip_vs_app_list, a_list) {
+		list_for_each_entry(inc, &app->incs_list, a_list) {
+			if (pos-- == 0)
+				return inc;
+		}
+	}
+	return NULL;
+
+}
+
+static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	mutex_lock(&__ip_vs_app_mutex);
+
+	return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ip_vs_app *inc, *app;
+	struct list_head *e;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ip_vs_app_idx(0);
+
+	inc = v;
+	app = inc->app;
+
+	if ((e = inc->a_list.next) != &app->incs_list)
+		return list_entry(e, struct ip_vs_app, a_list);
+
+	/* go on to next application */
+	for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
+		app = list_entry(e, struct ip_vs_app, a_list);
+		list_for_each_entry(inc, &app->incs_list, a_list) {
+			return inc;
+		}
+	}
+	return NULL;
+}
+
+static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
+{
+	mutex_unlock(&__ip_vs_app_mutex);
+}
+
+static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "prot port    usecnt name\n");
+	else {
+		const struct ip_vs_app *inc = v;
+
+		seq_printf(seq, "%-3s  %-7u %-6d %-17s\n",
+			   ip_vs_proto_name(inc->protocol),
+			   ntohs(inc->port),
+			   atomic_read(&inc->usecnt),
+			   inc->name);
+	}
+	return 0;
+}
+
+static const struct seq_operations ip_vs_app_seq_ops = {
+	.start = ip_vs_app_seq_start,
+	.next  = ip_vs_app_seq_next,
+	.stop  = ip_vs_app_seq_stop,
+	.show  = ip_vs_app_seq_show,
+};
+
+static int ip_vs_app_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ip_vs_app_seq_ops);
+}
+
+static const struct file_operations ip_vs_app_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = ip_vs_app_open,
+	.read	 = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+#endif
+
+
+/*
+ *	Replace a segment of data with a new segment
+ */
+int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
+		      char *o_buf, int o_len, char *n_buf, int n_len)
+{
+	int diff;
+	int o_offset;
+	int o_left;
+
+	EnterFunction(9);
+
+	diff = n_len - o_len;
+	o_offset = o_buf - (char *)skb->data;
+	/* The length of left data after o_buf+o_len in the skb data */
+	o_left = skb->len - (o_offset + o_len);
+
+	if (diff <= 0) {
+		memmove(o_buf + n_len, o_buf + o_len, o_left);
+		memcpy(o_buf, n_buf, n_len);
+		skb_trim(skb, skb->len + diff);
+	} else if (diff <= skb_tailroom(skb)) {
+		skb_put(skb, diff);
+		memmove(o_buf + n_len, o_buf + o_len, o_left);
+		memcpy(o_buf, n_buf, n_len);
+	} else {
+		if (pskb_expand_head(skb, skb_headroom(skb), diff, pri))
+			return -ENOMEM;
+		skb_put(skb, diff);
+		memmove(skb->data + o_offset + n_len,
+			skb->data + o_offset + o_len, o_left);
+		skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len);
+	}
+
+	/* must update the iph total length here */
+	ip_hdr(skb)->tot_len = htons(skb->len);
+
+	LeaveFunction(9);
+	return 0;
+}
+
+
+int __init ip_vs_app_init(void)
+{
+	/* we will replace it with proc_net_ipvs_create() soon */
+	proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
+	return 0;
+}
+
+
+void ip_vs_app_cleanup(void)
+{
+	proc_net_remove(&init_net, "ip_vs_app");
+}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
new file mode 100644
index 00000000000..9a24332fbed
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -0,0 +1,1110 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the Netfilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
+ * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
+ * and others. Many code here is taken from IP MASQ code of kernel 2.2.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/net.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/proc_fs.h>		/* for proc_net_* */
+#include <linux/seq_file.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
+
+#include <net/net_namespace.h>
+#include <net/ip_vs.h>
+
+
+/*
+ *  Connection hash table: for input and output packets lookups of IPVS
+ */
+static struct list_head *ip_vs_conn_tab;
+
+/*  SLAB cache for IPVS connections */
+static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
+
+/*  counter for current IPVS connections */
+static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
+
+/*  counter for no client port connections */
+static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
+
+/* random value for IPVS connection hash */
+static unsigned int ip_vs_conn_rnd;
+
+/*
+ *  Fine locking granularity for big connection hash table
+ */
+#define CT_LOCKARRAY_BITS  4
+#define CT_LOCKARRAY_SIZE  (1<<CT_LOCKARRAY_BITS)
+#define CT_LOCKARRAY_MASK  (CT_LOCKARRAY_SIZE-1)
+
+struct ip_vs_aligned_lock
+{
+	rwlock_t	l;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+/* lock array for conn table */
+static struct ip_vs_aligned_lock
+__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
+
+static inline void ct_read_lock(unsigned key)
+{
+	read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_read_unlock(unsigned key)
+{
+	read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_lock(unsigned key)
+{
+	write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_unlock(unsigned key)
+{
+	write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_read_lock_bh(unsigned key)
+{
+	read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_read_unlock_bh(unsigned key)
+{
+	read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_lock_bh(unsigned key)
+{
+	write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_unlock_bh(unsigned key)
+{
+	write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+
+/*
+ *	Returns hash value for IPVS connection entry
+ */
+static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
+				       const union nf_inet_addr *addr,
+				       __be16 port)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
+				    (__force u32)port, proto, ip_vs_conn_rnd)
+			& IP_VS_CONN_TAB_MASK;
+#endif
+	return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
+			    ip_vs_conn_rnd)
+		& IP_VS_CONN_TAB_MASK;
+}
+
+
+/*
+ *	Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
+ *	returns bool success.
+ */
+static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
+{
+	unsigned hash;
+	int ret;
+
+	/* Hash by protocol, client address and port */
+	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
+
+	ct_write_lock(hash);
+
+	if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
+		list_add(&cp->c_list, &ip_vs_conn_tab[hash]);
+		cp->flags |= IP_VS_CONN_F_HASHED;
+		atomic_inc(&cp->refcnt);
+		ret = 1;
+	} else {
+		IP_VS_ERR("ip_vs_conn_hash(): request for already hashed, "
+			  "called from %p\n", __builtin_return_address(0));
+		ret = 0;
+	}
+
+	ct_write_unlock(hash);
+
+	return ret;
+}
+
+
+/*
+ *	UNhashes ip_vs_conn from ip_vs_conn_tab.
+ *	returns bool success.
+ */
+static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
+{
+	unsigned hash;
+	int ret;
+
+	/* unhash it and decrease its reference counter */
+	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
+
+	ct_write_lock(hash);
+
+	if (cp->flags & IP_VS_CONN_F_HASHED) {
+		list_del(&cp->c_list);
+		cp->flags &= ~IP_VS_CONN_F_HASHED;
+		atomic_dec(&cp->refcnt);
+		ret = 1;
+	} else
+		ret = 0;
+
+	ct_write_unlock(hash);
+
+	return ret;
+}
+
+
+/*
+ *  Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
+ *  Called for pkts coming from OUTside-to-INside.
+ *	s_addr, s_port: pkt source address (foreign host)
+ *	d_addr, d_port: pkt dest address (load balancer)
+ */
+static inline struct ip_vs_conn *__ip_vs_conn_in_get
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
+{
+	unsigned hash;
+	struct ip_vs_conn *cp;
+
+	hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
+
+	ct_read_lock(hash);
+
+	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		if (cp->af == af &&
+		    ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
+		    ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
+		    s_port == cp->cport && d_port == cp->vport &&
+		    ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
+		    protocol == cp->protocol) {
+			/* HIT */
+			atomic_inc(&cp->refcnt);
+			ct_read_unlock(hash);
+			return cp;
+		}
+	}
+
+	ct_read_unlock(hash);
+
+	return NULL;
+}
+
+struct ip_vs_conn *ip_vs_conn_in_get
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
+{
+	struct ip_vs_conn *cp;
+
+	cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port);
+	if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
+		cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr,
+					 d_port);
+
+	IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
+		      ip_vs_proto_name(protocol),
+		      IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+		      IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+		      cp ? "hit" : "not hit");
+
+	return cp;
+}
+
+/* Get reference to connection template */
+struct ip_vs_conn *ip_vs_ct_in_get
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
+{
+	unsigned hash;
+	struct ip_vs_conn *cp;
+
+	hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
+
+	ct_read_lock(hash);
+
+	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		if (cp->af == af &&
+		    ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
+		    ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
+		    s_port == cp->cport && d_port == cp->vport &&
+		    cp->flags & IP_VS_CONN_F_TEMPLATE &&
+		    protocol == cp->protocol) {
+			/* HIT */
+			atomic_inc(&cp->refcnt);
+			goto out;
+		}
+	}
+	cp = NULL;
+
+  out:
+	ct_read_unlock(hash);
+
+	IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
+		      ip_vs_proto_name(protocol),
+		      IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+		      IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+		      cp ? "hit" : "not hit");
+
+	return cp;
+}
+
+/*
+ *  Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
+ *  Called for pkts coming from inside-to-OUTside.
+ *	s_addr, s_port: pkt source address (inside host)
+ *	d_addr, d_port: pkt dest address (foreign host)
+ */
+struct ip_vs_conn *ip_vs_conn_out_get
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
+{
+	unsigned hash;
+	struct ip_vs_conn *cp, *ret=NULL;
+
+	/*
+	 *	Check for "full" addressed entries
+	 */
+	hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port);
+
+	ct_read_lock(hash);
+
+	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		if (cp->af == af &&
+		    ip_vs_addr_equal(af, d_addr, &cp->caddr) &&
+		    ip_vs_addr_equal(af, s_addr, &cp->daddr) &&
+		    d_port == cp->cport && s_port == cp->dport &&
+		    protocol == cp->protocol) {
+			/* HIT */
+			atomic_inc(&cp->refcnt);
+			ret = cp;
+			break;
+		}
+	}
+
+	ct_read_unlock(hash);
+
+	IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
+		      ip_vs_proto_name(protocol),
+		      IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+		      IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+		      ret ? "hit" : "not hit");
+
+	return ret;
+}
+
+
+/*
+ *      Put back the conn and restart its timer with its timeout
+ */
+void ip_vs_conn_put(struct ip_vs_conn *cp)
+{
+	/* reset it expire in its timeout */
+	mod_timer(&cp->timer, jiffies+cp->timeout);
+
+	__ip_vs_conn_put(cp);
+}
+
+
+/*
+ *	Fill a no_client_port connection with a client port number
+ */
+void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
+{
+	if (ip_vs_conn_unhash(cp)) {
+		spin_lock(&cp->lock);
+		if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
+			atomic_dec(&ip_vs_conn_no_cport_cnt);
+			cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
+			cp->cport = cport;
+		}
+		spin_unlock(&cp->lock);
+
+		/* hash on new dport */
+		ip_vs_conn_hash(cp);
+	}
+}
+
+
+/*
+ *	Bind a connection entry with the corresponding packet_xmit.
+ *	Called by ip_vs_conn_new.
+ */
+static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
+{
+	switch (IP_VS_FWD_METHOD(cp)) {
+	case IP_VS_CONN_F_MASQ:
+		cp->packet_xmit = ip_vs_nat_xmit;
+		break;
+
+	case IP_VS_CONN_F_TUNNEL:
+		cp->packet_xmit = ip_vs_tunnel_xmit;
+		break;
+
+	case IP_VS_CONN_F_DROUTE:
+		cp->packet_xmit = ip_vs_dr_xmit;
+		break;
+
+	case IP_VS_CONN_F_LOCALNODE:
+		cp->packet_xmit = ip_vs_null_xmit;
+		break;
+
+	case IP_VS_CONN_F_BYPASS:
+		cp->packet_xmit = ip_vs_bypass_xmit;
+		break;
+	}
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp)
+{
+	switch (IP_VS_FWD_METHOD(cp)) {
+	case IP_VS_CONN_F_MASQ:
+		cp->packet_xmit = ip_vs_nat_xmit_v6;
+		break;
+
+	case IP_VS_CONN_F_TUNNEL:
+		cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+		break;
+
+	case IP_VS_CONN_F_DROUTE:
+		cp->packet_xmit = ip_vs_dr_xmit_v6;
+		break;
+
+	case IP_VS_CONN_F_LOCALNODE:
+		cp->packet_xmit = ip_vs_null_xmit;
+		break;
+
+	case IP_VS_CONN_F_BYPASS:
+		cp->packet_xmit = ip_vs_bypass_xmit_v6;
+		break;
+	}
+}
+#endif
+
+
+static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
+{
+	return atomic_read(&dest->activeconns)
+		+ atomic_read(&dest->inactconns);
+}
+
+/*
+ *	Bind a connection entry with a virtual service destination
+ *	Called just after a new connection entry is created.
+ */
+static inline void
+ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
+{
+	/* if dest is NULL, then return directly */
+	if (!dest)
+		return;
+
+	/* Increase the refcnt counter of the dest */
+	atomic_inc(&dest->refcnt);
+
+	/* Bind with the destination and its corresponding transmitter */
+	if ((cp->flags & IP_VS_CONN_F_SYNC) &&
+	    (!(cp->flags & IP_VS_CONN_F_TEMPLATE)))
+		/* if the connection is not template and is created
+		 * by sync, preserve the activity flag.
+		 */
+		cp->flags |= atomic_read(&dest->conn_flags) &
+			     (~IP_VS_CONN_F_INACTIVE);
+	else
+		cp->flags |= atomic_read(&dest->conn_flags);
+	cp->dest = dest;
+
+	IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
+		      "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+		      "dest->refcnt:%d\n",
+		      ip_vs_proto_name(cp->protocol),
+		      IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+		      ip_vs_fwd_tag(cp), cp->state,
+		      cp->flags, atomic_read(&cp->refcnt),
+		      atomic_read(&dest->refcnt));
+
+	/* Update the connection counters */
+	if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
+		/* It is a normal connection, so increase the inactive
+		   connection counter because it is in TCP SYNRECV
+		   state (inactive) or other protocol inacive state */
+		if ((cp->flags & IP_VS_CONN_F_SYNC) &&
+		    (!(cp->flags & IP_VS_CONN_F_INACTIVE)))
+			atomic_inc(&dest->activeconns);
+		else
+			atomic_inc(&dest->inactconns);
+	} else {
+		/* It is a persistent connection/template, so increase
+		   the peristent connection counter */
+		atomic_inc(&dest->persistconns);
+	}
+
+	if (dest->u_threshold != 0 &&
+	    ip_vs_dest_totalconns(dest) >= dest->u_threshold)
+		dest->flags |= IP_VS_DEST_F_OVERLOAD;
+}
+
+
+/*
+ * Check if there is a destination for the connection, if so
+ * bind the connection to the destination.
+ */
+struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
+{
+	struct ip_vs_dest *dest;
+
+	if ((cp) && (!cp->dest)) {
+		dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
+				       &cp->vaddr, cp->vport,
+				       cp->protocol);
+		ip_vs_bind_dest(cp, dest);
+		return dest;
+	} else
+		return NULL;
+}
+
+
+/*
+ *	Unbind a connection entry with its VS destination
+ *	Called by the ip_vs_conn_expire function.
+ */
+static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
+{
+	struct ip_vs_dest *dest = cp->dest;
+
+	if (!dest)
+		return;
+
+	IP_VS_DBG_BUF(7, "Unbind-dest %s c:%s:%d v:%s:%d "
+		      "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+		      "dest->refcnt:%d\n",
+		      ip_vs_proto_name(cp->protocol),
+		      IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+		      ip_vs_fwd_tag(cp), cp->state,
+		      cp->flags, atomic_read(&cp->refcnt),
+		      atomic_read(&dest->refcnt));
+
+	/* Update the connection counters */
+	if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
+		/* It is a normal connection, so decrease the inactconns
+		   or activeconns counter */
+		if (cp->flags & IP_VS_CONN_F_INACTIVE) {
+			atomic_dec(&dest->inactconns);
+		} else {
+			atomic_dec(&dest->activeconns);
+		}
+	} else {
+		/* It is a persistent connection/template, so decrease
+		   the peristent connection counter */
+		atomic_dec(&dest->persistconns);
+	}
+
+	if (dest->l_threshold != 0) {
+		if (ip_vs_dest_totalconns(dest) < dest->l_threshold)
+			dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+	} else if (dest->u_threshold != 0) {
+		if (ip_vs_dest_totalconns(dest) * 4 < dest->u_threshold * 3)
+			dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+	} else {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+	}
+
+	/*
+	 * Simply decrease the refcnt of the dest, because the
+	 * dest will be either in service's destination list
+	 * or in the trash.
+	 */
+	atomic_dec(&dest->refcnt);
+}
+
+
+/*
+ *	Checking if the destination of a connection template is available.
+ *	If available, return 1, otherwise invalidate this connection
+ *	template and return 0.
+ */
+int ip_vs_check_template(struct ip_vs_conn *ct)
+{
+	struct ip_vs_dest *dest = ct->dest;
+
+	/*
+	 * Checking the dest server status.
+	 */
+	if ((dest == NULL) ||
+	    !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
+	    (sysctl_ip_vs_expire_quiescent_template &&
+	     (atomic_read(&dest->weight) == 0))) {
+		IP_VS_DBG_BUF(9, "check_template: dest not available for "
+			      "protocol %s s:%s:%d v:%s:%d "
+			      "-> d:%s:%d\n",
+			      ip_vs_proto_name(ct->protocol),
+			      IP_VS_DBG_ADDR(ct->af, &ct->caddr),
+			      ntohs(ct->cport),
+			      IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
+			      ntohs(ct->vport),
+			      IP_VS_DBG_ADDR(ct->af, &ct->daddr),
+			      ntohs(ct->dport));
+
+		/*
+		 * Invalidate the connection template
+		 */
+		if (ct->vport != htons(0xffff)) {
+			if (ip_vs_conn_unhash(ct)) {
+				ct->dport = htons(0xffff);
+				ct->vport = htons(0xffff);
+				ct->cport = 0;
+				ip_vs_conn_hash(ct);
+			}
+		}
+
+		/*
+		 * Simply decrease the refcnt of the template,
+		 * don't restart its timer.
+		 */
+		atomic_dec(&ct->refcnt);
+		return 0;
+	}
+	return 1;
+}
+
+static void ip_vs_conn_expire(unsigned long data)
+{
+	struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+
+	cp->timeout = 60*HZ;
+
+	/*
+	 *	hey, I'm using it
+	 */
+	atomic_inc(&cp->refcnt);
+
+	/*
+	 *	do I control anybody?
+	 */
+	if (atomic_read(&cp->n_control))
+		goto expire_later;
+
+	/*
+	 *	unhash it if it is hashed in the conn table
+	 */
+	if (!ip_vs_conn_unhash(cp))
+		goto expire_later;
+
+	/*
+	 *	refcnt==1 implies I'm the only one referrer
+	 */
+	if (likely(atomic_read(&cp->refcnt) == 1)) {
+		/* delete the timer if it is activated by other users */
+		if (timer_pending(&cp->timer))
+			del_timer(&cp->timer);
+
+		/* does anybody control me? */
+		if (cp->control)
+			ip_vs_control_del(cp);
+
+		if (unlikely(cp->app != NULL))
+			ip_vs_unbind_app(cp);
+		ip_vs_unbind_dest(cp);
+		if (cp->flags & IP_VS_CONN_F_NO_CPORT)
+			atomic_dec(&ip_vs_conn_no_cport_cnt);
+		atomic_dec(&ip_vs_conn_count);
+
+		kmem_cache_free(ip_vs_conn_cachep, cp);
+		return;
+	}
+
+	/* hash it back to the table */
+	ip_vs_conn_hash(cp);
+
+  expire_later:
+	IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n",
+		  atomic_read(&cp->refcnt)-1,
+		  atomic_read(&cp->n_control));
+
+	ip_vs_conn_put(cp);
+}
+
+
+void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
+{
+	if (del_timer(&cp->timer))
+		mod_timer(&cp->timer, jiffies);
+}
+
+
+/*
+ *	Create a new connection entry and hash it into the ip_vs_conn_tab
+ */
+struct ip_vs_conn *
+ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
+	       const union nf_inet_addr *vaddr, __be16 vport,
+	       const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
+	       struct ip_vs_dest *dest)
+{
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
+
+	cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
+	if (cp == NULL) {
+		IP_VS_ERR_RL("ip_vs_conn_new: no memory available.\n");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&cp->c_list);
+	setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+	cp->af		   = af;
+	cp->protocol	   = proto;
+	ip_vs_addr_copy(af, &cp->caddr, caddr);
+	cp->cport	   = cport;
+	ip_vs_addr_copy(af, &cp->vaddr, vaddr);
+	cp->vport	   = vport;
+	ip_vs_addr_copy(af, &cp->daddr, daddr);
+	cp->dport          = dport;
+	cp->flags	   = flags;
+	spin_lock_init(&cp->lock);
+
+	/*
+	 * Set the entry is referenced by the current thread before hashing
+	 * it in the table, so that other thread run ip_vs_random_dropentry
+	 * but cannot drop this entry.
+	 */
+	atomic_set(&cp->refcnt, 1);
+
+	atomic_set(&cp->n_control, 0);
+	atomic_set(&cp->in_pkts, 0);
+
+	atomic_inc(&ip_vs_conn_count);
+	if (flags & IP_VS_CONN_F_NO_CPORT)
+		atomic_inc(&ip_vs_conn_no_cport_cnt);
+
+	/* Bind the connection with a destination server */
+	ip_vs_bind_dest(cp, dest);
+
+	/* Set its state and timeout */
+	cp->state = 0;
+	cp->timeout = 3*HZ;
+
+	/* Bind its packet transmitter */
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		ip_vs_bind_xmit_v6(cp);
+	else
+#endif
+		ip_vs_bind_xmit(cp);
+
+	if (unlikely(pp && atomic_read(&pp->appcnt)))
+		ip_vs_bind_app(cp, pp);
+
+	/* Hash it in the ip_vs_conn_tab finally */
+	ip_vs_conn_hash(cp);
+
+	return cp;
+}
+
+
+/*
+ *	/proc/net/ip_vs_conn entries
+ */
+#ifdef CONFIG_PROC_FS
+
+static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
+{
+	int idx;
+	struct ip_vs_conn *cp;
+
+	for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
+		ct_read_lock_bh(idx);
+		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+			if (pos-- == 0) {
+				seq->private = &ip_vs_conn_tab[idx];
+				return cp;
+			}
+		}
+		ct_read_unlock_bh(idx);
+	}
+
+	return NULL;
+}
+
+static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	seq->private = NULL;
+	return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
+}
+
+static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ip_vs_conn *cp = v;
+	struct list_head *e, *l = seq->private;
+	int idx;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ip_vs_conn_array(seq, 0);
+
+	/* more on same hash chain? */
+	if ((e = cp->c_list.next) != l)
+		return list_entry(e, struct ip_vs_conn, c_list);
+
+	idx = l - ip_vs_conn_tab;
+	ct_read_unlock_bh(idx);
+
+	while (++idx < IP_VS_CONN_TAB_SIZE) {
+		ct_read_lock_bh(idx);
+		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+			seq->private = &ip_vs_conn_tab[idx];
+			return cp;
+		}
+		ct_read_unlock_bh(idx);
+	}
+	seq->private = NULL;
+	return NULL;
+}
+
+static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
+{
+	struct list_head *l = seq->private;
+
+	if (l)
+		ct_read_unlock_bh(l - ip_vs_conn_tab);
+}
+
+static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
+{
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq,
+   "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Expires\n");
+	else {
+		const struct ip_vs_conn *cp = v;
+
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			seq_printf(seq,
+				"%-3s " NIP6_FMT " %04X " NIP6_FMT
+				" %04X " NIP6_FMT " %04X %-11s %7lu\n",
+				ip_vs_proto_name(cp->protocol),
+				NIP6(cp->caddr.in6), ntohs(cp->cport),
+				NIP6(cp->vaddr.in6), ntohs(cp->vport),
+				NIP6(cp->daddr.in6), ntohs(cp->dport),
+				ip_vs_state_name(cp->protocol, cp->state),
+				(cp->timer.expires-jiffies)/HZ);
+		else
+#endif
+			seq_printf(seq,
+				"%-3s %08X %04X %08X %04X"
+				" %08X %04X %-11s %7lu\n",
+				ip_vs_proto_name(cp->protocol),
+				ntohl(cp->caddr.ip), ntohs(cp->cport),
+				ntohl(cp->vaddr.ip), ntohs(cp->vport),
+				ntohl(cp->daddr.ip), ntohs(cp->dport),
+				ip_vs_state_name(cp->protocol, cp->state),
+				(cp->timer.expires-jiffies)/HZ);
+	}
+	return 0;
+}
+
+static const struct seq_operations ip_vs_conn_seq_ops = {
+	.start = ip_vs_conn_seq_start,
+	.next  = ip_vs_conn_seq_next,
+	.stop  = ip_vs_conn_seq_stop,
+	.show  = ip_vs_conn_seq_show,
+};
+
+static int ip_vs_conn_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ip_vs_conn_seq_ops);
+}
+
+static const struct file_operations ip_vs_conn_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ip_vs_conn_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+static const char *ip_vs_origin_name(unsigned flags)
+{
+	if (flags & IP_VS_CONN_F_SYNC)
+		return "SYNC";
+	else
+		return "LOCAL";
+}
+
+static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
+{
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq,
+   "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Origin Expires\n");
+	else {
+		const struct ip_vs_conn *cp = v;
+
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			seq_printf(seq,
+				"%-3s " NIP6_FMT " %04X " NIP6_FMT
+				" %04X " NIP6_FMT " %04X %-11s %-6s %7lu\n",
+				ip_vs_proto_name(cp->protocol),
+				NIP6(cp->caddr.in6), ntohs(cp->cport),
+				NIP6(cp->vaddr.in6), ntohs(cp->vport),
+				NIP6(cp->daddr.in6), ntohs(cp->dport),
+				ip_vs_state_name(cp->protocol, cp->state),
+				ip_vs_origin_name(cp->flags),
+				(cp->timer.expires-jiffies)/HZ);
+		else
+#endif
+			seq_printf(seq,
+				"%-3s %08X %04X %08X %04X "
+				"%08X %04X %-11s %-6s %7lu\n",
+				ip_vs_proto_name(cp->protocol),
+				ntohl(cp->caddr.ip), ntohs(cp->cport),
+				ntohl(cp->vaddr.ip), ntohs(cp->vport),
+				ntohl(cp->daddr.ip), ntohs(cp->dport),
+				ip_vs_state_name(cp->protocol, cp->state),
+				ip_vs_origin_name(cp->flags),
+				(cp->timer.expires-jiffies)/HZ);
+	}
+	return 0;
+}
+
+static const struct seq_operations ip_vs_conn_sync_seq_ops = {
+	.start = ip_vs_conn_seq_start,
+	.next  = ip_vs_conn_seq_next,
+	.stop  = ip_vs_conn_seq_stop,
+	.show  = ip_vs_conn_sync_seq_show,
+};
+
+static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ip_vs_conn_sync_seq_ops);
+}
+
+static const struct file_operations ip_vs_conn_sync_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ip_vs_conn_sync_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+#endif
+
+
+/*
+ *      Randomly drop connection entries before running out of memory
+ */
+static inline int todrop_entry(struct ip_vs_conn *cp)
+{
+	/*
+	 * The drop rate array needs tuning for real environments.
+	 * Called from timer bh only => no locking
+	 */
+	static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+	static char todrop_counter[9] = {0};
+	int i;
+
+	/* if the conn entry hasn't lasted for 60 seconds, don't drop it.
+	   This will leave enough time for normal connection to get
+	   through. */
+	if (time_before(cp->timeout + jiffies, cp->timer.expires + 60*HZ))
+		return 0;
+
+	/* Don't drop the entry if its number of incoming packets is not
+	   located in [0, 8] */
+	i = atomic_read(&cp->in_pkts);
+	if (i > 8 || i < 0) return 0;
+
+	if (!todrop_rate[i]) return 0;
+	if (--todrop_counter[i] > 0) return 0;
+
+	todrop_counter[i] = todrop_rate[i];
+	return 1;
+}
+
+/* Called from keventd and must protect itself from softirqs */
+void ip_vs_random_dropentry(void)
+{
+	int idx;
+	struct ip_vs_conn *cp;
+
+	/*
+	 * Randomly scan 1/32 of the whole table every second
+	 */
+	for (idx = 0; idx < (IP_VS_CONN_TAB_SIZE>>5); idx++) {
+		unsigned hash = net_random() & IP_VS_CONN_TAB_MASK;
+
+		/*
+		 *  Lock is actually needed in this loop.
+		 */
+		ct_write_lock_bh(hash);
+
+		list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+			if (cp->flags & IP_VS_CONN_F_TEMPLATE)
+				/* connection template */
+				continue;
+
+			if (cp->protocol == IPPROTO_TCP) {
+				switch(cp->state) {
+				case IP_VS_TCP_S_SYN_RECV:
+				case IP_VS_TCP_S_SYNACK:
+					break;
+
+				case IP_VS_TCP_S_ESTABLISHED:
+					if (todrop_entry(cp))
+						break;
+					continue;
+
+				default:
+					continue;
+				}
+			} else {
+				if (!todrop_entry(cp))
+					continue;
+			}
+
+			IP_VS_DBG(4, "del connection\n");
+			ip_vs_conn_expire_now(cp);
+			if (cp->control) {
+				IP_VS_DBG(4, "del conn template\n");
+				ip_vs_conn_expire_now(cp->control);
+			}
+		}
+		ct_write_unlock_bh(hash);
+	}
+}
+
+
+/*
+ *      Flush all the connection entries in the ip_vs_conn_tab
+ */
+static void ip_vs_conn_flush(void)
+{
+	int idx;
+	struct ip_vs_conn *cp;
+
+  flush_again:
+	for (idx=0; idx<IP_VS_CONN_TAB_SIZE; idx++) {
+		/*
+		 *  Lock is actually needed in this loop.
+		 */
+		ct_write_lock_bh(idx);
+
+		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+
+			IP_VS_DBG(4, "del connection\n");
+			ip_vs_conn_expire_now(cp);
+			if (cp->control) {
+				IP_VS_DBG(4, "del conn template\n");
+				ip_vs_conn_expire_now(cp->control);
+			}
+		}
+		ct_write_unlock_bh(idx);
+	}
+
+	/* the counter may be not NULL, because maybe some conn entries
+	   are run by slow timer handler or unhashed but still referred */
+	if (atomic_read(&ip_vs_conn_count) != 0) {
+		schedule();
+		goto flush_again;
+	}
+}
+
+
+int __init ip_vs_conn_init(void)
+{
+	int idx;
+
+	/*
+	 * Allocate the connection hash table and initialize its list heads
+	 */
+	ip_vs_conn_tab = vmalloc(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head));
+	if (!ip_vs_conn_tab)
+		return -ENOMEM;
+
+	/* Allocate ip_vs_conn slab cache */
+	ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
+					      sizeof(struct ip_vs_conn), 0,
+					      SLAB_HWCACHE_ALIGN, NULL);
+	if (!ip_vs_conn_cachep) {
+		vfree(ip_vs_conn_tab);
+		return -ENOMEM;
+	}
+
+	IP_VS_INFO("Connection hash table configured "
+		   "(size=%d, memory=%ldKbytes)\n",
+		   IP_VS_CONN_TAB_SIZE,
+		   (long)(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head))/1024);
+	IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n",
+		  sizeof(struct ip_vs_conn));
+
+	for (idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
+		INIT_LIST_HEAD(&ip_vs_conn_tab[idx]);
+	}
+
+	for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++)  {
+		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
+	}
+
+	proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+	proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+
+	/* calculate the random value for connection hash */
+	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
+
+	return 0;
+}
+
+
+void ip_vs_conn_cleanup(void)
+{
+	/* flush all the connection entries first */
+	ip_vs_conn_flush();
+
+	/* Release the empty cache */
+	kmem_cache_destroy(ip_vs_conn_cachep);
+	proc_net_remove(&init_net, "ip_vs_conn");
+	proc_net_remove(&init_net, "ip_vs_conn_sync");
+	vfree(ip_vs_conn_tab);
+}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
new file mode 100644
index 00000000000..958abf3e5f8
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -0,0 +1,1542 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the Netfilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
+ * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
+ * and others.
+ *
+ * Changes:
+ *	Paul `Rusty' Russell		properly handle non-linear skbs
+ *	Harald Welte			don't use nfcache
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/icmp.h>
+
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>                   /* for icmp_send */
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#endif
+
+#include <net/ip_vs.h>
+
+
+EXPORT_SYMBOL(register_ip_vs_scheduler);
+EXPORT_SYMBOL(unregister_ip_vs_scheduler);
+EXPORT_SYMBOL(ip_vs_skb_replace);
+EXPORT_SYMBOL(ip_vs_proto_name);
+EXPORT_SYMBOL(ip_vs_conn_new);
+EXPORT_SYMBOL(ip_vs_conn_in_get);
+EXPORT_SYMBOL(ip_vs_conn_out_get);
+#ifdef CONFIG_IP_VS_PROTO_TCP
+EXPORT_SYMBOL(ip_vs_tcp_conn_listen);
+#endif
+EXPORT_SYMBOL(ip_vs_conn_put);
+#ifdef CONFIG_IP_VS_DEBUG
+EXPORT_SYMBOL(ip_vs_get_debug_level);
+#endif
+
+
+/* ID used in ICMP lookups */
+#define icmp_id(icmph)          (((icmph)->un).echo.id)
+#define icmpv6_id(icmph)        (icmph->icmp6_dataun.u_echo.identifier)
+
+const char *ip_vs_proto_name(unsigned proto)
+{
+	static char buf[20];
+
+	switch (proto) {
+	case IPPROTO_IP:
+		return "IP";
+	case IPPROTO_UDP:
+		return "UDP";
+	case IPPROTO_TCP:
+		return "TCP";
+	case IPPROTO_ICMP:
+		return "ICMP";
+#ifdef CONFIG_IP_VS_IPV6
+	case IPPROTO_ICMPV6:
+		return "ICMPv6";
+#endif
+	default:
+		sprintf(buf, "IP_%d", proto);
+		return buf;
+	}
+}
+
+void ip_vs_init_hash_table(struct list_head *table, int rows)
+{
+	while (--rows >= 0)
+		INIT_LIST_HEAD(&table[rows]);
+}
+
+static inline void
+ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest = cp->dest;
+	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+		spin_lock(&dest->stats.lock);
+		dest->stats.ustats.inpkts++;
+		dest->stats.ustats.inbytes += skb->len;
+		spin_unlock(&dest->stats.lock);
+
+		spin_lock(&dest->svc->stats.lock);
+		dest->svc->stats.ustats.inpkts++;
+		dest->svc->stats.ustats.inbytes += skb->len;
+		spin_unlock(&dest->svc->stats.lock);
+
+		spin_lock(&ip_vs_stats.lock);
+		ip_vs_stats.ustats.inpkts++;
+		ip_vs_stats.ustats.inbytes += skb->len;
+		spin_unlock(&ip_vs_stats.lock);
+	}
+}
+
+
+static inline void
+ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest = cp->dest;
+	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+		spin_lock(&dest->stats.lock);
+		dest->stats.ustats.outpkts++;
+		dest->stats.ustats.outbytes += skb->len;
+		spin_unlock(&dest->stats.lock);
+
+		spin_lock(&dest->svc->stats.lock);
+		dest->svc->stats.ustats.outpkts++;
+		dest->svc->stats.ustats.outbytes += skb->len;
+		spin_unlock(&dest->svc->stats.lock);
+
+		spin_lock(&ip_vs_stats.lock);
+		ip_vs_stats.ustats.outpkts++;
+		ip_vs_stats.ustats.outbytes += skb->len;
+		spin_unlock(&ip_vs_stats.lock);
+	}
+}
+
+
+static inline void
+ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
+{
+	spin_lock(&cp->dest->stats.lock);
+	cp->dest->stats.ustats.conns++;
+	spin_unlock(&cp->dest->stats.lock);
+
+	spin_lock(&svc->stats.lock);
+	svc->stats.ustats.conns++;
+	spin_unlock(&svc->stats.lock);
+
+	spin_lock(&ip_vs_stats.lock);
+	ip_vs_stats.ustats.conns++;
+	spin_unlock(&ip_vs_stats.lock);
+}
+
+
+static inline int
+ip_vs_set_state(struct ip_vs_conn *cp, int direction,
+		const struct sk_buff *skb,
+		struct ip_vs_protocol *pp)
+{
+	if (unlikely(!pp->state_transition))
+		return 0;
+	return pp->state_transition(cp, direction, skb, pp);
+}
+
+
+/*
+ *  IPVS persistent scheduling function
+ *  It creates a connection entry according to its template if exists,
+ *  or selects a server and creates a connection entry plus a template.
+ *  Locking: we are svc user (svc->refcnt), so we hold all dests too
+ *  Protocols supported: TCP, UDP
+ */
+static struct ip_vs_conn *
+ip_vs_sched_persist(struct ip_vs_service *svc,
+		    const struct sk_buff *skb,
+		    __be16 ports[2])
+{
+	struct ip_vs_conn *cp = NULL;
+	struct ip_vs_iphdr iph;
+	struct ip_vs_dest *dest;
+	struct ip_vs_conn *ct;
+	__be16  dport;			/* destination port to forward */
+	union nf_inet_addr snet;	/* source network of the client,
+					   after masking */
+
+	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+
+	/* Mask saddr with the netmask to adjust template granularity */
+#ifdef CONFIG_IP_VS_IPV6
+	if (svc->af == AF_INET6)
+		ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask);
+	else
+#endif
+		snet.ip = iph.saddr.ip & svc->netmask;
+
+	IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
+		      "mnet %s\n",
+		      IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
+		      IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+		      IP_VS_DBG_ADDR(svc->af, &snet));
+
+	/*
+	 * As far as we know, FTP is a very complicated network protocol, and
+	 * it uses control connection and data connections. For active FTP,
+	 * FTP server initialize data connection to the client, its source port
+	 * is often 20. For passive FTP, FTP server tells the clients the port
+	 * that it passively listens to,  and the client issues the data
+	 * connection. In the tunneling or direct routing mode, the load
+	 * balancer is on the client-to-server half of connection, the port
+	 * number is unknown to the load balancer. So, a conn template like
+	 * <caddr, 0, vaddr, 0, daddr, 0> is created for persistent FTP
+	 * service, and a template like <caddr, 0, vaddr, vport, daddr, dport>
+	 * is created for other persistent services.
+	 */
+	if (ports[1] == svc->port) {
+		/* Check if a template already exists */
+		if (svc->port != FTPPORT)
+			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
+					     &iph.daddr, ports[1]);
+		else
+			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
+					     &iph.daddr, 0);
+
+		if (!ct || !ip_vs_check_template(ct)) {
+			/*
+			 * No template found or the dest of the connection
+			 * template is not available.
+			 */
+			dest = svc->scheduler->schedule(svc, skb);
+			if (dest == NULL) {
+				IP_VS_DBG(1, "p-schedule: no dest found.\n");
+				return NULL;
+			}
+
+			/*
+			 * Create a template like <protocol,caddr,0,
+			 * vaddr,vport,daddr,dport> for non-ftp service,
+			 * and <protocol,caddr,0,vaddr,0,daddr,0>
+			 * for ftp service.
+			 */
+			if (svc->port != FTPPORT)
+				ct = ip_vs_conn_new(svc->af, iph.protocol,
+						    &snet, 0,
+						    &iph.daddr,
+						    ports[1],
+						    &dest->addr, dest->port,
+						    IP_VS_CONN_F_TEMPLATE,
+						    dest);
+			else
+				ct = ip_vs_conn_new(svc->af, iph.protocol,
+						    &snet, 0,
+						    &iph.daddr, 0,
+						    &dest->addr, 0,
+						    IP_VS_CONN_F_TEMPLATE,
+						    dest);
+			if (ct == NULL)
+				return NULL;
+
+			ct->timeout = svc->timeout;
+		} else {
+			/* set destination with the found template */
+			dest = ct->dest;
+		}
+		dport = dest->port;
+	} else {
+		/*
+		 * Note: persistent fwmark-based services and persistent
+		 * port zero service are handled here.
+		 * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
+		 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
+		 */
+		if (svc->fwmark) {
+			union nf_inet_addr fwmark = {
+				.all = { 0, 0, 0, htonl(svc->fwmark) }
+			};
+
+			ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0,
+					     &fwmark, 0);
+		} else
+			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
+					     &iph.daddr, 0);
+
+		if (!ct || !ip_vs_check_template(ct)) {
+			/*
+			 * If it is not persistent port zero, return NULL,
+			 * otherwise create a connection template.
+			 */
+			if (svc->port)
+				return NULL;
+
+			dest = svc->scheduler->schedule(svc, skb);
+			if (dest == NULL) {
+				IP_VS_DBG(1, "p-schedule: no dest found.\n");
+				return NULL;
+			}
+
+			/*
+			 * Create a template according to the service
+			 */
+			if (svc->fwmark) {
+				union nf_inet_addr fwmark = {
+					.all = { 0, 0, 0, htonl(svc->fwmark) }
+				};
+
+				ct = ip_vs_conn_new(svc->af, IPPROTO_IP,
+						    &snet, 0,
+						    &fwmark, 0,
+						    &dest->addr, 0,
+						    IP_VS_CONN_F_TEMPLATE,
+						    dest);
+			} else
+				ct = ip_vs_conn_new(svc->af, iph.protocol,
+						    &snet, 0,
+						    &iph.daddr, 0,
+						    &dest->addr, 0,
+						    IP_VS_CONN_F_TEMPLATE,
+						    dest);
+			if (ct == NULL)
+				return NULL;
+
+			ct->timeout = svc->timeout;
+		} else {
+			/* set destination with the found template */
+			dest = ct->dest;
+		}
+		dport = ports[1];
+	}
+
+	/*
+	 *    Create a new connection according to the template
+	 */
+	cp = ip_vs_conn_new(svc->af, iph.protocol,
+			    &iph.saddr, ports[0],
+			    &iph.daddr, ports[1],
+			    &dest->addr, dport,
+			    0,
+			    dest);
+	if (cp == NULL) {
+		ip_vs_conn_put(ct);
+		return NULL;
+	}
+
+	/*
+	 *    Add its control
+	 */
+	ip_vs_control_add(cp, ct);
+	ip_vs_conn_put(ct);
+
+	ip_vs_conn_stats(cp, svc);
+	return cp;
+}
+
+
+/*
+ *  IPVS main scheduling function
+ *  It selects a server according to the virtual service, and
+ *  creates a connection entry.
+ *  Protocols supported: TCP, UDP
+ */
+struct ip_vs_conn *
+ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_conn *cp = NULL;
+	struct ip_vs_iphdr iph;
+	struct ip_vs_dest *dest;
+	__be16 _ports[2], *pptr;
+
+	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	/*
+	 *    Persistent service
+	 */
+	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+		return ip_vs_sched_persist(svc, skb, pptr);
+
+	/*
+	 *    Non-persistent service
+	 */
+	if (!svc->fwmark && pptr[1] != svc->port) {
+		if (!svc->port)
+			IP_VS_ERR("Schedule: port zero only supported "
+				  "in persistent services, "
+				  "check your ipvs configuration\n");
+		return NULL;
+	}
+
+	dest = svc->scheduler->schedule(svc, skb);
+	if (dest == NULL) {
+		IP_VS_DBG(1, "Schedule: no dest found.\n");
+		return NULL;
+	}
+
+	/*
+	 *    Create a connection entry.
+	 */
+	cp = ip_vs_conn_new(svc->af, iph.protocol,
+			    &iph.saddr, pptr[0],
+			    &iph.daddr, pptr[1],
+			    &dest->addr, dest->port ? dest->port : pptr[1],
+			    0,
+			    dest);
+	if (cp == NULL)
+		return NULL;
+
+	IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
+		      "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
+		      ip_vs_fwd_tag(cp),
+		      IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport),
+		      IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport),
+		      IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
+		      cp->flags, atomic_read(&cp->refcnt));
+
+	ip_vs_conn_stats(cp, svc);
+	return cp;
+}
+
+
+/*
+ *  Pass or drop the packet.
+ *  Called by ip_vs_in, when the virtual service is available but
+ *  no destination is available for a new connection.
+ */
+int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
+		struct ip_vs_protocol *pp)
+{
+	__be16 _ports[2], *pptr;
+	struct ip_vs_iphdr iph;
+	int unicast;
+	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+
+	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
+	if (pptr == NULL) {
+		ip_vs_service_put(svc);
+		return NF_DROP;
+	}
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (svc->af == AF_INET6)
+		unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
+	else
+#endif
+		unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+
+	/* if it is fwmark-based service, the cache_bypass sysctl is up
+	   and the destination is a non-local unicast, then create
+	   a cache_bypass connection entry */
+	if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
+		int ret, cs;
+		struct ip_vs_conn *cp;
+		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };
+
+		ip_vs_service_put(svc);
+
+		/* create a new connection entry */
+		IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n");
+		cp = ip_vs_conn_new(svc->af, iph.protocol,
+				    &iph.saddr, pptr[0],
+				    &iph.daddr, pptr[1],
+				    &daddr, 0,
+				    IP_VS_CONN_F_BYPASS,
+				    NULL);
+		if (cp == NULL)
+			return NF_DROP;
+
+		/* statistics */
+		ip_vs_in_stats(cp, skb);
+
+		/* set state */
+		cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+
+		/* transmit the first SYN packet */
+		ret = cp->packet_xmit(skb, cp, pp);
+		/* do not touch skb anymore */
+
+		atomic_inc(&cp->in_pkts);
+		ip_vs_conn_put(cp);
+		return ret;
+	}
+
+	/*
+	 * When the virtual ftp service is presented, packets destined
+	 * for other services on the VIP may get here (except services
+	 * listed in the ipvs table), pass the packets, because it is
+	 * not ipvs job to decide to drop the packets.
+	 */
+	if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) {
+		ip_vs_service_put(svc);
+		return NF_ACCEPT;
+	}
+
+	ip_vs_service_put(svc);
+
+	/*
+	 * Notify the client that the destination is unreachable, and
+	 * release the socket buffer.
+	 * Since it is in IP layer, the TCP socket is not actually
+	 * created, the TCP RST packet cannot be sent, instead that
+	 * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ
+	 */
+#ifdef CONFIG_IP_VS_IPV6
+	if (svc->af == AF_INET6)
+		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0,
+			    skb->dev);
+	else
+#endif
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+	return NF_DROP;
+}
+
+
+/*
+ *      It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
+ *      chain, and is used for VS/NAT.
+ *      It detects packets for VS/NAT connections and sends the packets
+ *      immediately. This can avoid that iptable_nat mangles the packets
+ *      for VS/NAT.
+ */
+static unsigned int ip_vs_post_routing(unsigned int hooknum,
+				       struct sk_buff *skb,
+				       const struct net_device *in,
+				       const struct net_device *out,
+				       int (*okfn)(struct sk_buff *))
+{
+	if (!skb->ipvs_property)
+		return NF_ACCEPT;
+	/* The packet was sent from IPVS, exit this chain */
+	return NF_STOP;
+}
+
+__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
+{
+	return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
+}
+
+static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
+{
+	int err = ip_defrag(skb, user);
+
+	if (!err)
+		ip_send_check(ip_hdr(skb));
+
+	return err;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
+{
+	/* TODO IPv6: Find out what to do here for IPv6 */
+	return 0;
+}
+#endif
+
+/*
+ * Packet has been made sufficiently writable in caller
+ * - inout: 1=in->out, 0=out->in
+ */
+void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
+		    struct ip_vs_conn *cp, int inout)
+{
+	struct iphdr *iph	 = ip_hdr(skb);
+	unsigned int icmp_offset = iph->ihl*4;
+	struct icmphdr *icmph	 = (struct icmphdr *)(skb_network_header(skb) +
+						      icmp_offset);
+	struct iphdr *ciph	 = (struct iphdr *)(icmph + 1);
+
+	if (inout) {
+		iph->saddr = cp->vaddr.ip;
+		ip_send_check(iph);
+		ciph->daddr = cp->vaddr.ip;
+		ip_send_check(ciph);
+	} else {
+		iph->daddr = cp->daddr.ip;
+		ip_send_check(iph);
+		ciph->saddr = cp->daddr.ip;
+		ip_send_check(ciph);
+	}
+
+	/* the TCP/UDP port */
+	if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) {
+		__be16 *ports = (void *)ciph + ciph->ihl*4;
+
+		if (inout)
+			ports[1] = cp->vport;
+		else
+			ports[0] = cp->dport;
+	}
+
+	/* And finally the ICMP checksum */
+	icmph->checksum = 0;
+	icmph->checksum = ip_vs_checksum_complete(skb, icmp_offset);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	if (inout)
+		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+			"Forwarding altered outgoing ICMP");
+	else
+		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+			"Forwarding altered incoming ICMP");
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
+		    struct ip_vs_conn *cp, int inout)
+{
+	struct ipv6hdr *iph	 = ipv6_hdr(skb);
+	unsigned int icmp_offset = sizeof(struct ipv6hdr);
+	struct icmp6hdr *icmph	 = (struct icmp6hdr *)(skb_network_header(skb) +
+						      icmp_offset);
+	struct ipv6hdr *ciph	 = (struct ipv6hdr *)(icmph + 1);
+
+	if (inout) {
+		iph->saddr = cp->vaddr.in6;
+		ciph->daddr = cp->vaddr.in6;
+	} else {
+		iph->daddr = cp->daddr.in6;
+		ciph->saddr = cp->daddr.in6;
+	}
+
+	/* the TCP/UDP port */
+	if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) {
+		__be16 *ports = (void *)ciph + sizeof(struct ipv6hdr);
+
+		if (inout)
+			ports[1] = cp->vport;
+		else
+			ports[0] = cp->dport;
+	}
+
+	/* And finally the ICMP checksum */
+	icmph->icmp6_cksum = 0;
+	/* TODO IPv6: is this correct for ICMPv6? */
+	ip_vs_checksum_complete(skb, icmp_offset);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	if (inout)
+		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+			"Forwarding altered outgoing ICMPv6");
+	else
+		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+			"Forwarding altered incoming ICMPv6");
+}
+#endif
+
+/* Handle relevant response ICMP messages - forward to the right
+ * destination host. Used for NAT and local client.
+ */
+static int handle_response_icmp(int af, struct sk_buff *skb,
+				union nf_inet_addr *snet,
+				__u8 protocol, struct ip_vs_conn *cp,
+				struct ip_vs_protocol *pp,
+				unsigned int offset, unsigned int ihl)
+{
+	unsigned int verdict = NF_DROP;
+
+	if (IP_VS_FWD_METHOD(cp) != 0) {
+		IP_VS_ERR("shouldn't reach here, because the box is on the "
+			  "half connection in the tun/dr module.\n");
+	}
+
+	/* Ensure the checksum is correct */
+	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
+		/* Failed checksum! */
+		IP_VS_DBG_BUF(1, "Forward ICMP: failed checksum from %s!\n",
+			      IP_VS_DBG_ADDR(af, snet));
+		goto out;
+	}
+
+	if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol)
+		offset += 2 * sizeof(__u16);
+	if (!skb_make_writable(skb, offset))
+		goto out;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		ip_vs_nat_icmp_v6(skb, pp, cp, 1);
+	else
+#endif
+		ip_vs_nat_icmp(skb, pp, cp, 1);
+
+	/* do the statistics and put it back */
+	ip_vs_out_stats(cp, skb);
+
+	skb->ipvs_property = 1;
+	verdict = NF_ACCEPT;
+
+out:
+	__ip_vs_conn_put(cp);
+
+	return verdict;
+}
+
+/*
+ *	Handle ICMP messages in the inside-to-outside direction (outgoing).
+ *	Find any that might be relevant, check against existing connections.
+ *	Currently handles error types - unreachable, quench, ttl exceeded.
+ */
+static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
+{
+	struct iphdr *iph;
+	struct icmphdr	_icmph, *ic;
+	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
+	struct ip_vs_iphdr ciph;
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp;
+	unsigned int offset, ihl;
+	union nf_inet_addr snet;
+
+	*related = 1;
+
+	/* reassemble IP fragments */
+	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+		if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
+			return NF_STOLEN;
+	}
+
+	iph = ip_hdr(skb);
+	offset = ihl = iph->ihl * 4;
+	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+	if (ic == NULL)
+		return NF_DROP;
+
+	IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
+		  ic->type, ntohs(icmp_id(ic)),
+		  NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+
+	/*
+	 * Work through seeing if this is for us.
+	 * These checks are supposed to be in an order that means easy
+	 * things are checked first to speed up processing.... however
+	 * this means that some packets will manage to get a long way
+	 * down this stack and then be rejected, but that's life.
+	 */
+	if ((ic->type != ICMP_DEST_UNREACH) &&
+	    (ic->type != ICMP_SOURCE_QUENCH) &&
+	    (ic->type != ICMP_TIME_EXCEEDED)) {
+		*related = 0;
+		return NF_ACCEPT;
+	}
+
+	/* Now find the contained IP header */
+	offset += sizeof(_icmph);
+	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+	if (cih == NULL)
+		return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+	pp = ip_vs_proto_get(cih->protocol);
+	if (!pp)
+		return NF_ACCEPT;
+
+	/* Is the embedded protocol header present? */
+	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
+		     pp->dont_defrag))
+		return NF_ACCEPT;
+
+	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for");
+
+	offset += cih->ihl * 4;
+
+	ip_vs_fill_iphdr(AF_INET, cih, &ciph);
+	/* The embedded headers contain source and dest in reverse order */
+	cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+	if (!cp)
+		return NF_ACCEPT;
+
+	snet.ip = iph->saddr;
+	return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp,
+				    pp, offset, ihl);
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
+{
+	struct ipv6hdr *iph;
+	struct icmp6hdr	_icmph, *ic;
+	struct ipv6hdr	_ciph, *cih;	/* The ip header contained
+					   within the ICMP */
+	struct ip_vs_iphdr ciph;
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp;
+	unsigned int offset;
+	union nf_inet_addr snet;
+
+	*related = 1;
+
+	/* reassemble IP fragments */
+	if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
+		if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT))
+			return NF_STOLEN;
+	}
+
+	iph = ipv6_hdr(skb);
+	offset = sizeof(struct ipv6hdr);
+	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+	if (ic == NULL)
+		return NF_DROP;
+
+	IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n",
+		  ic->icmp6_type, ntohs(icmpv6_id(ic)),
+		  NIP6(iph->saddr), NIP6(iph->daddr));
+
+	/*
+	 * Work through seeing if this is for us.
+	 * These checks are supposed to be in an order that means easy
+	 * things are checked first to speed up processing.... however
+	 * this means that some packets will manage to get a long way
+	 * down this stack and then be rejected, but that's life.
+	 */
+	if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
+	    (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
+	    (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
+		*related = 0;
+		return NF_ACCEPT;
+	}
+
+	/* Now find the contained IP header */
+	offset += sizeof(_icmph);
+	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+	if (cih == NULL)
+		return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+	pp = ip_vs_proto_get(cih->nexthdr);
+	if (!pp)
+		return NF_ACCEPT;
+
+	/* Is the embedded protocol header present? */
+	/* TODO: we don't support fragmentation at the moment anyways */
+	if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
+		return NF_ACCEPT;
+
+	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for");
+
+	offset += sizeof(struct ipv6hdr);
+
+	ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
+	/* The embedded headers contain source and dest in reverse order */
+	cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+	if (!cp)
+		return NF_ACCEPT;
+
+	ipv6_addr_copy(&snet.in6, &iph->saddr);
+	return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp,
+				    pp, offset, sizeof(struct ipv6hdr));
+}
+#endif
+
+static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
+{
+	struct tcphdr _tcph, *th;
+
+	th = skb_header_pointer(skb, nh_len, sizeof(_tcph), &_tcph);
+	if (th == NULL)
+		return 0;
+	return th->rst;
+}
+
+/* Handle response packets: rewrite addresses and send away...
+ * Used for NAT and local client.
+ */
+static unsigned int
+handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+		struct ip_vs_conn *cp, int ihl)
+{
+	IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
+
+	if (!skb_make_writable(skb, ihl))
+		goto drop;
+
+	/* mangle the packet */
+	if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
+		goto drop;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		ipv6_hdr(skb)->saddr = cp->vaddr.in6;
+	else
+#endif
+	{
+		ip_hdr(skb)->saddr = cp->vaddr.ip;
+		ip_send_check(ip_hdr(skb));
+	}
+
+	/* For policy routing, packets originating from this
+	 * machine itself may be routed differently to packets
+	 * passing through.  We want this packet to be routed as
+	 * if it came from this machine itself.  So re-compute
+	 * the routing information.
+	 */
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6) {
+		if (ip6_route_me_harder(skb) != 0)
+			goto drop;
+	} else
+#endif
+		if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
+			goto drop;
+
+	IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
+
+	ip_vs_out_stats(cp, skb);
+	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+	ip_vs_conn_put(cp);
+
+	skb->ipvs_property = 1;
+
+	LeaveFunction(11);
+	return NF_ACCEPT;
+
+drop:
+	ip_vs_conn_put(cp);
+	kfree_skb(skb);
+	return NF_STOLEN;
+}
+
+/*
+ *	It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
+ *	Check if outgoing packet belongs to the established ip_vs_conn.
+ */
+static unsigned int
+ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
+	  const struct net_device *in, const struct net_device *out,
+	  int (*okfn)(struct sk_buff *))
+{
+	struct ip_vs_iphdr iph;
+	struct ip_vs_protocol *pp;
+	struct ip_vs_conn *cp;
+	int af;
+
+	EnterFunction(11);
+
+	af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
+
+	if (skb->ipvs_property)
+		return NF_ACCEPT;
+
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6) {
+		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
+			int related, verdict = ip_vs_out_icmp_v6(skb, &related);
+
+			if (related)
+				return verdict;
+			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+		}
+	} else
+#endif
+		if (unlikely(iph.protocol == IPPROTO_ICMP)) {
+			int related, verdict = ip_vs_out_icmp(skb, &related);
+
+			if (related)
+				return verdict;
+			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+		}
+
+	pp = ip_vs_proto_get(iph.protocol);
+	if (unlikely(!pp))
+		return NF_ACCEPT;
+
+	/* reassemble IP fragments */
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6) {
+		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
+			int related, verdict = ip_vs_out_icmp_v6(skb, &related);
+
+			if (related)
+				return verdict;
+
+			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+		}
+	} else
+#endif
+		if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) &&
+			     !pp->dont_defrag)) {
+			if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
+				return NF_STOLEN;
+
+			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+		}
+
+	/*
+	 * Check if the packet belongs to an existing entry
+	 */
+	cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+
+	if (unlikely(!cp)) {
+		if (sysctl_ip_vs_nat_icmp_send &&
+		    (pp->protocol == IPPROTO_TCP ||
+		     pp->protocol == IPPROTO_UDP)) {
+			__be16 _ports[2], *pptr;
+
+			pptr = skb_header_pointer(skb, iph.len,
+						  sizeof(_ports), _ports);
+			if (pptr == NULL)
+				return NF_ACCEPT;	/* Not for me */
+			if (ip_vs_lookup_real_service(af, iph.protocol,
+						      &iph.saddr,
+						      pptr[0])) {
+				/*
+				 * Notify the real server: there is no
+				 * existing entry if it is not RST
+				 * packet or not TCP packet.
+				 */
+				if (iph.protocol != IPPROTO_TCP
+				    || !is_tcp_reset(skb, iph.len)) {
+#ifdef CONFIG_IP_VS_IPV6
+					if (af == AF_INET6)
+						icmpv6_send(skb,
+							    ICMPV6_DEST_UNREACH,
+							    ICMPV6_PORT_UNREACH,
+							    0, skb->dev);
+					else
+#endif
+						icmp_send(skb,
+							  ICMP_DEST_UNREACH,
+							  ICMP_PORT_UNREACH, 0);
+					return NF_DROP;
+				}
+			}
+		}
+		IP_VS_DBG_PKT(12, pp, skb, 0,
+			      "packet continues traversal as normal");
+		return NF_ACCEPT;
+	}
+
+	return handle_response(af, skb, pp, cp, iph.len);
+}
+
+
+/*
+ *	Handle ICMP messages in the outside-to-inside direction (incoming).
+ *	Find any that might be relevant, check against existing connections,
+ *	forward to the right destination host if relevant.
+ *	Currently handles error types - unreachable, quench, ttl exceeded.
+ */
+static int
+ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
+{
+	struct iphdr *iph;
+	struct icmphdr	_icmph, *ic;
+	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
+	struct ip_vs_iphdr ciph;
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp;
+	unsigned int offset, ihl, verdict;
+	union nf_inet_addr snet;
+
+	*related = 1;
+
+	/* reassemble IP fragments */
+	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+		if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ?
+					    IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
+			return NF_STOLEN;
+	}
+
+	iph = ip_hdr(skb);
+	offset = ihl = iph->ihl * 4;
+	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+	if (ic == NULL)
+		return NF_DROP;
+
+	IP_VS_DBG(12, "Incoming ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
+		  ic->type, ntohs(icmp_id(ic)),
+		  NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+
+	/*
+	 * Work through seeing if this is for us.
+	 * These checks are supposed to be in an order that means easy
+	 * things are checked first to speed up processing.... however
+	 * this means that some packets will manage to get a long way
+	 * down this stack and then be rejected, but that's life.
+	 */
+	if ((ic->type != ICMP_DEST_UNREACH) &&
+	    (ic->type != ICMP_SOURCE_QUENCH) &&
+	    (ic->type != ICMP_TIME_EXCEEDED)) {
+		*related = 0;
+		return NF_ACCEPT;
+	}
+
+	/* Now find the contained IP header */
+	offset += sizeof(_icmph);
+	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+	if (cih == NULL)
+		return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+	pp = ip_vs_proto_get(cih->protocol);
+	if (!pp)
+		return NF_ACCEPT;
+
+	/* Is the embedded protocol header present? */
+	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
+		     pp->dont_defrag))
+		return NF_ACCEPT;
+
+	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for");
+
+	offset += cih->ihl * 4;
+
+	ip_vs_fill_iphdr(AF_INET, cih, &ciph);
+	/* The embedded headers contain source and dest in reverse order */
+	cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
+	if (!cp) {
+		/* The packet could also belong to a local client */
+		cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+		if (cp) {
+			snet.ip = iph->saddr;
+			return handle_response_icmp(AF_INET, skb, &snet,
+						    cih->protocol, cp, pp,
+						    offset, ihl);
+		}
+		return NF_ACCEPT;
+	}
+
+	verdict = NF_DROP;
+
+	/* Ensure the checksum is correct */
+	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
+		/* Failed checksum! */
+		IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n",
+			  NIPQUAD(iph->saddr));
+		goto out;
+	}
+
+	/* do the statistics and put it back */
+	ip_vs_in_stats(cp, skb);
+	if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
+		offset += 2 * sizeof(__u16);
+	verdict = ip_vs_icmp_xmit(skb, cp, pp, offset);
+	/* do not touch skb anymore */
+
+  out:
+	__ip_vs_conn_put(cp);
+
+	return verdict;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static int
+ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
+{
+	struct ipv6hdr *iph;
+	struct icmp6hdr	_icmph, *ic;
+	struct ipv6hdr	_ciph, *cih;	/* The ip header contained
+					   within the ICMP */
+	struct ip_vs_iphdr ciph;
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp;
+	unsigned int offset, verdict;
+	union nf_inet_addr snet;
+
+	*related = 1;
+
+	/* reassemble IP fragments */
+	if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
+		if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ?
+					       IP_DEFRAG_VS_IN :
+					       IP_DEFRAG_VS_FWD))
+			return NF_STOLEN;
+	}
+
+	iph = ipv6_hdr(skb);
+	offset = sizeof(struct ipv6hdr);
+	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+	if (ic == NULL)
+		return NF_DROP;
+
+	IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n",
+		  ic->icmp6_type, ntohs(icmpv6_id(ic)),
+		  NIP6(iph->saddr), NIP6(iph->daddr));
+
+	/*
+	 * Work through seeing if this is for us.
+	 * These checks are supposed to be in an order that means easy
+	 * things are checked first to speed up processing.... however
+	 * this means that some packets will manage to get a long way
+	 * down this stack and then be rejected, but that's life.
+	 */
+	if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
+	    (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
+	    (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
+		*related = 0;
+		return NF_ACCEPT;
+	}
+
+	/* Now find the contained IP header */
+	offset += sizeof(_icmph);
+	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+	if (cih == NULL)
+		return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+	pp = ip_vs_proto_get(cih->nexthdr);
+	if (!pp)
+		return NF_ACCEPT;
+
+	/* Is the embedded protocol header present? */
+	/* TODO: we don't support fragmentation at the moment anyways */
+	if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
+		return NF_ACCEPT;
+
+	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for");
+
+	offset += sizeof(struct ipv6hdr);
+
+	ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
+	/* The embedded headers contain source and dest in reverse order */
+	cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
+	if (!cp) {
+		/* The packet could also belong to a local client */
+		cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+		if (cp) {
+			ipv6_addr_copy(&snet.in6, &iph->saddr);
+			return handle_response_icmp(AF_INET6, skb, &snet,
+						    cih->nexthdr,
+						    cp, pp, offset,
+						    sizeof(struct ipv6hdr));
+		}
+		return NF_ACCEPT;
+	}
+
+	verdict = NF_DROP;
+
+	/* do the statistics and put it back */
+	ip_vs_in_stats(cp, skb);
+	if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr)
+		offset += 2 * sizeof(__u16);
+	verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
+	/* do not touch skb anymore */
+
+	__ip_vs_conn_put(cp);
+
+	return verdict;
+}
+#endif
+
+
+/*
+ *	Check if it's for virtual services, look it up,
+ *	and send it on its way...
+ */
+static unsigned int
+ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
+	 const struct net_device *in, const struct net_device *out,
+	 int (*okfn)(struct sk_buff *))
+{
+	struct ip_vs_iphdr iph;
+	struct ip_vs_protocol *pp;
+	struct ip_vs_conn *cp;
+	int ret, restart, af;
+
+	af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
+
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+
+	/*
+	 *	Big tappo: only PACKET_HOST, including loopback for local client
+	 *	Don't handle local packets on IPv6 for now
+	 */
+	if (unlikely(skb->pkt_type != PACKET_HOST)) {
+		IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
+			      skb->pkt_type,
+			      iph.protocol,
+			      IP_VS_DBG_ADDR(af, &iph.daddr));
+		return NF_ACCEPT;
+	}
+
+	if (unlikely(iph.protocol == IPPROTO_ICMP)) {
+		int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
+
+		if (related)
+			return verdict;
+		ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+	}
+
+	/* Protocol supported? */
+	pp = ip_vs_proto_get(iph.protocol);
+	if (unlikely(!pp))
+		return NF_ACCEPT;
+
+	/*
+	 * Check if the packet belongs to an existing connection entry
+	 */
+	cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
+
+	if (unlikely(!cp)) {
+		int v;
+
+		/* For local client packets, it could be a response */
+		cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+		if (cp)
+			return handle_response(af, skb, pp, cp, iph.len);
+
+		if (!pp->conn_schedule(af, skb, pp, &v, &cp))
+			return v;
+	}
+
+	if (unlikely(!cp)) {
+		/* sorry, all this trouble for a no-hit :) */
+		IP_VS_DBG_PKT(12, pp, skb, 0,
+			      "packet continues traversal as normal");
+		return NF_ACCEPT;
+	}
+
+	IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet");
+
+	/* Check the server status */
+	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+		/* the destination server is not available */
+
+		if (sysctl_ip_vs_expire_nodest_conn) {
+			/* try to expire the connection immediately */
+			ip_vs_conn_expire_now(cp);
+		}
+		/* don't restart its timer, and silently
+		   drop the packet. */
+		__ip_vs_conn_put(cp);
+		return NF_DROP;
+	}
+
+	ip_vs_in_stats(cp, skb);
+	restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+	if (cp->packet_xmit)
+		ret = cp->packet_xmit(skb, cp, pp);
+		/* do not touch skb anymore */
+	else {
+		IP_VS_DBG_RL("warning: packet_xmit is null");
+		ret = NF_ACCEPT;
+	}
+
+	/* Increase its packet counter and check if it is needed
+	 * to be synchronized
+	 *
+	 * Sync connection if it is about to close to
+	 * encorage the standby servers to update the connections timeout
+	 */
+	atomic_inc(&cp->in_pkts);
+	if (af == AF_INET &&
+	    (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+	    (((cp->protocol != IPPROTO_TCP ||
+	       cp->state == IP_VS_TCP_S_ESTABLISHED) &&
+	      (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
+	       == sysctl_ip_vs_sync_threshold[0])) ||
+	     ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
+	      ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
+	       (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
+	       (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
+		ip_vs_sync_conn(cp);
+	cp->old_state = cp->state;
+
+	ip_vs_conn_put(cp);
+	return ret;
+}
+
+
+/*
+ *	It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
+ *      related packets destined for 0.0.0.0/0.
+ *      When fwmark-based virtual service is used, such as transparent
+ *      cache cluster, TCP packets can be marked and routed to ip_vs_in,
+ *      but ICMP destined for 0.0.0.0/0 cannot not be easily marked and
+ *      sent to ip_vs_in_icmp. So, catch them at the NF_INET_FORWARD chain
+ *      and send them to ip_vs_in_icmp.
+ */
+static unsigned int
+ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
+		   const struct net_device *in, const struct net_device *out,
+		   int (*okfn)(struct sk_buff *))
+{
+	int r;
+
+	if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
+		return NF_ACCEPT;
+
+	return ip_vs_in_icmp(skb, &r, hooknum);
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static unsigned int
+ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
+		      const struct net_device *in, const struct net_device *out,
+		      int (*okfn)(struct sk_buff *))
+{
+	int r;
+
+	if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6)
+		return NF_ACCEPT;
+
+	return ip_vs_in_icmp_v6(skb, &r, hooknum);
+}
+#endif
+
+
+static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
+	/* After packet filtering, forward packet through VS/DR, VS/TUN,
+	 * or VS/NAT(change destination), so that filtering rules can be
+	 * applied to IPVS. */
+	{
+		.hook		= ip_vs_in,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum        = NF_INET_LOCAL_IN,
+		.priority       = 100,
+	},
+	/* After packet filtering, change source only for VS/NAT */
+	{
+		.hook		= ip_vs_out,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum        = NF_INET_FORWARD,
+		.priority       = 100,
+	},
+	/* After packet filtering (but before ip_vs_out_icmp), catch icmp
+	 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
+	{
+		.hook		= ip_vs_forward_icmp,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum        = NF_INET_FORWARD,
+		.priority       = 99,
+	},
+	/* Before the netfilter connection tracking, exit from POST_ROUTING */
+	{
+		.hook		= ip_vs_post_routing,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum        = NF_INET_POST_ROUTING,
+		.priority       = NF_IP_PRI_NAT_SRC-1,
+	},
+#ifdef CONFIG_IP_VS_IPV6
+	/* After packet filtering, forward packet through VS/DR, VS/TUN,
+	 * or VS/NAT(change destination), so that filtering rules can be
+	 * applied to IPVS. */
+	{
+		.hook		= ip_vs_in,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum        = NF_INET_LOCAL_IN,
+		.priority       = 100,
+	},
+	/* After packet filtering, change source only for VS/NAT */
+	{
+		.hook		= ip_vs_out,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum        = NF_INET_FORWARD,
+		.priority       = 100,
+	},
+	/* After packet filtering (but before ip_vs_out_icmp), catch icmp
+	 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
+	{
+		.hook		= ip_vs_forward_icmp_v6,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum        = NF_INET_FORWARD,
+		.priority       = 99,
+	},
+	/* Before the netfilter connection tracking, exit from POST_ROUTING */
+	{
+		.hook		= ip_vs_post_routing,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum        = NF_INET_POST_ROUTING,
+		.priority       = NF_IP6_PRI_NAT_SRC-1,
+	},
+#endif
+};
+
+
+/*
+ *	Initialize IP Virtual Server
+ */
+static int __init ip_vs_init(void)
+{
+	int ret;
+
+	ip_vs_estimator_init();
+
+	ret = ip_vs_control_init();
+	if (ret < 0) {
+		IP_VS_ERR("can't setup control.\n");
+		goto cleanup_estimator;
+	}
+
+	ip_vs_protocol_init();
+
+	ret = ip_vs_app_init();
+	if (ret < 0) {
+		IP_VS_ERR("can't setup application helper.\n");
+		goto cleanup_protocol;
+	}
+
+	ret = ip_vs_conn_init();
+	if (ret < 0) {
+		IP_VS_ERR("can't setup connection table.\n");
+		goto cleanup_app;
+	}
+
+	ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+	if (ret < 0) {
+		IP_VS_ERR("can't register hooks.\n");
+		goto cleanup_conn;
+	}
+
+	IP_VS_INFO("ipvs loaded.\n");
+	return ret;
+
+  cleanup_conn:
+	ip_vs_conn_cleanup();
+  cleanup_app:
+	ip_vs_app_cleanup();
+  cleanup_protocol:
+	ip_vs_protocol_cleanup();
+	ip_vs_control_cleanup();
+  cleanup_estimator:
+	ip_vs_estimator_cleanup();
+	return ret;
+}
+
+static void __exit ip_vs_cleanup(void)
+{
+	nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+	ip_vs_conn_cleanup();
+	ip_vs_app_cleanup();
+	ip_vs_protocol_cleanup();
+	ip_vs_control_cleanup();
+	ip_vs_estimator_cleanup();
+	IP_VS_INFO("ipvs unloaded.\n");
+}
+
+module_init(ip_vs_init);
+module_exit(ip_vs_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
new file mode 100644
index 00000000000..0302cf3e503
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -0,0 +1,3443 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the NetFilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/capability.h>
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/workqueue.h>
+#include <linux/swap.h>
+#include <linux/seq_file.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/mutex.h>
+
+#include <net/net_namespace.h>
+#include <net/ip.h>
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#endif
+#include <net/route.h>
+#include <net/sock.h>
+#include <net/genetlink.h>
+
+#include <asm/uaccess.h>
+
+#include <net/ip_vs.h>
+
+/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
+static DEFINE_MUTEX(__ip_vs_mutex);
+
+/* lock for service table */
+static DEFINE_RWLOCK(__ip_vs_svc_lock);
+
+/* lock for table with the real services */
+static DEFINE_RWLOCK(__ip_vs_rs_lock);
+
+/* lock for state and timeout tables */
+static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
+
+/* lock for drop entry handling */
+static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
+
+/* lock for drop packet handling */
+static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
+
+/* 1/rate drop and drop-entry variables */
+int ip_vs_drop_rate = 0;
+int ip_vs_drop_counter = 0;
+static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
+
+/* number of virtual services */
+static int ip_vs_num_services = 0;
+
+/* sysctl variables */
+static int sysctl_ip_vs_drop_entry = 0;
+static int sysctl_ip_vs_drop_packet = 0;
+static int sysctl_ip_vs_secure_tcp = 0;
+static int sysctl_ip_vs_amemthresh = 1024;
+static int sysctl_ip_vs_am_droprate = 10;
+int sysctl_ip_vs_cache_bypass = 0;
+int sysctl_ip_vs_expire_nodest_conn = 0;
+int sysctl_ip_vs_expire_quiescent_template = 0;
+int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
+int sysctl_ip_vs_nat_icmp_send = 0;
+
+
+#ifdef CONFIG_IP_VS_DEBUG
+static int sysctl_ip_vs_debug_level = 0;
+
+int ip_vs_get_debug_level(void)
+{
+	return sysctl_ip_vs_debug_level;
+}
+#endif
+
+#ifdef CONFIG_IP_VS_IPV6
+/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
+static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+{
+	struct rt6_info *rt;
+	struct flowi fl = {
+		.oif = 0,
+		.nl_u = {
+			.ip6_u = {
+				.daddr = *addr,
+				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
+	};
+
+	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+	if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
+			return 1;
+
+	return 0;
+}
+#endif
+/*
+ *	update_defense_level is called from keventd and from sysctl,
+ *	so it needs to protect itself from softirqs
+ */
+static void update_defense_level(void)
+{
+	struct sysinfo i;
+	static int old_secure_tcp = 0;
+	int availmem;
+	int nomem;
+	int to_change = -1;
+
+	/* we only count free and buffered memory (in pages) */
+	si_meminfo(&i);
+	availmem = i.freeram + i.bufferram;
+	/* however in linux 2.5 the i.bufferram is total page cache size,
+	   we need adjust it */
+	/* si_swapinfo(&i); */
+	/* availmem = availmem - (i.totalswap - i.freeswap); */
+
+	nomem = (availmem < sysctl_ip_vs_amemthresh);
+
+	local_bh_disable();
+
+	/* drop_entry */
+	spin_lock(&__ip_vs_dropentry_lock);
+	switch (sysctl_ip_vs_drop_entry) {
+	case 0:
+		atomic_set(&ip_vs_dropentry, 0);
+		break;
+	case 1:
+		if (nomem) {
+			atomic_set(&ip_vs_dropentry, 1);
+			sysctl_ip_vs_drop_entry = 2;
+		} else {
+			atomic_set(&ip_vs_dropentry, 0);
+		}
+		break;
+	case 2:
+		if (nomem) {
+			atomic_set(&ip_vs_dropentry, 1);
+		} else {
+			atomic_set(&ip_vs_dropentry, 0);
+			sysctl_ip_vs_drop_entry = 1;
+		};
+		break;
+	case 3:
+		atomic_set(&ip_vs_dropentry, 1);
+		break;
+	}
+	spin_unlock(&__ip_vs_dropentry_lock);
+
+	/* drop_packet */
+	spin_lock(&__ip_vs_droppacket_lock);
+	switch (sysctl_ip_vs_drop_packet) {
+	case 0:
+		ip_vs_drop_rate = 0;
+		break;
+	case 1:
+		if (nomem) {
+			ip_vs_drop_rate = ip_vs_drop_counter
+				= sysctl_ip_vs_amemthresh /
+				(sysctl_ip_vs_amemthresh-availmem);
+			sysctl_ip_vs_drop_packet = 2;
+		} else {
+			ip_vs_drop_rate = 0;
+		}
+		break;
+	case 2:
+		if (nomem) {
+			ip_vs_drop_rate = ip_vs_drop_counter
+				= sysctl_ip_vs_amemthresh /
+				(sysctl_ip_vs_amemthresh-availmem);
+		} else {
+			ip_vs_drop_rate = 0;
+			sysctl_ip_vs_drop_packet = 1;
+		}
+		break;
+	case 3:
+		ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
+		break;
+	}
+	spin_unlock(&__ip_vs_droppacket_lock);
+
+	/* secure_tcp */
+	write_lock(&__ip_vs_securetcp_lock);
+	switch (sysctl_ip_vs_secure_tcp) {
+	case 0:
+		if (old_secure_tcp >= 2)
+			to_change = 0;
+		break;
+	case 1:
+		if (nomem) {
+			if (old_secure_tcp < 2)
+				to_change = 1;
+			sysctl_ip_vs_secure_tcp = 2;
+		} else {
+			if (old_secure_tcp >= 2)
+				to_change = 0;
+		}
+		break;
+	case 2:
+		if (nomem) {
+			if (old_secure_tcp < 2)
+				to_change = 1;
+		} else {
+			if (old_secure_tcp >= 2)
+				to_change = 0;
+			sysctl_ip_vs_secure_tcp = 1;
+		}
+		break;
+	case 3:
+		if (old_secure_tcp < 2)
+			to_change = 1;
+		break;
+	}
+	old_secure_tcp = sysctl_ip_vs_secure_tcp;
+	if (to_change >= 0)
+		ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
+	write_unlock(&__ip_vs_securetcp_lock);
+
+	local_bh_enable();
+}
+
+
+/*
+ *	Timer for checking the defense
+ */
+#define DEFENSE_TIMER_PERIOD	1*HZ
+static void defense_work_handler(struct work_struct *work);
+static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
+
+static void defense_work_handler(struct work_struct *work)
+{
+	update_defense_level();
+	if (atomic_read(&ip_vs_dropentry))
+		ip_vs_random_dropentry();
+
+	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+}
+
+int
+ip_vs_use_count_inc(void)
+{
+	return try_module_get(THIS_MODULE);
+}
+
+void
+ip_vs_use_count_dec(void)
+{
+	module_put(THIS_MODULE);
+}
+
+
+/*
+ *	Hash table: for virtual service lookups
+ */
+#define IP_VS_SVC_TAB_BITS 8
+#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
+#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
+
+/* the service table hashed by <protocol, addr, port> */
+static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
+/* the service table hashed by fwmark */
+static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
+
+/*
+ *	Hash table: for real service lookups
+ */
+#define IP_VS_RTAB_BITS 4
+#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
+#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
+
+static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
+
+/*
+ *	Trash for destinations
+ */
+static LIST_HEAD(ip_vs_dest_trash);
+
+/*
+ *	FTP & NULL virtual service counters
+ */
+static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
+static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
+
+
+/*
+ *	Returns hash value for virtual service
+ */
+static __inline__ unsigned
+ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
+		  __be16 port)
+{
+	register unsigned porth = ntohs(port);
+	__be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		addr_fold = addr->ip6[0]^addr->ip6[1]^
+			    addr->ip6[2]^addr->ip6[3];
+#endif
+
+	return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
+		& IP_VS_SVC_TAB_MASK;
+}
+
+/*
+ *	Returns hash value of fwmark for virtual service lookup
+ */
+static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
+{
+	return fwmark & IP_VS_SVC_TAB_MASK;
+}
+
+/*
+ *	Hashes a service in the ip_vs_svc_table by <proto,addr,port>
+ *	or in the ip_vs_svc_fwm_table by fwmark.
+ *	Should be called with locked tables.
+ */
+static int ip_vs_svc_hash(struct ip_vs_service *svc)
+{
+	unsigned hash;
+
+	if (svc->flags & IP_VS_SVC_F_HASHED) {
+		IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	if (svc->fwmark == 0) {
+		/*
+		 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
+		 */
+		hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
+					 svc->port);
+		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
+	} else {
+		/*
+		 *  Hash it by fwmark in ip_vs_svc_fwm_table
+		 */
+		hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
+		list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
+	}
+
+	svc->flags |= IP_VS_SVC_F_HASHED;
+	/* increase its refcnt because it is referenced by the svc table */
+	atomic_inc(&svc->refcnt);
+	return 1;
+}
+
+
+/*
+ *	Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
+ *	Should be called with locked tables.
+ */
+static int ip_vs_svc_unhash(struct ip_vs_service *svc)
+{
+	if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
+		IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	if (svc->fwmark == 0) {
+		/* Remove it from the ip_vs_svc_table table */
+		list_del(&svc->s_list);
+	} else {
+		/* Remove it from the ip_vs_svc_fwm_table table */
+		list_del(&svc->f_list);
+	}
+
+	svc->flags &= ~IP_VS_SVC_F_HASHED;
+	atomic_dec(&svc->refcnt);
+	return 1;
+}
+
+
+/*
+ *	Get service by {proto,addr,port} in the service table.
+ */
+static inline struct ip_vs_service *
+__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
+		    __be16 vport)
+{
+	unsigned hash;
+	struct ip_vs_service *svc;
+
+	/* Check for "full" addressed entries */
+	hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
+
+	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
+		if ((svc->af == af)
+		    && ip_vs_addr_equal(af, &svc->addr, vaddr)
+		    && (svc->port == vport)
+		    && (svc->protocol == protocol)) {
+			/* HIT */
+			atomic_inc(&svc->usecnt);
+			return svc;
+		}
+	}
+
+	return NULL;
+}
+
+
+/*
+ *	Get service by {fwmark} in the service table.
+ */
+static inline struct ip_vs_service *
+__ip_vs_svc_fwm_get(int af, __u32 fwmark)
+{
+	unsigned hash;
+	struct ip_vs_service *svc;
+
+	/* Check for fwmark addressed entries */
+	hash = ip_vs_svc_fwm_hashkey(fwmark);
+
+	list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
+		if (svc->fwmark == fwmark && svc->af == af) {
+			/* HIT */
+			atomic_inc(&svc->usecnt);
+			return svc;
+		}
+	}
+
+	return NULL;
+}
+
+struct ip_vs_service *
+ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+		  const union nf_inet_addr *vaddr, __be16 vport)
+{
+	struct ip_vs_service *svc;
+
+	read_lock(&__ip_vs_svc_lock);
+
+	/*
+	 *	Check the table hashed by fwmark first
+	 */
+	if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
+		goto out;
+
+	/*
+	 *	Check the table hashed by <protocol,addr,port>
+	 *	for "full" addressed entries
+	 */
+	svc = __ip_vs_service_get(af, protocol, vaddr, vport);
+
+	if (svc == NULL
+	    && protocol == IPPROTO_TCP
+	    && atomic_read(&ip_vs_ftpsvc_counter)
+	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
+		/*
+		 * Check if ftp service entry exists, the packet
+		 * might belong to FTP data connections.
+		 */
+		svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
+	}
+
+	if (svc == NULL
+	    && atomic_read(&ip_vs_nullsvc_counter)) {
+		/*
+		 * Check if the catch-all port (port zero) exists
+		 */
+		svc = __ip_vs_service_get(af, protocol, vaddr, 0);
+	}
+
+  out:
+	read_unlock(&__ip_vs_svc_lock);
+
+	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
+		      fwmark, ip_vs_proto_name(protocol),
+		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
+		      svc ? "hit" : "not hit");
+
+	return svc;
+}
+
+
+static inline void
+__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+	atomic_inc(&svc->refcnt);
+	dest->svc = svc;
+}
+
+static inline void
+__ip_vs_unbind_svc(struct ip_vs_dest *dest)
+{
+	struct ip_vs_service *svc = dest->svc;
+
+	dest->svc = NULL;
+	if (atomic_dec_and_test(&svc->refcnt))
+		kfree(svc);
+}
+
+
+/*
+ *	Returns hash value for real service
+ */
+static inline unsigned ip_vs_rs_hashkey(int af,
+					    const union nf_inet_addr *addr,
+					    __be16 port)
+{
+	register unsigned porth = ntohs(port);
+	__be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		addr_fold = addr->ip6[0]^addr->ip6[1]^
+			    addr->ip6[2]^addr->ip6[3];
+#endif
+
+	return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
+		& IP_VS_RTAB_MASK;
+}
+
+/*
+ *	Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
+ *	should be called with locked tables.
+ */
+static int ip_vs_rs_hash(struct ip_vs_dest *dest)
+{
+	unsigned hash;
+
+	if (!list_empty(&dest->d_list)) {
+		return 0;
+	}
+
+	/*
+	 *	Hash by proto,addr,port,
+	 *	which are the parameters of the real service.
+	 */
+	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
+
+	list_add(&dest->d_list, &ip_vs_rtable[hash]);
+
+	return 1;
+}
+
+/*
+ *	UNhashes ip_vs_dest from ip_vs_rtable.
+ *	should be called with locked tables.
+ */
+static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
+{
+	/*
+	 * Remove it from the ip_vs_rtable table.
+	 */
+	if (!list_empty(&dest->d_list)) {
+		list_del(&dest->d_list);
+		INIT_LIST_HEAD(&dest->d_list);
+	}
+
+	return 1;
+}
+
+/*
+ *	Lookup real service by <proto,addr,port> in the real service table.
+ */
+struct ip_vs_dest *
+ip_vs_lookup_real_service(int af, __u16 protocol,
+			  const union nf_inet_addr *daddr,
+			  __be16 dport)
+{
+	unsigned hash;
+	struct ip_vs_dest *dest;
+
+	/*
+	 *	Check for "full" addressed entries
+	 *	Return the first found entry
+	 */
+	hash = ip_vs_rs_hashkey(af, daddr, dport);
+
+	read_lock(&__ip_vs_rs_lock);
+	list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
+		if ((dest->af == af)
+		    && ip_vs_addr_equal(af, &dest->addr, daddr)
+		    && (dest->port == dport)
+		    && ((dest->protocol == protocol) ||
+			dest->vfwmark)) {
+			/* HIT */
+			read_unlock(&__ip_vs_rs_lock);
+			return dest;
+		}
+	}
+	read_unlock(&__ip_vs_rs_lock);
+
+	return NULL;
+}
+
+/*
+ *	Lookup destination by {addr,port} in the given service
+ */
+static struct ip_vs_dest *
+ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
+		  __be16 dport)
+{
+	struct ip_vs_dest *dest;
+
+	/*
+	 * Find the destination for the given service
+	 */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if ((dest->af == svc->af)
+		    && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
+		    && (dest->port == dport)) {
+			/* HIT */
+			return dest;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Find destination by {daddr,dport,vaddr,protocol}
+ * Cretaed to be used in ip_vs_process_message() in
+ * the backup synchronization daemon. It finds the
+ * destination to be bound to the received connection
+ * on the backup.
+ *
+ * ip_vs_lookup_real_service() looked promissing, but
+ * seems not working as expected.
+ */
+struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
+				   __be16 dport,
+				   const union nf_inet_addr *vaddr,
+				   __be16 vport, __u16 protocol)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_service *svc;
+
+	svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
+	if (!svc)
+		return NULL;
+	dest = ip_vs_lookup_dest(svc, daddr, dport);
+	if (dest)
+		atomic_inc(&dest->refcnt);
+	ip_vs_service_put(svc);
+	return dest;
+}
+
+/*
+ *  Lookup dest by {svc,addr,port} in the destination trash.
+ *  The destination trash is used to hold the destinations that are removed
+ *  from the service table but are still referenced by some conn entries.
+ *  The reason to add the destination trash is when the dest is temporary
+ *  down (either by administrator or by monitor program), the dest can be
+ *  picked back from the trash, the remaining connections to the dest can
+ *  continue, and the counting information of the dest is also useful for
+ *  scheduling.
+ */
+static struct ip_vs_dest *
+ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
+		     __be16 dport)
+{
+	struct ip_vs_dest *dest, *nxt;
+
+	/*
+	 * Find the destination in trash
+	 */
+	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
+			      "dest->refcnt=%d\n",
+			      dest->vfwmark,
+			      IP_VS_DBG_ADDR(svc->af, &dest->addr),
+			      ntohs(dest->port),
+			      atomic_read(&dest->refcnt));
+		if (dest->af == svc->af &&
+		    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
+		    dest->port == dport &&
+		    dest->vfwmark == svc->fwmark &&
+		    dest->protocol == svc->protocol &&
+		    (svc->fwmark ||
+		     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
+		      dest->vport == svc->port))) {
+			/* HIT */
+			return dest;
+		}
+
+		/*
+		 * Try to purge the destination from trash if not referenced
+		 */
+		if (atomic_read(&dest->refcnt) == 1) {
+			IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
+				      "from trash\n",
+				      dest->vfwmark,
+				      IP_VS_DBG_ADDR(svc->af, &dest->addr),
+				      ntohs(dest->port));
+			list_del(&dest->n_list);
+			ip_vs_dst_reset(dest);
+			__ip_vs_unbind_svc(dest);
+			kfree(dest);
+		}
+	}
+
+	return NULL;
+}
+
+
+/*
+ *  Clean up all the destinations in the trash
+ *  Called by the ip_vs_control_cleanup()
+ *
+ *  When the ip_vs_control_clearup is activated by ipvs module exit,
+ *  the service tables must have been flushed and all the connections
+ *  are expired, and the refcnt of each destination in the trash must
+ *  be 1, so we simply release them here.
+ */
+static void ip_vs_trash_cleanup(void)
+{
+	struct ip_vs_dest *dest, *nxt;
+
+	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+		list_del(&dest->n_list);
+		ip_vs_dst_reset(dest);
+		__ip_vs_unbind_svc(dest);
+		kfree(dest);
+	}
+}
+
+
+static void
+ip_vs_zero_stats(struct ip_vs_stats *stats)
+{
+	spin_lock_bh(&stats->lock);
+
+	memset(&stats->ustats, 0, sizeof(stats->ustats));
+	ip_vs_zero_estimator(stats);
+
+	spin_unlock_bh(&stats->lock);
+}
+
+/*
+ *	Update a destination in the given service
+ */
+static void
+__ip_vs_update_dest(struct ip_vs_service *svc,
+		    struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
+{
+	int conn_flags;
+
+	/* set the weight and the flags */
+	atomic_set(&dest->weight, udest->weight);
+	conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
+
+	/* check if local node and update the flags */
+#ifdef CONFIG_IP_VS_IPV6
+	if (svc->af == AF_INET6) {
+		if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
+			conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
+				| IP_VS_CONN_F_LOCALNODE;
+		}
+	} else
+#endif
+		if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
+			conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
+				| IP_VS_CONN_F_LOCALNODE;
+		}
+
+	/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
+	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
+		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
+	} else {
+		/*
+		 *    Put the real service in ip_vs_rtable if not present.
+		 *    For now only for NAT!
+		 */
+		write_lock_bh(&__ip_vs_rs_lock);
+		ip_vs_rs_hash(dest);
+		write_unlock_bh(&__ip_vs_rs_lock);
+	}
+	atomic_set(&dest->conn_flags, conn_flags);
+
+	/* bind the service */
+	if (!dest->svc) {
+		__ip_vs_bind_svc(dest, svc);
+	} else {
+		if (dest->svc != svc) {
+			__ip_vs_unbind_svc(dest);
+			ip_vs_zero_stats(&dest->stats);
+			__ip_vs_bind_svc(dest, svc);
+		}
+	}
+
+	/* set the dest status flags */
+	dest->flags |= IP_VS_DEST_F_AVAILABLE;
+
+	if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
+		dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+	dest->u_threshold = udest->u_threshold;
+	dest->l_threshold = udest->l_threshold;
+}
+
+
+/*
+ *	Create a destination for the given service
+ */
+static int
+ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
+	       struct ip_vs_dest **dest_p)
+{
+	struct ip_vs_dest *dest;
+	unsigned atype;
+
+	EnterFunction(2);
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (svc->af == AF_INET6) {
+		atype = ipv6_addr_type(&udest->addr.in6);
+		if ((!(atype & IPV6_ADDR_UNICAST) ||
+			atype & IPV6_ADDR_LINKLOCAL) &&
+			!__ip_vs_addr_is_local_v6(&udest->addr.in6))
+			return -EINVAL;
+	} else
+#endif
+	{
+		atype = inet_addr_type(&init_net, udest->addr.ip);
+		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
+			return -EINVAL;
+	}
+
+	dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
+	if (dest == NULL) {
+		IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
+		return -ENOMEM;
+	}
+
+	dest->af = svc->af;
+	dest->protocol = svc->protocol;
+	dest->vaddr = svc->addr;
+	dest->vport = svc->port;
+	dest->vfwmark = svc->fwmark;
+	ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
+	dest->port = udest->port;
+
+	atomic_set(&dest->activeconns, 0);
+	atomic_set(&dest->inactconns, 0);
+	atomic_set(&dest->persistconns, 0);
+	atomic_set(&dest->refcnt, 0);
+
+	INIT_LIST_HEAD(&dest->d_list);
+	spin_lock_init(&dest->dst_lock);
+	spin_lock_init(&dest->stats.lock);
+	__ip_vs_update_dest(svc, dest, udest);
+	ip_vs_new_estimator(&dest->stats);
+
+	*dest_p = dest;
+
+	LeaveFunction(2);
+	return 0;
+}
+
+
+/*
+ *	Add a destination into an existing service
+ */
+static int
+ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+{
+	struct ip_vs_dest *dest;
+	union nf_inet_addr daddr;
+	__be16 dport = udest->port;
+	int ret;
+
+	EnterFunction(2);
+
+	if (udest->weight < 0) {
+		IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
+		return -ERANGE;
+	}
+
+	if (udest->l_threshold > udest->u_threshold) {
+		IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
+			  "upper threshold\n");
+		return -ERANGE;
+	}
+
+	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+
+	/*
+	 * Check if the dest already exists in the list
+	 */
+	dest = ip_vs_lookup_dest(svc, &daddr, dport);
+
+	if (dest != NULL) {
+		IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
+		return -EEXIST;
+	}
+
+	/*
+	 * Check if the dest already exists in the trash and
+	 * is from the same service
+	 */
+	dest = ip_vs_trash_get_dest(svc, &daddr, dport);
+
+	if (dest != NULL) {
+		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
+			      "dest->refcnt=%d, service %u/%s:%u\n",
+			      IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
+			      atomic_read(&dest->refcnt),
+			      dest->vfwmark,
+			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
+			      ntohs(dest->vport));
+
+		__ip_vs_update_dest(svc, dest, udest);
+
+		/*
+		 * Get the destination from the trash
+		 */
+		list_del(&dest->n_list);
+
+		ip_vs_new_estimator(&dest->stats);
+
+		write_lock_bh(&__ip_vs_svc_lock);
+
+		/*
+		 * Wait until all other svc users go away.
+		 */
+		IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+		list_add(&dest->n_list, &svc->destinations);
+		svc->num_dests++;
+
+		/* call the update_service function of its scheduler */
+		if (svc->scheduler->update_service)
+			svc->scheduler->update_service(svc);
+
+		write_unlock_bh(&__ip_vs_svc_lock);
+		return 0;
+	}
+
+	/*
+	 * Allocate and initialize the dest structure
+	 */
+	ret = ip_vs_new_dest(svc, udest, &dest);
+	if (ret) {
+		return ret;
+	}
+
+	/*
+	 * Add the dest entry into the list
+	 */
+	atomic_inc(&dest->refcnt);
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/*
+	 * Wait until all other svc users go away.
+	 */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	list_add(&dest->n_list, &svc->destinations);
+	svc->num_dests++;
+
+	/* call the update_service function of its scheduler */
+	if (svc->scheduler->update_service)
+		svc->scheduler->update_service(svc);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	LeaveFunction(2);
+
+	return 0;
+}
+
+
+/*
+ *	Edit a destination in the given service
+ */
+static int
+ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+{
+	struct ip_vs_dest *dest;
+	union nf_inet_addr daddr;
+	__be16 dport = udest->port;
+
+	EnterFunction(2);
+
+	if (udest->weight < 0) {
+		IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
+		return -ERANGE;
+	}
+
+	if (udest->l_threshold > udest->u_threshold) {
+		IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
+			  "upper threshold\n");
+		return -ERANGE;
+	}
+
+	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+
+	/*
+	 *  Lookup the destination list
+	 */
+	dest = ip_vs_lookup_dest(svc, &daddr, dport);
+
+	if (dest == NULL) {
+		IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
+		return -ENOENT;
+	}
+
+	__ip_vs_update_dest(svc, dest, udest);
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/* Wait until all other svc users go away */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	/* call the update_service, because server weight may be changed */
+	if (svc->scheduler->update_service)
+		svc->scheduler->update_service(svc);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	LeaveFunction(2);
+
+	return 0;
+}
+
+
+/*
+ *	Delete a destination (must be already unlinked from the service)
+ */
+static void __ip_vs_del_dest(struct ip_vs_dest *dest)
+{
+	ip_vs_kill_estimator(&dest->stats);
+
+	/*
+	 *  Remove it from the d-linked list with the real services.
+	 */
+	write_lock_bh(&__ip_vs_rs_lock);
+	ip_vs_rs_unhash(dest);
+	write_unlock_bh(&__ip_vs_rs_lock);
+
+	/*
+	 *  Decrease the refcnt of the dest, and free the dest
+	 *  if nobody refers to it (refcnt=0). Otherwise, throw
+	 *  the destination into the trash.
+	 */
+	if (atomic_dec_and_test(&dest->refcnt)) {
+		ip_vs_dst_reset(dest);
+		/* simply decrease svc->refcnt here, let the caller check
+		   and release the service if nobody refers to it.
+		   Only user context can release destination and service,
+		   and only one user context can update virtual service at a
+		   time, so the operation here is OK */
+		atomic_dec(&dest->svc->refcnt);
+		kfree(dest);
+	} else {
+		IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
+			      "dest->refcnt=%d\n",
+			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
+			      ntohs(dest->port),
+			      atomic_read(&dest->refcnt));
+		list_add(&dest->n_list, &ip_vs_dest_trash);
+		atomic_inc(&dest->refcnt);
+	}
+}
+
+
+/*
+ *	Unlink a destination from the given service
+ */
+static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
+				struct ip_vs_dest *dest,
+				int svcupd)
+{
+	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
+
+	/*
+	 *  Remove it from the d-linked destination list.
+	 */
+	list_del(&dest->n_list);
+	svc->num_dests--;
+
+	/*
+	 *  Call the update_service function of its scheduler
+	 */
+	if (svcupd && svc->scheduler->update_service)
+			svc->scheduler->update_service(svc);
+}
+
+
+/*
+ *	Delete a destination server in the given service
+ */
+static int
+ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+{
+	struct ip_vs_dest *dest;
+	__be16 dport = udest->port;
+
+	EnterFunction(2);
+
+	dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
+
+	if (dest == NULL) {
+		IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
+		return -ENOENT;
+	}
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/*
+	 *	Wait until all other svc users go away.
+	 */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	/*
+	 *	Unlink dest from the service
+	 */
+	__ip_vs_unlink_dest(svc, dest, 1);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	/*
+	 *	Delete the destination
+	 */
+	__ip_vs_del_dest(dest);
+
+	LeaveFunction(2);
+
+	return 0;
+}
+
+
+/*
+ *	Add a service into the service hash table
+ */
+static int
+ip_vs_add_service(struct ip_vs_service_user_kern *u,
+		  struct ip_vs_service **svc_p)
+{
+	int ret = 0;
+	struct ip_vs_scheduler *sched = NULL;
+	struct ip_vs_service *svc = NULL;
+
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	/* Lookup the scheduler by 'u->sched_name' */
+	sched = ip_vs_scheduler_get(u->sched_name);
+	if (sched == NULL) {
+		IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
+			   u->sched_name);
+		ret = -ENOENT;
+		goto out_mod_dec;
+	}
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (u->af == AF_INET6) {
+		if (!sched->supports_ipv6) {
+			ret = -EAFNOSUPPORT;
+			goto out_err;
+		}
+		if ((u->netmask < 1) || (u->netmask > 128)) {
+			ret = -EINVAL;
+			goto out_err;
+		}
+	}
+#endif
+
+	svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
+	if (svc == NULL) {
+		IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	/* I'm the first user of the service */
+	atomic_set(&svc->usecnt, 1);
+	atomic_set(&svc->refcnt, 0);
+
+	svc->af = u->af;
+	svc->protocol = u->protocol;
+	ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
+	svc->port = u->port;
+	svc->fwmark = u->fwmark;
+	svc->flags = u->flags;
+	svc->timeout = u->timeout * HZ;
+	svc->netmask = u->netmask;
+
+	INIT_LIST_HEAD(&svc->destinations);
+	rwlock_init(&svc->sched_lock);
+	spin_lock_init(&svc->stats.lock);
+
+	/* Bind the scheduler */
+	ret = ip_vs_bind_scheduler(svc, sched);
+	if (ret)
+		goto out_err;
+	sched = NULL;
+
+	/* Update the virtual service counters */
+	if (svc->port == FTPPORT)
+		atomic_inc(&ip_vs_ftpsvc_counter);
+	else if (svc->port == 0)
+		atomic_inc(&ip_vs_nullsvc_counter);
+
+	ip_vs_new_estimator(&svc->stats);
+
+	/* Count only IPv4 services for old get/setsockopt interface */
+	if (svc->af == AF_INET)
+		ip_vs_num_services++;
+
+	/* Hash the service into the service table */
+	write_lock_bh(&__ip_vs_svc_lock);
+	ip_vs_svc_hash(svc);
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	*svc_p = svc;
+	return 0;
+
+  out_err:
+	if (svc != NULL) {
+		if (svc->scheduler)
+			ip_vs_unbind_scheduler(svc);
+		if (svc->inc) {
+			local_bh_disable();
+			ip_vs_app_inc_put(svc->inc);
+			local_bh_enable();
+		}
+		kfree(svc);
+	}
+	ip_vs_scheduler_put(sched);
+
+  out_mod_dec:
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+
+	return ret;
+}
+
+
+/*
+ *	Edit a service and bind it with a new scheduler
+ */
+static int
+ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
+{
+	struct ip_vs_scheduler *sched, *old_sched;
+	int ret = 0;
+
+	/*
+	 * Lookup the scheduler, by 'u->sched_name'
+	 */
+	sched = ip_vs_scheduler_get(u->sched_name);
+	if (sched == NULL) {
+		IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
+			   u->sched_name);
+		return -ENOENT;
+	}
+	old_sched = sched;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (u->af == AF_INET6) {
+		if (!sched->supports_ipv6) {
+			ret = -EAFNOSUPPORT;
+			goto out;
+		}
+		if ((u->netmask < 1) || (u->netmask > 128)) {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+#endif
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/*
+	 * Wait until all other svc users go away.
+	 */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	/*
+	 * Set the flags and timeout value
+	 */
+	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
+	svc->timeout = u->timeout * HZ;
+	svc->netmask = u->netmask;
+
+	old_sched = svc->scheduler;
+	if (sched != old_sched) {
+		/*
+		 * Unbind the old scheduler
+		 */
+		if ((ret = ip_vs_unbind_scheduler(svc))) {
+			old_sched = sched;
+			goto out_unlock;
+		}
+
+		/*
+		 * Bind the new scheduler
+		 */
+		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
+			/*
+			 * If ip_vs_bind_scheduler fails, restore the old
+			 * scheduler.
+			 * The main reason of failure is out of memory.
+			 *
+			 * The question is if the old scheduler can be
+			 * restored all the time. TODO: if it cannot be
+			 * restored some time, we must delete the service,
+			 * otherwise the system may crash.
+			 */
+			ip_vs_bind_scheduler(svc, old_sched);
+			old_sched = sched;
+			goto out_unlock;
+		}
+	}
+
+  out_unlock:
+	write_unlock_bh(&__ip_vs_svc_lock);
+#ifdef CONFIG_IP_VS_IPV6
+  out:
+#endif
+
+	if (old_sched)
+		ip_vs_scheduler_put(old_sched);
+
+	return ret;
+}
+
+
+/*
+ *	Delete a service from the service list
+ *	- The service must be unlinked, unlocked and not referenced!
+ *	- We are called under _bh lock
+ */
+static void __ip_vs_del_service(struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *dest, *nxt;
+	struct ip_vs_scheduler *old_sched;
+
+	/* Count only IPv4 services for old get/setsockopt interface */
+	if (svc->af == AF_INET)
+		ip_vs_num_services--;
+
+	ip_vs_kill_estimator(&svc->stats);
+
+	/* Unbind scheduler */
+	old_sched = svc->scheduler;
+	ip_vs_unbind_scheduler(svc);
+	if (old_sched)
+		ip_vs_scheduler_put(old_sched);
+
+	/* Unbind app inc */
+	if (svc->inc) {
+		ip_vs_app_inc_put(svc->inc);
+		svc->inc = NULL;
+	}
+
+	/*
+	 *    Unlink the whole destination list
+	 */
+	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
+		__ip_vs_unlink_dest(svc, dest, 0);
+		__ip_vs_del_dest(dest);
+	}
+
+	/*
+	 *    Update the virtual service counters
+	 */
+	if (svc->port == FTPPORT)
+		atomic_dec(&ip_vs_ftpsvc_counter);
+	else if (svc->port == 0)
+		atomic_dec(&ip_vs_nullsvc_counter);
+
+	/*
+	 *    Free the service if nobody refers to it
+	 */
+	if (atomic_read(&svc->refcnt) == 0)
+		kfree(svc);
+
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+}
+
+/*
+ *	Delete a service from the service list
+ */
+static int ip_vs_del_service(struct ip_vs_service *svc)
+{
+	if (svc == NULL)
+		return -EEXIST;
+
+	/*
+	 * Unhash it from the service table
+	 */
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	ip_vs_svc_unhash(svc);
+
+	/*
+	 * Wait until all the svc users go away.
+	 */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	__ip_vs_del_service(svc);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	return 0;
+}
+
+
+/*
+ *	Flush all the virtual services
+ */
+static int ip_vs_flush(void)
+{
+	int idx;
+	struct ip_vs_service *svc, *nxt;
+
+	/*
+	 * Flush the service table hashed by <protocol,addr,port>
+	 */
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
+			write_lock_bh(&__ip_vs_svc_lock);
+			ip_vs_svc_unhash(svc);
+			/*
+			 * Wait until all the svc users go away.
+			 */
+			IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+			__ip_vs_del_service(svc);
+			write_unlock_bh(&__ip_vs_svc_lock);
+		}
+	}
+
+	/*
+	 * Flush the service table hashed by fwmark
+	 */
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry_safe(svc, nxt,
+					 &ip_vs_svc_fwm_table[idx], f_list) {
+			write_lock_bh(&__ip_vs_svc_lock);
+			ip_vs_svc_unhash(svc);
+			/*
+			 * Wait until all the svc users go away.
+			 */
+			IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+			__ip_vs_del_service(svc);
+			write_unlock_bh(&__ip_vs_svc_lock);
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+ *	Zero counters in a service or all services
+ */
+static int ip_vs_zero_service(struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *dest;
+
+	write_lock_bh(&__ip_vs_svc_lock);
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		ip_vs_zero_stats(&dest->stats);
+	}
+	ip_vs_zero_stats(&svc->stats);
+	write_unlock_bh(&__ip_vs_svc_lock);
+	return 0;
+}
+
+static int ip_vs_zero_all(void)
+{
+	int idx;
+	struct ip_vs_service *svc;
+
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+			ip_vs_zero_service(svc);
+		}
+	}
+
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+			ip_vs_zero_service(svc);
+		}
+	}
+
+	ip_vs_zero_stats(&ip_vs_stats);
+	return 0;
+}
+
+
+static int
+proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
+		     void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = table->data;
+	int val = *valp;
+	int rc;
+
+	rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+	if (write && (*valp != val)) {
+		if ((*valp < 0) || (*valp > 3)) {
+			/* Restore the correct value */
+			*valp = val;
+		} else {
+			update_defense_level();
+		}
+	}
+	return rc;
+}
+
+
+static int
+proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
+		       void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = table->data;
+	int val[2];
+	int rc;
+
+	/* backup the value first */
+	memcpy(val, valp, sizeof(val));
+
+	rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+	if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
+		/* Restore the correct value */
+		memcpy(valp, val, sizeof(val));
+	}
+	return rc;
+}
+
+
+/*
+ *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
+ */
+
+static struct ctl_table vs_vars[] = {
+	{
+		.procname	= "amemthresh",
+		.data		= &sysctl_ip_vs_amemthresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#ifdef CONFIG_IP_VS_DEBUG
+	{
+		.procname	= "debug_level",
+		.data		= &sysctl_ip_vs_debug_level,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
+	{
+		.procname	= "am_droprate",
+		.data		= &sysctl_ip_vs_am_droprate,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.procname	= "drop_entry",
+		.data		= &sysctl_ip_vs_drop_entry,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_defense_mode,
+	},
+	{
+		.procname	= "drop_packet",
+		.data		= &sysctl_ip_vs_drop_packet,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_defense_mode,
+	},
+	{
+		.procname	= "secure_tcp",
+		.data		= &sysctl_ip_vs_secure_tcp,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_defense_mode,
+	},
+#if 0
+	{
+		.procname	= "timeout_established",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_synsent",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_synrecv",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_finwait",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_timewait",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_close",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_closewait",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_lastack",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_listen",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_synack",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_udp",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_icmp",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+#endif
+	{
+		.procname	= "cache_bypass",
+		.data		= &sysctl_ip_vs_cache_bypass,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.procname	= "expire_nodest_conn",
+		.data		= &sysctl_ip_vs_expire_nodest_conn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.procname	= "expire_quiescent_template",
+		.data		= &sysctl_ip_vs_expire_quiescent_template,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.procname	= "sync_threshold",
+		.data		= &sysctl_ip_vs_sync_threshold,
+		.maxlen		= sizeof(sysctl_ip_vs_sync_threshold),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_sync_threshold,
+	},
+	{
+		.procname	= "nat_icmp_send",
+		.data		= &sysctl_ip_vs_nat_icmp_send,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }
+};
+
+const struct ctl_path net_vs_ctl_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
+	{ .procname = "vs", },
+	{ }
+};
+EXPORT_SYMBOL_GPL(net_vs_ctl_path);
+
+static struct ctl_table_header * sysctl_header;
+
+#ifdef CONFIG_PROC_FS
+
+struct ip_vs_iter {
+	struct list_head *table;
+	int bucket;
+};
+
+/*
+ *	Write the contents of the VS rule table to a PROCfs file.
+ *	(It is kept just for backward compatibility)
+ */
+static inline const char *ip_vs_fwd_name(unsigned flags)
+{
+	switch (flags & IP_VS_CONN_F_FWD_MASK) {
+	case IP_VS_CONN_F_LOCALNODE:
+		return "Local";
+	case IP_VS_CONN_F_TUNNEL:
+		return "Tunnel";
+	case IP_VS_CONN_F_DROUTE:
+		return "Route";
+	default:
+		return "Masq";
+	}
+}
+
+
+/* Get the Nth entry in the two lists */
+static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
+{
+	struct ip_vs_iter *iter = seq->private;
+	int idx;
+	struct ip_vs_service *svc;
+
+	/* look in hash by protocol */
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+			if (pos-- == 0){
+				iter->table = ip_vs_svc_table;
+				iter->bucket = idx;
+				return svc;
+			}
+		}
+	}
+
+	/* keep looking in fwmark */
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+			if (pos-- == 0) {
+				iter->table = ip_vs_svc_fwm_table;
+				iter->bucket = idx;
+				return svc;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
+__acquires(__ip_vs_svc_lock)
+{
+
+	read_lock_bh(&__ip_vs_svc_lock);
+	return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+
+static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct list_head *e;
+	struct ip_vs_iter *iter;
+	struct ip_vs_service *svc;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ip_vs_info_array(seq,0);
+
+	svc = v;
+	iter = seq->private;
+
+	if (iter->table == ip_vs_svc_table) {
+		/* next service in table hashed by protocol */
+		if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
+			return list_entry(e, struct ip_vs_service, s_list);
+
+
+		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
+			list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
+					    s_list) {
+				return svc;
+			}
+		}
+
+		iter->table = ip_vs_svc_fwm_table;
+		iter->bucket = -1;
+		goto scan_fwmark;
+	}
+
+	/* next service in hashed by fwmark */
+	if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
+		return list_entry(e, struct ip_vs_service, f_list);
+
+ scan_fwmark:
+	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
+				    f_list)
+			return svc;
+	}
+
+	return NULL;
+}
+
+static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
+__releases(__ip_vs_svc_lock)
+{
+	read_unlock_bh(&__ip_vs_svc_lock);
+}
+
+
+static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq,
+			"IP Virtual Server version %d.%d.%d (size=%d)\n",
+			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
+		seq_puts(seq,
+			 "Prot LocalAddress:Port Scheduler Flags\n");
+		seq_puts(seq,
+			 "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
+	} else {
+		const struct ip_vs_service *svc = v;
+		const struct ip_vs_iter *iter = seq->private;
+		const struct ip_vs_dest *dest;
+
+		if (iter->table == ip_vs_svc_table) {
+#ifdef CONFIG_IP_VS_IPV6
+			if (svc->af == AF_INET6)
+				seq_printf(seq, "%s  [" NIP6_FMT "]:%04X %s ",
+					   ip_vs_proto_name(svc->protocol),
+					   NIP6(svc->addr.in6),
+					   ntohs(svc->port),
+					   svc->scheduler->name);
+			else
+#endif
+				seq_printf(seq, "%s  %08X:%04X %s ",
+					   ip_vs_proto_name(svc->protocol),
+					   ntohl(svc->addr.ip),
+					   ntohs(svc->port),
+					   svc->scheduler->name);
+		} else {
+			seq_printf(seq, "FWM  %08X %s ",
+				   svc->fwmark, svc->scheduler->name);
+		}
+
+		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+			seq_printf(seq, "persistent %d %08X\n",
+				svc->timeout,
+				ntohl(svc->netmask));
+		else
+			seq_putc(seq, '\n');
+
+		list_for_each_entry(dest, &svc->destinations, n_list) {
+#ifdef CONFIG_IP_VS_IPV6
+			if (dest->af == AF_INET6)
+				seq_printf(seq,
+					   "  -> [" NIP6_FMT "]:%04X"
+					   "      %-7s %-6d %-10d %-10d\n",
+					   NIP6(dest->addr.in6),
+					   ntohs(dest->port),
+					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+					   atomic_read(&dest->weight),
+					   atomic_read(&dest->activeconns),
+					   atomic_read(&dest->inactconns));
+			else
+#endif
+				seq_printf(seq,
+					   "  -> %08X:%04X      "
+					   "%-7s %-6d %-10d %-10d\n",
+					   ntohl(dest->addr.ip),
+					   ntohs(dest->port),
+					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+					   atomic_read(&dest->weight),
+					   atomic_read(&dest->activeconns),
+					   atomic_read(&dest->inactconns));
+
+		}
+	}
+	return 0;
+}
+
+static const struct seq_operations ip_vs_info_seq_ops = {
+	.start = ip_vs_info_seq_start,
+	.next  = ip_vs_info_seq_next,
+	.stop  = ip_vs_info_seq_stop,
+	.show  = ip_vs_info_seq_show,
+};
+
+static int ip_vs_info_open(struct inode *inode, struct file *file)
+{
+	return seq_open_private(file, &ip_vs_info_seq_ops,
+			sizeof(struct ip_vs_iter));
+}
+
+static const struct file_operations ip_vs_info_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ip_vs_info_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_private,
+};
+
+#endif
+
+struct ip_vs_stats ip_vs_stats = {
+	.lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
+};
+
+#ifdef CONFIG_PROC_FS
+static int ip_vs_stats_show(struct seq_file *seq, void *v)
+{
+
+/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
+	seq_puts(seq,
+		 "   Total Incoming Outgoing         Incoming         Outgoing\n");
+	seq_printf(seq,
+		   "   Conns  Packets  Packets            Bytes            Bytes\n");
+
+	spin_lock_bh(&ip_vs_stats.lock);
+	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
+		   ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
+		   (unsigned long long) ip_vs_stats.ustats.inbytes,
+		   (unsigned long long) ip_vs_stats.ustats.outbytes);
+
+/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+	seq_puts(seq,
+		   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
+	seq_printf(seq,"%8X %8X %8X %16X %16X\n",
+			ip_vs_stats.ustats.cps,
+			ip_vs_stats.ustats.inpps,
+			ip_vs_stats.ustats.outpps,
+			ip_vs_stats.ustats.inbps,
+			ip_vs_stats.ustats.outbps);
+	spin_unlock_bh(&ip_vs_stats.lock);
+
+	return 0;
+}
+
+static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ip_vs_stats_show, NULL);
+}
+
+static const struct file_operations ip_vs_stats_fops = {
+	.owner = THIS_MODULE,
+	.open = ip_vs_stats_seq_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif
+
+/*
+ *	Set timeout values for tcp tcpfin udp in the timeout_table.
+ */
+static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
+{
+	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
+		  u->tcp_timeout,
+		  u->tcp_fin_timeout,
+		  u->udp_timeout);
+
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	if (u->tcp_timeout) {
+		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
+			= u->tcp_timeout * HZ;
+	}
+
+	if (u->tcp_fin_timeout) {
+		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
+			= u->tcp_fin_timeout * HZ;
+	}
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	if (u->udp_timeout) {
+		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
+			= u->udp_timeout * HZ;
+	}
+#endif
+	return 0;
+}
+
+
+#define SET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
+#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
+#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
+				 sizeof(struct ip_vs_dest_user))
+#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
+#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
+#define MAX_ARG_LEN		SVCDEST_ARG_LEN
+
+static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
+	[SET_CMDID(IP_VS_SO_SET_ADD)]		= SERVICE_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_EDIT)]		= SERVICE_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_DEL)]		= SERVICE_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_FLUSH)]		= 0,
+	[SET_CMDID(IP_VS_SO_SET_ADDDEST)]	= SVCDEST_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_DELDEST)]	= SVCDEST_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_EDITDEST)]	= SVCDEST_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_TIMEOUT)]	= TIMEOUT_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]	= DAEMON_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]	= DAEMON_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_ZERO)]		= SERVICE_ARG_LEN,
+};
+
+static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
+				  struct ip_vs_service_user *usvc_compat)
+{
+	usvc->af		= AF_INET;
+	usvc->protocol		= usvc_compat->protocol;
+	usvc->addr.ip		= usvc_compat->addr;
+	usvc->port		= usvc_compat->port;
+	usvc->fwmark		= usvc_compat->fwmark;
+
+	/* Deep copy of sched_name is not needed here */
+	usvc->sched_name	= usvc_compat->sched_name;
+
+	usvc->flags		= usvc_compat->flags;
+	usvc->timeout		= usvc_compat->timeout;
+	usvc->netmask		= usvc_compat->netmask;
+}
+
+static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
+				   struct ip_vs_dest_user *udest_compat)
+{
+	udest->addr.ip		= udest_compat->addr;
+	udest->port		= udest_compat->port;
+	udest->conn_flags	= udest_compat->conn_flags;
+	udest->weight		= udest_compat->weight;
+	udest->u_threshold	= udest_compat->u_threshold;
+	udest->l_threshold	= udest_compat->l_threshold;
+}
+
+static int
+do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+{
+	int ret;
+	unsigned char arg[MAX_ARG_LEN];
+	struct ip_vs_service_user *usvc_compat;
+	struct ip_vs_service_user_kern usvc;
+	struct ip_vs_service *svc;
+	struct ip_vs_dest_user *udest_compat;
+	struct ip_vs_dest_user_kern udest;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (len != set_arglen[SET_CMDID(cmd)]) {
+		IP_VS_ERR("set_ctl: len %u != %u\n",
+			  len, set_arglen[SET_CMDID(cmd)]);
+		return -EINVAL;
+	}
+
+	if (copy_from_user(arg, user, len) != 0)
+		return -EFAULT;
+
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	if (mutex_lock_interruptible(&__ip_vs_mutex)) {
+		ret = -ERESTARTSYS;
+		goto out_dec;
+	}
+
+	if (cmd == IP_VS_SO_SET_FLUSH) {
+		/* Flush the virtual service */
+		ret = ip_vs_flush();
+		goto out_unlock;
+	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
+		/* Set timeout values for (tcp tcpfin udp) */
+		ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
+		goto out_unlock;
+	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
+		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
+		ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
+		goto out_unlock;
+	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
+		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
+		ret = stop_sync_thread(dm->state);
+		goto out_unlock;
+	}
+
+	usvc_compat = (struct ip_vs_service_user *)arg;
+	udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
+
+	/* We only use the new structs internally, so copy userspace compat
+	 * structs to extended internal versions */
+	ip_vs_copy_usvc_compat(&usvc, usvc_compat);
+	ip_vs_copy_udest_compat(&udest, udest_compat);
+
+	if (cmd == IP_VS_SO_SET_ZERO) {
+		/* if no service address is set, zero counters in all */
+		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
+			ret = ip_vs_zero_all();
+			goto out_unlock;
+		}
+	}
+
+	/* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
+	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
+		IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
+			  usvc.protocol, NIPQUAD(usvc.addr.ip),
+			  ntohs(usvc.port), usvc.sched_name);
+		ret = -EFAULT;
+		goto out_unlock;
+	}
+
+	/* Lookup the exact service by <protocol, addr, port> or fwmark */
+	if (usvc.fwmark == 0)
+		svc = __ip_vs_service_get(usvc.af, usvc.protocol,
+					  &usvc.addr, usvc.port);
+	else
+		svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
+
+	if (cmd != IP_VS_SO_SET_ADD
+	    && (svc == NULL || svc->protocol != usvc.protocol)) {
+		ret = -ESRCH;
+		goto out_unlock;
+	}
+
+	switch (cmd) {
+	case IP_VS_SO_SET_ADD:
+		if (svc != NULL)
+			ret = -EEXIST;
+		else
+			ret = ip_vs_add_service(&usvc, &svc);
+		break;
+	case IP_VS_SO_SET_EDIT:
+		ret = ip_vs_edit_service(svc, &usvc);
+		break;
+	case IP_VS_SO_SET_DEL:
+		ret = ip_vs_del_service(svc);
+		if (!ret)
+			goto out_unlock;
+		break;
+	case IP_VS_SO_SET_ZERO:
+		ret = ip_vs_zero_service(svc);
+		break;
+	case IP_VS_SO_SET_ADDDEST:
+		ret = ip_vs_add_dest(svc, &udest);
+		break;
+	case IP_VS_SO_SET_EDITDEST:
+		ret = ip_vs_edit_dest(svc, &udest);
+		break;
+	case IP_VS_SO_SET_DELDEST:
+		ret = ip_vs_del_dest(svc, &udest);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (svc)
+		ip_vs_service_put(svc);
+
+  out_unlock:
+	mutex_unlock(&__ip_vs_mutex);
+  out_dec:
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+
+	return ret;
+}
+
+
+static void
+ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
+{
+	spin_lock_bh(&src->lock);
+	memcpy(dst, &src->ustats, sizeof(*dst));
+	spin_unlock_bh(&src->lock);
+}
+
+static void
+ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
+{
+	dst->protocol = src->protocol;
+	dst->addr = src->addr.ip;
+	dst->port = src->port;
+	dst->fwmark = src->fwmark;
+	strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
+	dst->flags = src->flags;
+	dst->timeout = src->timeout / HZ;
+	dst->netmask = src->netmask;
+	dst->num_dests = src->num_dests;
+	ip_vs_copy_stats(&dst->stats, &src->stats);
+}
+
+static inline int
+__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
+			    struct ip_vs_get_services __user *uptr)
+{
+	int idx, count=0;
+	struct ip_vs_service *svc;
+	struct ip_vs_service_entry entry;
+	int ret = 0;
+
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+			/* Only expose IPv4 entries to old interface */
+			if (svc->af != AF_INET)
+				continue;
+
+			if (count >= get->num_services)
+				goto out;
+			memset(&entry, 0, sizeof(entry));
+			ip_vs_copy_service(&entry, svc);
+			if (copy_to_user(&uptr->entrytable[count],
+					 &entry, sizeof(entry))) {
+				ret = -EFAULT;
+				goto out;
+			}
+			count++;
+		}
+	}
+
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+			/* Only expose IPv4 entries to old interface */
+			if (svc->af != AF_INET)
+				continue;
+
+			if (count >= get->num_services)
+				goto out;
+			memset(&entry, 0, sizeof(entry));
+			ip_vs_copy_service(&entry, svc);
+			if (copy_to_user(&uptr->entrytable[count],
+					 &entry, sizeof(entry))) {
+				ret = -EFAULT;
+				goto out;
+			}
+			count++;
+		}
+	}
+  out:
+	return ret;
+}
+
+static inline int
+__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
+			 struct ip_vs_get_dests __user *uptr)
+{
+	struct ip_vs_service *svc;
+	union nf_inet_addr addr = { .ip = get->addr };
+	int ret = 0;
+
+	if (get->fwmark)
+		svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
+	else
+		svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
+					  get->port);
+
+	if (svc) {
+		int count = 0;
+		struct ip_vs_dest *dest;
+		struct ip_vs_dest_entry entry;
+
+		list_for_each_entry(dest, &svc->destinations, n_list) {
+			if (count >= get->num_dests)
+				break;
+
+			entry.addr = dest->addr.ip;
+			entry.port = dest->port;
+			entry.conn_flags = atomic_read(&dest->conn_flags);
+			entry.weight = atomic_read(&dest->weight);
+			entry.u_threshold = dest->u_threshold;
+			entry.l_threshold = dest->l_threshold;
+			entry.activeconns = atomic_read(&dest->activeconns);
+			entry.inactconns = atomic_read(&dest->inactconns);
+			entry.persistconns = atomic_read(&dest->persistconns);
+			ip_vs_copy_stats(&entry.stats, &dest->stats);
+			if (copy_to_user(&uptr->entrytable[count],
+					 &entry, sizeof(entry))) {
+				ret = -EFAULT;
+				break;
+			}
+			count++;
+		}
+		ip_vs_service_put(svc);
+	} else
+		ret = -ESRCH;
+	return ret;
+}
+
+static inline void
+__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+{
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	u->tcp_timeout =
+		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
+	u->tcp_fin_timeout =
+		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	u->udp_timeout =
+		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
+#endif
+}
+
+
+#define GET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
+#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
+#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
+#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
+#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
+#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
+#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
+
+static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
+	[GET_CMDID(IP_VS_SO_GET_VERSION)]	= 64,
+	[GET_CMDID(IP_VS_SO_GET_INFO)]		= GET_INFO_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_SERVICES)]	= GET_SERVICES_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_SERVICE)]	= GET_SERVICE_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_DESTS)]		= GET_DESTS_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_TIMEOUT)]	= GET_TIMEOUT_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_DAEMON)]	= GET_DAEMON_ARG_LEN,
+};
+
+static int
+do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	unsigned char arg[128];
+	int ret = 0;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (*len < get_arglen[GET_CMDID(cmd)]) {
+		IP_VS_ERR("get_ctl: len %u < %u\n",
+			  *len, get_arglen[GET_CMDID(cmd)]);
+		return -EINVAL;
+	}
+
+	if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
+		return -EFAULT;
+
+	if (mutex_lock_interruptible(&__ip_vs_mutex))
+		return -ERESTARTSYS;
+
+	switch (cmd) {
+	case IP_VS_SO_GET_VERSION:
+	{
+		char buf[64];
+
+		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
+			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
+		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
+			ret = -EFAULT;
+			goto out;
+		}
+		*len = strlen(buf)+1;
+	}
+	break;
+
+	case IP_VS_SO_GET_INFO:
+	{
+		struct ip_vs_getinfo info;
+		info.version = IP_VS_VERSION_CODE;
+		info.size = IP_VS_CONN_TAB_SIZE;
+		info.num_services = ip_vs_num_services;
+		if (copy_to_user(user, &info, sizeof(info)) != 0)
+			ret = -EFAULT;
+	}
+	break;
+
+	case IP_VS_SO_GET_SERVICES:
+	{
+		struct ip_vs_get_services *get;
+		int size;
+
+		get = (struct ip_vs_get_services *)arg;
+		size = sizeof(*get) +
+			sizeof(struct ip_vs_service_entry) * get->num_services;
+		if (*len != size) {
+			IP_VS_ERR("length: %u != %u\n", *len, size);
+			ret = -EINVAL;
+			goto out;
+		}
+		ret = __ip_vs_get_service_entries(get, user);
+	}
+	break;
+
+	case IP_VS_SO_GET_SERVICE:
+	{
+		struct ip_vs_service_entry *entry;
+		struct ip_vs_service *svc;
+		union nf_inet_addr addr;
+
+		entry = (struct ip_vs_service_entry *)arg;
+		addr.ip = entry->addr;
+		if (entry->fwmark)
+			svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
+		else
+			svc = __ip_vs_service_get(AF_INET, entry->protocol,
+						  &addr, entry->port);
+		if (svc) {
+			ip_vs_copy_service(entry, svc);
+			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
+				ret = -EFAULT;
+			ip_vs_service_put(svc);
+		} else
+			ret = -ESRCH;
+	}
+	break;
+
+	case IP_VS_SO_GET_DESTS:
+	{
+		struct ip_vs_get_dests *get;
+		int size;
+
+		get = (struct ip_vs_get_dests *)arg;
+		size = sizeof(*get) +
+			sizeof(struct ip_vs_dest_entry) * get->num_dests;
+		if (*len != size) {
+			IP_VS_ERR("length: %u != %u\n", *len, size);
+			ret = -EINVAL;
+			goto out;
+		}
+		ret = __ip_vs_get_dest_entries(get, user);
+	}
+	break;
+
+	case IP_VS_SO_GET_TIMEOUT:
+	{
+		struct ip_vs_timeout_user t;
+
+		__ip_vs_get_timeouts(&t);
+		if (copy_to_user(user, &t, sizeof(t)) != 0)
+			ret = -EFAULT;
+	}
+	break;
+
+	case IP_VS_SO_GET_DAEMON:
+	{
+		struct ip_vs_daemon_user d[2];
+
+		memset(&d, 0, sizeof(d));
+		if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
+			d[0].state = IP_VS_STATE_MASTER;
+			strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
+			d[0].syncid = ip_vs_master_syncid;
+		}
+		if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
+			d[1].state = IP_VS_STATE_BACKUP;
+			strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
+			d[1].syncid = ip_vs_backup_syncid;
+		}
+		if (copy_to_user(user, &d, sizeof(d)) != 0)
+			ret = -EFAULT;
+	}
+	break;
+
+	default:
+		ret = -EINVAL;
+	}
+
+  out:
+	mutex_unlock(&__ip_vs_mutex);
+	return ret;
+}
+
+
+static struct nf_sockopt_ops ip_vs_sockopts = {
+	.pf		= PF_INET,
+	.set_optmin	= IP_VS_BASE_CTL,
+	.set_optmax	= IP_VS_SO_SET_MAX+1,
+	.set		= do_ip_vs_set_ctl,
+	.get_optmin	= IP_VS_BASE_CTL,
+	.get_optmax	= IP_VS_SO_GET_MAX+1,
+	.get		= do_ip_vs_get_ctl,
+	.owner		= THIS_MODULE,
+};
+
+/*
+ * Generic Netlink interface
+ */
+
+/* IPVS genetlink family */
+static struct genl_family ip_vs_genl_family = {
+	.id		= GENL_ID_GENERATE,
+	.hdrsize	= 0,
+	.name		= IPVS_GENL_NAME,
+	.version	= IPVS_GENL_VERSION,
+	.maxattr	= IPVS_CMD_MAX,
+};
+
+/* Policy used for first-level command attributes */
+static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
+	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
+	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
+	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
+static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
+	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
+	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
+					    .len = IP_VS_IFNAME_MAXLEN },
+	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
+static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
+	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
+					    .len = sizeof(union nf_inet_addr) },
+	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
+					    .len = IP_VS_SCHEDNAME_MAXLEN },
+	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
+					    .len = sizeof(struct ip_vs_flags) },
+	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
+static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
+	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
+					    .len = sizeof(union nf_inet_addr) },
+	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
+	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
+};
+
+static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
+				 struct ip_vs_stats *stats)
+{
+	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+	if (!nl_stats)
+		return -EMSGSIZE;
+
+	spin_lock_bh(&stats->lock);
+
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
+
+	spin_unlock_bh(&stats->lock);
+
+	nla_nest_end(skb, nl_stats);
+
+	return 0;
+
+nla_put_failure:
+	spin_unlock_bh(&stats->lock);
+	nla_nest_cancel(skb, nl_stats);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_fill_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc)
+{
+	struct nlattr *nl_service;
+	struct ip_vs_flags flags = { .flags = svc->flags,
+				     .mask = ~0 };
+
+	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
+	if (!nl_service)
+		return -EMSGSIZE;
+
+	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
+
+	if (svc->fwmark) {
+		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
+	} else {
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
+		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
+	}
+
+	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
+	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nl_service);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_service);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc,
+				   struct netlink_callback *cb)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_SERVICE);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_service(skb, svc) < 0)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_services(struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	int idx = 0, i;
+	int start = cb->args[0];
+	struct ip_vs_service *svc;
+
+	mutex_lock(&__ip_vs_mutex);
+	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
+			if (++idx <= start)
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				idx--;
+				goto nla_put_failure;
+			}
+		}
+	}
+
+	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
+			if (++idx <= start)
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				idx--;
+				goto nla_put_failure;
+			}
+		}
+	}
+
+nla_put_failure:
+	mutex_unlock(&__ip_vs_mutex);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
+				    struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
+	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
+
+	/* Parse mandatory identifying service fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
+		return -EINVAL;
+
+	nla_af		= attrs[IPVS_SVC_ATTR_AF];
+	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
+	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
+	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
+	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];
+
+	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
+		return -EINVAL;
+
+	usvc->af = nla_get_u16(nla_af);
+#ifdef CONFIG_IP_VS_IPV6
+	if (usvc->af != AF_INET && usvc->af != AF_INET6)
+#else
+	if (usvc->af != AF_INET)
+#endif
+		return -EAFNOSUPPORT;
+
+	if (nla_fwmark) {
+		usvc->protocol = IPPROTO_TCP;
+		usvc->fwmark = nla_get_u32(nla_fwmark);
+	} else {
+		usvc->protocol = nla_get_u16(nla_protocol);
+		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
+		usvc->port = nla_get_u16(nla_port);
+		usvc->fwmark = 0;
+	}
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_sched, *nla_flags, *nla_timeout,
+			      *nla_netmask;
+		struct ip_vs_flags flags;
+		struct ip_vs_service *svc;
+
+		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
+		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
+		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
+		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
+
+		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
+			return -EINVAL;
+
+		nla_memcpy(&flags, nla_flags, sizeof(flags));
+
+		/* prefill flags from service if it already exists */
+		if (usvc->fwmark)
+			svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
+		else
+			svc = __ip_vs_service_get(usvc->af, usvc->protocol,
+						  &usvc->addr, usvc->port);
+		if (svc) {
+			usvc->flags = svc->flags;
+			ip_vs_service_put(svc);
+		} else
+			usvc->flags = 0;
+
+		/* set new flags from userland */
+		usvc->flags = (usvc->flags & ~flags.mask) |
+			      (flags.flags & flags.mask);
+		usvc->sched_name = nla_data(nla_sched);
+		usvc->timeout = nla_get_u32(nla_timeout);
+		usvc->netmask = nla_get_u32(nla_netmask);
+	}
+
+	return 0;
+}
+
+static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+{
+	struct ip_vs_service_user_kern usvc;
+	int ret;
+
+	ret = ip_vs_genl_parse_service(&usvc, nla, 0);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (usvc.fwmark)
+		return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
+	else
+		return __ip_vs_service_get(usvc.af, usvc.protocol,
+					   &usvc.addr, usvc.port);
+}
+
+static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
+{
+	struct nlattr *nl_dest;
+
+	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
+	if (!nl_dest)
+		return -EMSGSIZE;
+
+	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
+	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
+
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
+		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
+		    atomic_read(&dest->activeconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
+		    atomic_read(&dest->inactconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
+		    atomic_read(&dest->persistconns));
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nl_dest);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_dest);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
+				struct netlink_callback *cb)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_DEST);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_dest(skb, dest) < 0)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dests(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	int idx = 0;
+	int start = cb->args[0];
+	struct ip_vs_service *svc;
+	struct ip_vs_dest *dest;
+	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+
+	mutex_lock(&__ip_vs_mutex);
+
+	/* Try to find the service for which to dump destinations */
+	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
+			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
+		goto out_err;
+
+	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+	if (IS_ERR(svc) || svc == NULL)
+		goto out_err;
+
+	/* Dump the destinations */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (++idx <= start)
+			continue;
+		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
+			idx--;
+			goto nla_put_failure;
+		}
+	}
+
+nla_put_failure:
+	cb->args[0] = idx;
+	ip_vs_service_put(svc);
+
+out_err:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
+				 struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
+	struct nlattr *nla_addr, *nla_port;
+
+	/* Parse mandatory identifying destination fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
+		return -EINVAL;
+
+	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
+	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
+
+	if (!(nla_addr && nla_port))
+		return -EINVAL;
+
+	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
+	udest->port = nla_get_u16(nla_port);
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
+			      *nla_l_thresh;
+
+		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
+		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
+		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
+		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
+
+		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
+			return -EINVAL;
+
+		udest->conn_flags = nla_get_u32(nla_fwd)
+				    & IP_VS_CONN_F_FWD_MASK;
+		udest->weight = nla_get_u32(nla_weight);
+		udest->u_threshold = nla_get_u32(nla_u_thresh);
+		udest->l_threshold = nla_get_u32(nla_l_thresh);
+	}
+
+	return 0;
+}
+
+static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid)
+{
+	struct nlattr *nl_daemon;
+
+	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
+	if (!nl_daemon)
+		return -EMSGSIZE;
+
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
+	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
+
+	nla_nest_end(skb, nl_daemon);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_daemon);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid,
+				  struct netlink_callback *cb)
+{
+	void *hdr;
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_DAEMON);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	mutex_lock(&__ip_vs_mutex);
+	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
+					   ip_vs_master_mcast_ifn,
+					   ip_vs_master_syncid, cb) < 0)
+			goto nla_put_failure;
+
+		cb->args[0] = 1;
+	}
+
+	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
+					   ip_vs_backup_mcast_ifn,
+					   ip_vs_backup_syncid, cb) < 0)
+			goto nla_put_failure;
+
+		cb->args[1] = 1;
+	}
+
+nla_put_failure:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+{
+	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
+	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
+	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
+		return -EINVAL;
+
+	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
+				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
+}
+
+static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+{
+	if (!attrs[IPVS_DAEMON_ATTR_STATE])
+		return -EINVAL;
+
+	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+}
+
+static int ip_vs_genl_set_config(struct nlattr **attrs)
+{
+	struct ip_vs_timeout_user t;
+
+	__ip_vs_get_timeouts(&t);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
+		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
+		t.tcp_fin_timeout =
+			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
+		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
+
+	return ip_vs_set_timeout(&t);
+}
+
+static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ip_vs_service *svc = NULL;
+	struct ip_vs_service_user_kern usvc;
+	struct ip_vs_dest_user_kern udest;
+	int ret = 0, cmd;
+	int need_full_svc = 0, need_full_dest = 0;
+
+	cmd = info->genlhdr->cmd;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	if (cmd == IPVS_CMD_FLUSH) {
+		ret = ip_vs_flush();
+		goto out;
+	} else if (cmd == IPVS_CMD_SET_CONFIG) {
+		ret = ip_vs_genl_set_config(info->attrs);
+		goto out;
+	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
+		   cmd == IPVS_CMD_DEL_DAEMON) {
+
+		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
+
+		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
+		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
+				     info->attrs[IPVS_CMD_ATTR_DAEMON],
+				     ip_vs_daemon_policy)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (cmd == IPVS_CMD_NEW_DAEMON)
+			ret = ip_vs_genl_new_daemon(daemon_attrs);
+		else
+			ret = ip_vs_genl_del_daemon(daemon_attrs);
+		goto out;
+	} else if (cmd == IPVS_CMD_ZERO &&
+		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
+		ret = ip_vs_zero_all();
+		goto out;
+	}
+
+	/* All following commands require a service argument, so check if we
+	 * received a valid one. We need a full service specification when
+	 * adding / editing a service. Only identifying members otherwise. */
+	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
+		need_full_svc = 1;
+
+	ret = ip_vs_genl_parse_service(&usvc,
+				       info->attrs[IPVS_CMD_ATTR_SERVICE],
+				       need_full_svc);
+	if (ret)
+		goto out;
+
+	/* Lookup the exact service by <protocol, addr, port> or fwmark */
+	if (usvc.fwmark == 0)
+		svc = __ip_vs_service_get(usvc.af, usvc.protocol,
+					  &usvc.addr, usvc.port);
+	else
+		svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
+
+	/* Unless we're adding a new service, the service must already exist */
+	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
+		ret = -ESRCH;
+		goto out;
+	}
+
+	/* Destination commands require a valid destination argument. For
+	 * adding / editing a destination, we need a full destination
+	 * specification. */
+	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
+	    cmd == IPVS_CMD_DEL_DEST) {
+		if (cmd != IPVS_CMD_DEL_DEST)
+			need_full_dest = 1;
+
+		ret = ip_vs_genl_parse_dest(&udest,
+					    info->attrs[IPVS_CMD_ATTR_DEST],
+					    need_full_dest);
+		if (ret)
+			goto out;
+	}
+
+	switch (cmd) {
+	case IPVS_CMD_NEW_SERVICE:
+		if (svc == NULL)
+			ret = ip_vs_add_service(&usvc, &svc);
+		else
+			ret = -EEXIST;
+		break;
+	case IPVS_CMD_SET_SERVICE:
+		ret = ip_vs_edit_service(svc, &usvc);
+		break;
+	case IPVS_CMD_DEL_SERVICE:
+		ret = ip_vs_del_service(svc);
+		break;
+	case IPVS_CMD_NEW_DEST:
+		ret = ip_vs_add_dest(svc, &udest);
+		break;
+	case IPVS_CMD_SET_DEST:
+		ret = ip_vs_edit_dest(svc, &udest);
+		break;
+	case IPVS_CMD_DEL_DEST:
+		ret = ip_vs_del_dest(svc, &udest);
+		break;
+	case IPVS_CMD_ZERO:
+		ret = ip_vs_zero_service(svc);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+out:
+	if (svc)
+		ip_vs_service_put(svc);
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	void *reply;
+	int ret, cmd, reply_cmd;
+
+	cmd = info->genlhdr->cmd;
+
+	if (cmd == IPVS_CMD_GET_SERVICE)
+		reply_cmd = IPVS_CMD_NEW_SERVICE;
+	else if (cmd == IPVS_CMD_GET_INFO)
+		reply_cmd = IPVS_CMD_SET_INFO;
+	else if (cmd == IPVS_CMD_GET_CONFIG)
+		reply_cmd = IPVS_CMD_SET_CONFIG;
+	else {
+		IP_VS_ERR("unknown Generic Netlink command\n");
+		return -EINVAL;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
+	if (reply == NULL)
+		goto nla_put_failure;
+
+	switch (cmd) {
+	case IPVS_CMD_GET_SERVICE:
+	{
+		struct ip_vs_service *svc;
+
+		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+		if (IS_ERR(svc)) {
+			ret = PTR_ERR(svc);
+			goto out_err;
+		} else if (svc) {
+			ret = ip_vs_genl_fill_service(msg, svc);
+			ip_vs_service_put(svc);
+			if (ret)
+				goto nla_put_failure;
+		} else {
+			ret = -ESRCH;
+			goto out_err;
+		}
+
+		break;
+	}
+
+	case IPVS_CMD_GET_CONFIG:
+	{
+		struct ip_vs_timeout_user t;
+
+		__ip_vs_get_timeouts(&t);
+#ifdef CONFIG_IP_VS_PROTO_TCP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
+			    t.tcp_fin_timeout);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
+#endif
+
+		break;
+	}
+
+	case IPVS_CMD_GET_INFO:
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
+			    IP_VS_CONN_TAB_SIZE);
+		break;
+	}
+
+	genlmsg_end(msg, reply);
+	ret = genlmsg_unicast(msg, info->snd_pid);
+	goto out;
+
+nla_put_failure:
+	IP_VS_ERR("not enough space in Netlink message\n");
+	ret = -EMSGSIZE;
+
+out_err:
+	nlmsg_free(msg);
+out:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+
+static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
+	{
+		.cmd	= IPVS_CMD_NEW_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+		.dumpit	= ip_vs_genl_dump_services,
+		.policy	= ip_vs_cmd_policy,
+	},
+	{
+		.cmd	= IPVS_CMD_NEW_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.dumpit	= ip_vs_genl_dump_dests,
+	},
+	{
+		.cmd	= IPVS_CMD_NEW_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.dumpit	= ip_vs_genl_dump_daemons,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_CONFIG,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_CONFIG,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_INFO,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_ZERO,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_FLUSH,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+};
+
+static int __init ip_vs_genl_register(void)
+{
+	int ret, i;
+
+	ret = genl_register_family(&ip_vs_genl_family);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
+		ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
+		if (ret)
+			goto err_out;
+	}
+	return 0;
+
+err_out:
+	genl_unregister_family(&ip_vs_genl_family);
+	return ret;
+}
+
+static void ip_vs_genl_unregister(void)
+{
+	genl_unregister_family(&ip_vs_genl_family);
+}
+
+/* End of Generic Netlink interface definitions */
+
+
+int __init ip_vs_control_init(void)
+{
+	int ret;
+	int idx;
+
+	EnterFunction(2);
+
+	ret = nf_register_sockopt(&ip_vs_sockopts);
+	if (ret) {
+		IP_VS_ERR("cannot register sockopt.\n");
+		return ret;
+	}
+
+	ret = ip_vs_genl_register();
+	if (ret) {
+		IP_VS_ERR("cannot register Generic Netlink interface.\n");
+		nf_unregister_sockopt(&ip_vs_sockopts);
+		return ret;
+	}
+
+	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
+	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
+
+	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
+
+	/* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
+		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
+		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
+	}
+	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
+		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
+	}
+
+	ip_vs_new_estimator(&ip_vs_stats);
+
+	/* Hook the defense timer */
+	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+
+	LeaveFunction(2);
+	return 0;
+}
+
+
+void ip_vs_control_cleanup(void)
+{
+	EnterFunction(2);
+	ip_vs_trash_cleanup();
+	cancel_rearming_delayed_work(&defense_work);
+	cancel_work_sync(&defense_work.work);
+	ip_vs_kill_estimator(&ip_vs_stats);
+	unregister_sysctl_table(sysctl_header);
+	proc_net_remove(&init_net, "ip_vs_stats");
+	proc_net_remove(&init_net, "ip_vs");
+	ip_vs_genl_unregister();
+	nf_unregister_sockopt(&ip_vs_sockopts);
+	LeaveFunction(2);
+}
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
new file mode 100644
index 00000000000..a16943fd72f
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -0,0 +1,261 @@
+/*
+ * IPVS:        Destination Hashing scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@gnuchina.org>
+ *
+ *              Inspired by the consistent hashing scheduler patch from
+ *              Thomas Proell <proellt@gmx.de>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The dh algorithm is to select server by the hash key of destination IP
+ * address. The pseudo code is as follows:
+ *
+ *       n <- servernode[dest_ip];
+ *       if (n is dead) OR
+ *          (n is overloaded) OR (n.weight <= 0) then
+ *                 return NULL;
+ *
+ *       return n;
+ *
+ * Notes that servernode is a 256-bucket hash table that maps the hash
+ * index derived from packet destination IP address to the current server
+ * array. If the dh scheduler is used in cache cluster, it is good to
+ * combine it with cache_bypass feature. When the statically assigned
+ * server is dead or overloaded, the load balancer can bypass the cache
+ * server and send requests to the original server directly.
+ *
+ */
+
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *      IPVS DH bucket
+ */
+struct ip_vs_dh_bucket {
+	struct ip_vs_dest       *dest;          /* real server (cache) */
+};
+
+/*
+ *     for IPVS DH entry hash table
+ */
+#ifndef CONFIG_IP_VS_DH_TAB_BITS
+#define CONFIG_IP_VS_DH_TAB_BITS        8
+#endif
+#define IP_VS_DH_TAB_BITS               CONFIG_IP_VS_DH_TAB_BITS
+#define IP_VS_DH_TAB_SIZE               (1 << IP_VS_DH_TAB_BITS)
+#define IP_VS_DH_TAB_MASK               (IP_VS_DH_TAB_SIZE - 1)
+
+
+/*
+ *	Returns hash value for IPVS DH entry
+ */
+static inline unsigned ip_vs_dh_hashkey(__be32 addr)
+{
+	return (ntohl(addr)*2654435761UL) & IP_VS_DH_TAB_MASK;
+}
+
+
+/*
+ *      Get ip_vs_dest associated with supplied parameters.
+ */
+static inline struct ip_vs_dest *
+ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __be32 addr)
+{
+	return (tbl[ip_vs_dh_hashkey(addr)]).dest;
+}
+
+
+/*
+ *      Assign all the hash buckets of the specified table with the service.
+ */
+static int
+ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_dh_bucket *b;
+	struct list_head *p;
+	struct ip_vs_dest *dest;
+
+	b = tbl;
+	p = &svc->destinations;
+	for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
+		if (list_empty(p)) {
+			b->dest = NULL;
+		} else {
+			if (p == &svc->destinations)
+				p = p->next;
+
+			dest = list_entry(p, struct ip_vs_dest, n_list);
+			atomic_inc(&dest->refcnt);
+			b->dest = dest;
+
+			p = p->next;
+		}
+		b++;
+	}
+	return 0;
+}
+
+
+/*
+ *      Flush all the hash buckets of the specified table.
+ */
+static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
+{
+	int i;
+	struct ip_vs_dh_bucket *b;
+
+	b = tbl;
+	for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
+		if (b->dest) {
+			atomic_dec(&b->dest->refcnt);
+			b->dest = NULL;
+		}
+		b++;
+	}
+}
+
+
+static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_dh_bucket *tbl;
+
+	/* allocate the DH table for this service */
+	tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE,
+		      GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_dh_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
+		  "current service\n",
+		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
+
+	/* assign the hash buckets with the updated service */
+	ip_vs_dh_assign(tbl, svc);
+
+	return 0;
+}
+
+
+static int ip_vs_dh_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_dh_bucket *tbl = svc->sched_data;
+
+	/* got to clean up hash buckets here */
+	ip_vs_dh_flush(tbl);
+
+	/* release the table itself */
+	kfree(svc->sched_data);
+	IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
+		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
+
+	return 0;
+}
+
+
+static int ip_vs_dh_update_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_dh_bucket *tbl = svc->sched_data;
+
+	/* got to clean up hash buckets here */
+	ip_vs_dh_flush(tbl);
+
+	/* assign the hash buckets with the updated service */
+	ip_vs_dh_assign(tbl, svc);
+
+	return 0;
+}
+
+
+/*
+ *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
+ *      consider that the server is overloaded here.
+ */
+static inline int is_overloaded(struct ip_vs_dest *dest)
+{
+	return dest->flags & IP_VS_DEST_F_OVERLOAD;
+}
+
+
+/*
+ *      Destination hashing scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_dh_bucket *tbl;
+	struct iphdr *iph = ip_hdr(skb);
+
+	IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n");
+
+	tbl = (struct ip_vs_dh_bucket *)svc->sched_data;
+	dest = ip_vs_dh_get(tbl, iph->daddr);
+	if (!dest
+	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
+	    || atomic_read(&dest->weight) <= 0
+	    || is_overloaded(dest)) {
+		return NULL;
+	}
+
+	IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(iph->daddr),
+		  NIPQUAD(dest->addr.ip),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ *      IPVS DH Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_dh_scheduler =
+{
+	.name =			"dh",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	0,
+#endif
+	.init_service =		ip_vs_dh_init_svc,
+	.done_service =		ip_vs_dh_done_svc,
+	.update_service =	ip_vs_dh_update_svc,
+	.schedule =		ip_vs_dh_schedule,
+};
+
+
+static int __init ip_vs_dh_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
+}
+
+
+static void __exit ip_vs_dh_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
+}
+
+
+module_init(ip_vs_dh_init);
+module_exit(ip_vs_dh_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
new file mode 100644
index 00000000000..2eb2860dabb
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -0,0 +1,166 @@
+/*
+ * ip_vs_est.c: simple rate estimator for IPVS
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/sysctl.h>
+#include <linux/list.h>
+
+#include <net/ip_vs.h>
+
+/*
+  This code is to estimate rate in a shorter interval (such as 8
+  seconds) for virtual services and real servers. For measure rate in a
+  long interval, it is easy to implement a user level daemon which
+  periodically reads those statistical counters and measure rate.
+
+  Currently, the measurement is activated by slow timer handler. Hope
+  this measurement will not introduce too much load.
+
+  We measure rate during the last 8 seconds every 2 seconds:
+
+    avgrate = avgrate*(1-W) + rate*W
+
+    where W = 2^(-2)
+
+  NOTES.
+
+  * The stored value for average bps is scaled by 2^5, so that maximal
+    rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.
+
+  * A lot code is taken from net/sched/estimator.c
+ */
+
+
+static void estimation_timer(unsigned long arg);
+
+static LIST_HEAD(est_list);
+static DEFINE_SPINLOCK(est_lock);
+static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
+
+static void estimation_timer(unsigned long arg)
+{
+	struct ip_vs_estimator *e;
+	struct ip_vs_stats *s;
+	u32 n_conns;
+	u32 n_inpkts, n_outpkts;
+	u64 n_inbytes, n_outbytes;
+	u32 rate;
+
+	spin_lock(&est_lock);
+	list_for_each_entry(e, &est_list, list) {
+		s = container_of(e, struct ip_vs_stats, est);
+
+		spin_lock(&s->lock);
+		n_conns = s->ustats.conns;
+		n_inpkts = s->ustats.inpkts;
+		n_outpkts = s->ustats.outpkts;
+		n_inbytes = s->ustats.inbytes;
+		n_outbytes = s->ustats.outbytes;
+
+		/* scaled by 2^10, but divided 2 seconds */
+		rate = (n_conns - e->last_conns)<<9;
+		e->last_conns = n_conns;
+		e->cps += ((long)rate - (long)e->cps)>>2;
+		s->ustats.cps = (e->cps+0x1FF)>>10;
+
+		rate = (n_inpkts - e->last_inpkts)<<9;
+		e->last_inpkts = n_inpkts;
+		e->inpps += ((long)rate - (long)e->inpps)>>2;
+		s->ustats.inpps = (e->inpps+0x1FF)>>10;
+
+		rate = (n_outpkts - e->last_outpkts)<<9;
+		e->last_outpkts = n_outpkts;
+		e->outpps += ((long)rate - (long)e->outpps)>>2;
+		s->ustats.outpps = (e->outpps+0x1FF)>>10;
+
+		rate = (n_inbytes - e->last_inbytes)<<4;
+		e->last_inbytes = n_inbytes;
+		e->inbps += ((long)rate - (long)e->inbps)>>2;
+		s->ustats.inbps = (e->inbps+0xF)>>5;
+
+		rate = (n_outbytes - e->last_outbytes)<<4;
+		e->last_outbytes = n_outbytes;
+		e->outbps += ((long)rate - (long)e->outbps)>>2;
+		s->ustats.outbps = (e->outbps+0xF)>>5;
+		spin_unlock(&s->lock);
+	}
+	spin_unlock(&est_lock);
+	mod_timer(&est_timer, jiffies + 2*HZ);
+}
+
+void ip_vs_new_estimator(struct ip_vs_stats *stats)
+{
+	struct ip_vs_estimator *est = &stats->est;
+
+	INIT_LIST_HEAD(&est->list);
+
+	est->last_conns = stats->ustats.conns;
+	est->cps = stats->ustats.cps<<10;
+
+	est->last_inpkts = stats->ustats.inpkts;
+	est->inpps = stats->ustats.inpps<<10;
+
+	est->last_outpkts = stats->ustats.outpkts;
+	est->outpps = stats->ustats.outpps<<10;
+
+	est->last_inbytes = stats->ustats.inbytes;
+	est->inbps = stats->ustats.inbps<<5;
+
+	est->last_outbytes = stats->ustats.outbytes;
+	est->outbps = stats->ustats.outbps<<5;
+
+	spin_lock_bh(&est_lock);
+	list_add(&est->list, &est_list);
+	spin_unlock_bh(&est_lock);
+}
+
+void ip_vs_kill_estimator(struct ip_vs_stats *stats)
+{
+	struct ip_vs_estimator *est = &stats->est;
+
+	spin_lock_bh(&est_lock);
+	list_del(&est->list);
+	spin_unlock_bh(&est_lock);
+}
+
+void ip_vs_zero_estimator(struct ip_vs_stats *stats)
+{
+	struct ip_vs_estimator *est = &stats->est;
+
+	/* set counters zero, caller must hold the stats->lock lock */
+	est->last_inbytes = 0;
+	est->last_outbytes = 0;
+	est->last_conns = 0;
+	est->last_inpkts = 0;
+	est->last_outpkts = 0;
+	est->cps = 0;
+	est->inpps = 0;
+	est->outpps = 0;
+	est->inbps = 0;
+	est->outbps = 0;
+}
+
+int __init ip_vs_estimator_init(void)
+{
+	mod_timer(&est_timer, jiffies + 2 * HZ);
+	return 0;
+}
+
+void ip_vs_estimator_cleanup(void)
+{
+	del_timer_sync(&est_timer);
+}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
new file mode 100644
index 00000000000..2e7dbd8b73a
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -0,0 +1,410 @@
+/*
+ * ip_vs_ftp.c: IPVS ftp application module
+ *
+ * Authors:	Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * Changes:
+ *
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ * Most code here is taken from ip_masq_ftp.c in kernel 2.2. The difference
+ * is that ip_vs_ftp module handles the reverse direction to ip_masq_ftp.
+ *
+ *		IP_MASQ_FTP ftp masquerading module
+ *
+ * Version:	@(#)ip_masq_ftp.c 0.04   02/05/96
+ *
+ * Author:	Wouter Gadeyne
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <asm/unaligned.h>
+
+#include <net/ip_vs.h>
+
+
+#define SERVER_STRING "227 Entering Passive Mode ("
+#define CLIENT_STRING "PORT "
+
+
+/*
+ * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
+ * First port is set to the default port.
+ */
+static unsigned short ports[IP_VS_APP_MAX_PORTS] = {21, 0};
+module_param_array(ports, ushort, NULL, 0);
+MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands");
+
+
+/*	Dummy variable */
+static int ip_vs_ftp_pasv;
+
+
+static int
+ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
+{
+	return 0;
+}
+
+
+static int
+ip_vs_ftp_done_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
+{
+	return 0;
+}
+
+
+/*
+ * Get <addr,port> from the string "xxx.xxx.xxx.xxx,ppp,ppp", started
+ * with the "pattern" and terminated with the "term" character.
+ * <addr,port> is in network order.
+ */
+static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
+				  const char *pattern, size_t plen, char term,
+				  __be32 *addr, __be16 *port,
+				  char **start, char **end)
+{
+	unsigned char p[6];
+	int i = 0;
+
+	if (data_limit - data < plen) {
+		/* check if there is partial match */
+		if (strnicmp(data, pattern, data_limit - data) == 0)
+			return -1;
+		else
+			return 0;
+	}
+
+	if (strnicmp(data, pattern, plen) != 0) {
+		return 0;
+	}
+	*start = data + plen;
+
+	for (data = *start; *data != term; data++) {
+		if (data == data_limit)
+			return -1;
+	}
+	*end = data;
+
+	memset(p, 0, sizeof(p));
+	for (data = *start; data != *end; data++) {
+		if (*data >= '0' && *data <= '9') {
+			p[i] = p[i]*10 + *data - '0';
+		} else if (*data == ',' && i < 5) {
+			i++;
+		} else {
+			/* unexpected character */
+			return -1;
+		}
+	}
+
+	if (i != 5)
+		return -1;
+
+	*addr = get_unaligned((__be32 *)p);
+	*port = get_unaligned((__be16 *)(p + 4));
+	return 1;
+}
+
+
+/*
+ * Look at outgoing ftp packets to catch the response to a PASV command
+ * from the server (inside-to-outside).
+ * When we see one, we build a connection entry with the client address,
+ * client port 0 (unknown at the moment), the server address and the
+ * server port.  Mark the current connection entry as a control channel
+ * of the new entry. All this work is just to make the data connection
+ * can be scheduled to the right server later.
+ *
+ * The outgoing packet should be something like
+ *   "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)".
+ * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
+ */
+static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
+			 struct sk_buff *skb, int *diff)
+{
+	struct iphdr *iph;
+	struct tcphdr *th;
+	char *data, *data_limit;
+	char *start, *end;
+	union nf_inet_addr from;
+	__be16 port;
+	struct ip_vs_conn *n_cp;
+	char buf[24];		/* xxx.xxx.xxx.xxx,ppp,ppp\000 */
+	unsigned buf_len;
+	int ret;
+
+#ifdef CONFIG_IP_VS_IPV6
+	/* This application helper doesn't work with IPv6 yet,
+	 * so turn this into a no-op for IPv6 packets
+	 */
+	if (cp->af == AF_INET6)
+		return 1;
+#endif
+
+	*diff = 0;
+
+	/* Only useful for established sessions */
+	if (cp->state != IP_VS_TCP_S_ESTABLISHED)
+		return 1;
+
+	/* Linear packets are much easier to deal with. */
+	if (!skb_make_writable(skb, skb->len))
+		return 0;
+
+	if (cp->app_data == &ip_vs_ftp_pasv) {
+		iph = ip_hdr(skb);
+		th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
+		data = (char *)th + (th->doff << 2);
+		data_limit = skb_tail_pointer(skb);
+
+		if (ip_vs_ftp_get_addrport(data, data_limit,
+					   SERVER_STRING,
+					   sizeof(SERVER_STRING)-1, ')',
+					   &from.ip, &port,
+					   &start, &end) != 1)
+			return 1;
+
+		IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> "
+			  "%u.%u.%u.%u:%d detected\n",
+			  NIPQUAD(from.ip), ntohs(port),
+			  NIPQUAD(cp->caddr.ip), 0);
+
+		/*
+		 * Now update or create an connection entry for it
+		 */
+		n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port,
+					  &cp->caddr, 0);
+		if (!n_cp) {
+			n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
+					      &cp->caddr, 0,
+					      &cp->vaddr, port,
+					      &from, port,
+					      IP_VS_CONN_F_NO_CPORT,
+					      cp->dest);
+			if (!n_cp)
+				return 0;
+
+			/* add its controller */
+			ip_vs_control_add(n_cp, cp);
+		}
+
+		/*
+		 * Replace the old passive address with the new one
+		 */
+		from.ip = n_cp->vaddr.ip;
+		port = n_cp->vport;
+		sprintf(buf, "%d,%d,%d,%d,%d,%d", NIPQUAD(from.ip),
+			(ntohs(port)>>8)&255, ntohs(port)&255);
+		buf_len = strlen(buf);
+
+		/*
+		 * Calculate required delta-offset to keep TCP happy
+		 */
+		*diff = buf_len - (end-start);
+
+		if (*diff == 0) {
+			/* simply replace it with new passive address */
+			memcpy(start, buf, buf_len);
+			ret = 1;
+		} else {
+			ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start,
+					  end-start, buf, buf_len);
+		}
+
+		cp->app_data = NULL;
+		ip_vs_tcp_conn_listen(n_cp);
+		ip_vs_conn_put(n_cp);
+		return ret;
+	}
+	return 1;
+}
+
+
+/*
+ * Look at incoming ftp packets to catch the PASV/PORT command
+ * (outside-to-inside).
+ *
+ * The incoming packet having the PORT command should be something like
+ *      "PORT xxx,xxx,xxx,xxx,ppp,ppp\n".
+ * xxx,xxx,xxx,xxx is the client address, ppp,ppp is the client port number.
+ * In this case, we create a connection entry using the client address and
+ * port, so that the active ftp data connection from the server can reach
+ * the client.
+ */
+static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
+			struct sk_buff *skb, int *diff)
+{
+	struct iphdr *iph;
+	struct tcphdr *th;
+	char *data, *data_start, *data_limit;
+	char *start, *end;
+	union nf_inet_addr to;
+	__be16 port;
+	struct ip_vs_conn *n_cp;
+
+#ifdef CONFIG_IP_VS_IPV6
+	/* This application helper doesn't work with IPv6 yet,
+	 * so turn this into a no-op for IPv6 packets
+	 */
+	if (cp->af == AF_INET6)
+		return 1;
+#endif
+
+	/* no diff required for incoming packets */
+	*diff = 0;
+
+	/* Only useful for established sessions */
+	if (cp->state != IP_VS_TCP_S_ESTABLISHED)
+		return 1;
+
+	/* Linear packets are much easier to deal with. */
+	if (!skb_make_writable(skb, skb->len))
+		return 0;
+
+	/*
+	 * Detecting whether it is passive
+	 */
+	iph = ip_hdr(skb);
+	th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
+
+	/* Since there may be OPTIONS in the TCP packet and the HLEN is
+	   the length of the header in 32-bit multiples, it is accurate
+	   to calculate data address by th+HLEN*4 */
+	data = data_start = (char *)th + (th->doff << 2);
+	data_limit = skb_tail_pointer(skb);
+
+	while (data <= data_limit - 6) {
+		if (strnicmp(data, "PASV\r\n", 6) == 0) {
+			/* Passive mode on */
+			IP_VS_DBG(7, "got PASV at %td of %td\n",
+				  data - data_start,
+				  data_limit - data_start);
+			cp->app_data = &ip_vs_ftp_pasv;
+			return 1;
+		}
+		data++;
+	}
+
+	/*
+	 * To support virtual FTP server, the scenerio is as follows:
+	 *       FTP client ----> Load Balancer ----> FTP server
+	 * First detect the port number in the application data,
+	 * then create a new connection entry for the coming data
+	 * connection.
+	 */
+	if (ip_vs_ftp_get_addrport(data_start, data_limit,
+				   CLIENT_STRING, sizeof(CLIENT_STRING)-1,
+				   '\r', &to.ip, &port,
+				   &start, &end) != 1)
+		return 1;
+
+	IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n",
+		  NIPQUAD(to.ip), ntohs(port));
+
+	/* Passive mode off */
+	cp->app_data = NULL;
+
+	/*
+	 * Now update or create a connection entry for it
+	 */
+	IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
+		  ip_vs_proto_name(iph->protocol),
+		  NIPQUAD(to.ip), ntohs(port), NIPQUAD(cp->vaddr.ip), 0);
+
+	n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol,
+				 &to, port,
+				 &cp->vaddr, htons(ntohs(cp->vport)-1));
+	if (!n_cp) {
+		n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
+				      &to, port,
+				      &cp->vaddr, htons(ntohs(cp->vport)-1),
+				      &cp->daddr, htons(ntohs(cp->dport)-1),
+				      0,
+				      cp->dest);
+		if (!n_cp)
+			return 0;
+
+		/* add its controller */
+		ip_vs_control_add(n_cp, cp);
+	}
+
+	/*
+	 *	Move tunnel to listen state
+	 */
+	ip_vs_tcp_conn_listen(n_cp);
+	ip_vs_conn_put(n_cp);
+
+	return 1;
+}
+
+
+static struct ip_vs_app ip_vs_ftp = {
+	.name =		"ftp",
+	.type =		IP_VS_APP_TYPE_FTP,
+	.protocol =	IPPROTO_TCP,
+	.module =	THIS_MODULE,
+	.incs_list =	LIST_HEAD_INIT(ip_vs_ftp.incs_list),
+	.init_conn =	ip_vs_ftp_init_conn,
+	.done_conn =	ip_vs_ftp_done_conn,
+	.bind_conn =	NULL,
+	.unbind_conn =	NULL,
+	.pkt_out =	ip_vs_ftp_out,
+	.pkt_in =	ip_vs_ftp_in,
+};
+
+
+/*
+ *	ip_vs_ftp initialization
+ */
+static int __init ip_vs_ftp_init(void)
+{
+	int i, ret;
+	struct ip_vs_app *app = &ip_vs_ftp;
+
+	ret = register_ip_vs_app(app);
+	if (ret)
+		return ret;
+
+	for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
+		if (!ports[i])
+			continue;
+		ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
+		if (ret)
+			break;
+		IP_VS_INFO("%s: loaded support on port[%d] = %d\n",
+			   app->name, i, ports[i]);
+	}
+
+	if (ret)
+		unregister_ip_vs_app(app);
+
+	return ret;
+}
+
+
+/*
+ *	ip_vs_ftp finish.
+ */
+static void __exit ip_vs_ftp_exit(void)
+{
+	unregister_ip_vs_app(&ip_vs_ftp);
+}
+
+
+module_init(ip_vs_ftp_init);
+module_exit(ip_vs_ftp_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
new file mode 100644
index 00000000000..6ecef3518ca
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -0,0 +1,555 @@
+/*
+ * IPVS:        Locality-Based Least-Connection scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@gnuchina.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Martin Hamilton         :    fixed the terrible locking bugs
+ *                                   *lock(tbl->lock) ==> *lock(&tbl->lock)
+ *     Wensong Zhang           :    fixed the uninitilized tbl->lock bug
+ *     Wensong Zhang           :    added doing full expiration check to
+ *                                   collect stale entries of 24+ hours when
+ *                                   no partial expire check in a half hour
+ *     Julian Anastasov        :    replaced del_timer call with del_timer_sync
+ *                                   to avoid the possible race between timer
+ *                                   handler and del_timer thread in SMP
+ *
+ */
+
+/*
+ * The lblc algorithm is as follows (pseudo code):
+ *
+ *       if cachenode[dest_ip] is null then
+ *               n, cachenode[dest_ip] <- {weighted least-conn node};
+ *       else
+ *               n <- cachenode[dest_ip];
+ *               if (n is dead) OR
+ *                  (n.conns>n.weight AND
+ *                   there is a node m with m.conns<m.weight/2) then
+ *                 n, cachenode[dest_ip] <- {weighted least-conn node};
+ *
+ *       return n;
+ *
+ * Thanks must go to Wenzhuo Zhang for talking WCCP to me and pushing
+ * me to write this module.
+ */
+
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/jiffies.h>
+
+/* for sysctl */
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *    It is for garbage collection of stale IPVS lblc entries,
+ *    when the table is full.
+ */
+#define CHECK_EXPIRE_INTERVAL   (60*HZ)
+#define ENTRY_TIMEOUT           (6*60*HZ)
+
+/*
+ *    It is for full expiration check.
+ *    When there is no partial expiration check (garbage collection)
+ *    in a half hour, do a full expiration check to collect stale
+ *    entries that haven't been touched for a day.
+ */
+#define COUNT_FOR_FULL_EXPIRATION   30
+static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
+
+
+/*
+ *     for IPVS lblc entry hash table
+ */
+#ifndef CONFIG_IP_VS_LBLC_TAB_BITS
+#define CONFIG_IP_VS_LBLC_TAB_BITS      10
+#endif
+#define IP_VS_LBLC_TAB_BITS     CONFIG_IP_VS_LBLC_TAB_BITS
+#define IP_VS_LBLC_TAB_SIZE     (1 << IP_VS_LBLC_TAB_BITS)
+#define IP_VS_LBLC_TAB_MASK     (IP_VS_LBLC_TAB_SIZE - 1)
+
+
+/*
+ *      IPVS lblc entry represents an association between destination
+ *      IP address and its destination server
+ */
+struct ip_vs_lblc_entry {
+	struct list_head        list;
+	__be32                  addr;           /* destination IP address */
+	struct ip_vs_dest       *dest;          /* real server (cache) */
+	unsigned long           lastuse;        /* last used time */
+};
+
+
+/*
+ *      IPVS lblc hash table
+ */
+struct ip_vs_lblc_table {
+	struct list_head        bucket[IP_VS_LBLC_TAB_SIZE];  /* hash bucket */
+	atomic_t                entries;        /* number of entries */
+	int                     max_size;       /* maximum size of entries */
+	struct timer_list       periodic_timer; /* collect stale entries */
+	int                     rover;          /* rover for expire check */
+	int                     counter;        /* counter for no expire */
+};
+
+
+/*
+ *      IPVS LBLC sysctl table
+ */
+
+static ctl_table vs_vars_table[] = {
+	{
+		.procname	= "lblc_expiration",
+		.data		= &sysctl_ip_vs_lblc_expiration,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{ .ctl_name = 0 }
+};
+
+static struct ctl_table_header * sysctl_header;
+
+static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
+{
+	list_del(&en->list);
+	/*
+	 * We don't kfree dest because it is refered either by its service
+	 * or the trash dest list.
+	 */
+	atomic_dec(&en->dest->refcnt);
+	kfree(en);
+}
+
+
+/*
+ *	Returns hash value for IPVS LBLC entry
+ */
+static inline unsigned ip_vs_lblc_hashkey(__be32 addr)
+{
+	return (ntohl(addr)*2654435761UL) & IP_VS_LBLC_TAB_MASK;
+}
+
+
+/*
+ *	Hash an entry in the ip_vs_lblc_table.
+ *	returns bool success.
+ */
+static void
+ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
+{
+	unsigned hash = ip_vs_lblc_hashkey(en->addr);
+
+	list_add(&en->list, &tbl->bucket[hash]);
+	atomic_inc(&tbl->entries);
+}
+
+
+/*
+ *  Get ip_vs_lblc_entry associated with supplied parameters. Called under read
+ *  lock
+ */
+static inline struct ip_vs_lblc_entry *
+ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr)
+{
+	unsigned hash = ip_vs_lblc_hashkey(addr);
+	struct ip_vs_lblc_entry *en;
+
+	list_for_each_entry(en, &tbl->bucket[hash], list)
+		if (en->addr == addr)
+			return en;
+
+	return NULL;
+}
+
+
+/*
+ * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP
+ * address to a server. Called under write lock.
+ */
+static inline struct ip_vs_lblc_entry *
+ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr,
+	       struct ip_vs_dest *dest)
+{
+	struct ip_vs_lblc_entry *en;
+
+	en = ip_vs_lblc_get(tbl, daddr);
+	if (!en) {
+		en = kmalloc(sizeof(*en), GFP_ATOMIC);
+		if (!en) {
+			IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
+			return NULL;
+		}
+
+		en->addr = daddr;
+		en->lastuse = jiffies;
+
+		atomic_inc(&dest->refcnt);
+		en->dest = dest;
+
+		ip_vs_lblc_hash(tbl, en);
+	} else if (en->dest != dest) {
+		atomic_dec(&en->dest->refcnt);
+		atomic_inc(&dest->refcnt);
+		en->dest = dest;
+	}
+
+	return en;
+}
+
+
+/*
+ *      Flush all the entries of the specified table.
+ */
+static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
+{
+	struct ip_vs_lblc_entry *en, *nxt;
+	int i;
+
+	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
+			ip_vs_lblc_free(en);
+			atomic_dec(&tbl->entries);
+		}
+	}
+}
+
+
+static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
+{
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
+	struct ip_vs_lblc_entry *en, *nxt;
+	unsigned long now = jiffies;
+	int i, j;
+
+	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
+		j = (j + 1) & IP_VS_LBLC_TAB_MASK;
+
+		write_lock(&svc->sched_lock);
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+			if (time_before(now,
+					en->lastuse + sysctl_ip_vs_lblc_expiration))
+				continue;
+
+			ip_vs_lblc_free(en);
+			atomic_dec(&tbl->entries);
+		}
+		write_unlock(&svc->sched_lock);
+	}
+	tbl->rover = j;
+}
+
+
+/*
+ *      Periodical timer handler for IPVS lblc table
+ *      It is used to collect stale entries when the number of entries
+ *      exceeds the maximum size of the table.
+ *
+ *      Fixme: we probably need more complicated algorithm to collect
+ *             entries that have not been used for a long time even
+ *             if the number of entries doesn't exceed the maximum size
+ *             of the table.
+ *      The full expiration check is for this purpose now.
+ */
+static void ip_vs_lblc_check_expire(unsigned long data)
+{
+	struct ip_vs_service *svc = (struct ip_vs_service *) data;
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
+	unsigned long now = jiffies;
+	int goal;
+	int i, j;
+	struct ip_vs_lblc_entry *en, *nxt;
+
+	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
+		/* do full expiration check */
+		ip_vs_lblc_full_check(svc);
+		tbl->counter = 1;
+		goto out;
+	}
+
+	if (atomic_read(&tbl->entries) <= tbl->max_size) {
+		tbl->counter++;
+		goto out;
+	}
+
+	goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
+	if (goal > tbl->max_size/2)
+		goal = tbl->max_size/2;
+
+	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
+		j = (j + 1) & IP_VS_LBLC_TAB_MASK;
+
+		write_lock(&svc->sched_lock);
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+			if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
+				continue;
+
+			ip_vs_lblc_free(en);
+			atomic_dec(&tbl->entries);
+			goal--;
+		}
+		write_unlock(&svc->sched_lock);
+		if (goal <= 0)
+			break;
+	}
+	tbl->rover = j;
+
+  out:
+	mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
+}
+
+
+static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_lblc_table *tbl;
+
+	/*
+	 *    Allocate the ip_vs_lblc_table for this service
+	 */
+	tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for "
+		  "current service\n", sizeof(*tbl));
+
+	/*
+	 *    Initialize the hash buckets
+	 */
+	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
+		INIT_LIST_HEAD(&tbl->bucket[i]);
+	}
+	tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
+	tbl->rover = 0;
+	tbl->counter = 1;
+
+	/*
+	 *    Hook periodic timer for garbage collection
+	 */
+	setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
+			(unsigned long)svc);
+	mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
+
+	return 0;
+}
+
+
+static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
+
+	/* remove periodic timer */
+	del_timer_sync(&tbl->periodic_timer);
+
+	/* got to clean up table entries here */
+	ip_vs_lblc_flush(tbl);
+
+	/* release the table itself */
+	kfree(tbl);
+	IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
+		  sizeof(*tbl));
+
+	return 0;
+}
+
+
+static inline struct ip_vs_dest *
+__ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct ip_vs_dest *dest, *least;
+	int loh, doh;
+
+	/*
+	 * We think the overhead of processing active connections is fifty
+	 * times higher than that of inactive connections in average. (This
+	 * fifty times might not be accurate, we will change it later.) We
+	 * use the following formula to estimate the overhead:
+	 *                dest->activeconns*50 + dest->inactconns
+	 * and the load:
+	 *                (dest overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *                h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connection.
+	 */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+		if (atomic_read(&dest->weight) > 0) {
+			least = dest;
+			loh = atomic_read(&least->activeconns) * 50
+				+ atomic_read(&least->inactconns);
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/*
+	 *    Find the destination with the least load.
+	 */
+  nextstage:
+	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr.ip), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+/*
+ *   If this destination server is overloaded and there is a less loaded
+ *   server, then return true.
+ */
+static inline int
+is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+	if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
+		struct ip_vs_dest *d;
+
+		list_for_each_entry(d, &svc->destinations, n_list) {
+			if (atomic_read(&d->activeconns)*2
+			    < atomic_read(&d->weight)) {
+				return 1;
+			}
+		}
+	}
+	return 0;
+}
+
+
+/*
+ *    Locality-Based (weighted) Least-Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
+	struct iphdr *iph = ip_hdr(skb);
+	struct ip_vs_dest *dest = NULL;
+	struct ip_vs_lblc_entry *en;
+
+	IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
+
+	/* First look in our cache */
+	read_lock(&svc->sched_lock);
+	en = ip_vs_lblc_get(tbl, iph->daddr);
+	if (en) {
+		/* We only hold a read lock, but this is atomic */
+		en->lastuse = jiffies;
+
+		/*
+		 * If the destination is not available, i.e. it's in the trash,
+		 * we must ignore it, as it may be removed from under our feet,
+		 * if someone drops our reference count. Our caller only makes
+		 * sure that destinations, that are not in the trash, are not
+		 * moved to the trash, while we are scheduling. But anyone can
+		 * free up entries from the trash at any time.
+		 */
+
+		if (en->dest->flags & IP_VS_DEST_F_AVAILABLE)
+			dest = en->dest;
+	}
+	read_unlock(&svc->sched_lock);
+
+	/* If the destination has a weight and is not overloaded, use it */
+	if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
+		goto out;
+
+	/* No cache entry or it is invalid, time to schedule */
+	dest = __ip_vs_lblc_schedule(svc, iph);
+	if (!dest) {
+		IP_VS_DBG(1, "no destination available\n");
+		return NULL;
+	}
+
+	/* If we fail to create a cache entry, we'll just use the valid dest */
+	write_lock(&svc->sched_lock);
+	ip_vs_lblc_new(tbl, iph->daddr, dest);
+	write_unlock(&svc->sched_lock);
+
+out:
+	IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(iph->daddr),
+		  NIPQUAD(dest->addr.ip),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ *      IPVS LBLC Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_lblc_scheduler =
+{
+	.name =			"lblc",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	0,
+#endif
+	.init_service =		ip_vs_lblc_init_svc,
+	.done_service =		ip_vs_lblc_done_svc,
+	.schedule =		ip_vs_lblc_schedule,
+};
+
+
+static int __init ip_vs_lblc_init(void)
+{
+	int ret;
+
+	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+	ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+	if (ret)
+		unregister_sysctl_table(sysctl_header);
+	return ret;
+}
+
+
+static void __exit ip_vs_lblc_cleanup(void)
+{
+	unregister_sysctl_table(sysctl_header);
+	unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+}
+
+
+module_init(ip_vs_lblc_init);
+module_exit(ip_vs_lblc_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
new file mode 100644
index 00000000000..1f75ea83bcf
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -0,0 +1,755 @@
+/*
+ * IPVS:        Locality-Based Least-Connection with Replication scheduler
+ *
+ * Authors:     Wensong Zhang <wensong@gnuchina.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Julian Anastasov        :    Added the missing (dest->weight>0)
+ *                                  condition in the ip_vs_dest_set_max.
+ *
+ */
+
+/*
+ * The lblc/r algorithm is as follows (pseudo code):
+ *
+ *       if serverSet[dest_ip] is null then
+ *               n, serverSet[dest_ip] <- {weighted least-conn node};
+ *       else
+ *               n <- {least-conn (alive) node in serverSet[dest_ip]};
+ *               if (n is null) OR
+ *                  (n.conns>n.weight AND
+ *                   there is a node m with m.conns<m.weight/2) then
+ *                   n <- {weighted least-conn node};
+ *                   add n to serverSet[dest_ip];
+ *               if |serverSet[dest_ip]| > 1 AND
+ *                   now - serverSet[dest_ip].lastMod > T then
+ *                   m <- {most conn node in serverSet[dest_ip]};
+ *                   remove m from serverSet[dest_ip];
+ *       if serverSet[dest_ip] changed then
+ *               serverSet[dest_ip].lastMod <- now;
+ *
+ *       return n;
+ *
+ */
+
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/jiffies.h>
+
+/* for sysctl */
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+#include <net/net_namespace.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *    It is for garbage collection of stale IPVS lblcr entries,
+ *    when the table is full.
+ */
+#define CHECK_EXPIRE_INTERVAL   (60*HZ)
+#define ENTRY_TIMEOUT           (6*60*HZ)
+
+/*
+ *    It is for full expiration check.
+ *    When there is no partial expiration check (garbage collection)
+ *    in a half hour, do a full expiration check to collect stale
+ *    entries that haven't been touched for a day.
+ */
+#define COUNT_FOR_FULL_EXPIRATION   30
+static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
+
+
+/*
+ *     for IPVS lblcr entry hash table
+ */
+#ifndef CONFIG_IP_VS_LBLCR_TAB_BITS
+#define CONFIG_IP_VS_LBLCR_TAB_BITS      10
+#endif
+#define IP_VS_LBLCR_TAB_BITS     CONFIG_IP_VS_LBLCR_TAB_BITS
+#define IP_VS_LBLCR_TAB_SIZE     (1 << IP_VS_LBLCR_TAB_BITS)
+#define IP_VS_LBLCR_TAB_MASK     (IP_VS_LBLCR_TAB_SIZE - 1)
+
+
+/*
+ *      IPVS destination set structure and operations
+ */
+struct ip_vs_dest_list {
+	struct ip_vs_dest_list  *next;          /* list link */
+	struct ip_vs_dest       *dest;          /* destination server */
+};
+
+struct ip_vs_dest_set {
+	atomic_t                size;           /* set size */
+	unsigned long           lastmod;        /* last modified time */
+	struct ip_vs_dest_list  *list;          /* destination list */
+	rwlock_t	        lock;           /* lock for this list */
+};
+
+
+static struct ip_vs_dest_list *
+ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
+{
+	struct ip_vs_dest_list *e;
+
+	for (e=set->list; e!=NULL; e=e->next) {
+		if (e->dest == dest)
+			/* already existed */
+			return NULL;
+	}
+
+	e = kmalloc(sizeof(*e), GFP_ATOMIC);
+	if (e == NULL) {
+		IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n");
+		return NULL;
+	}
+
+	atomic_inc(&dest->refcnt);
+	e->dest = dest;
+
+	/* link it to the list */
+	e->next = set->list;
+	set->list = e;
+	atomic_inc(&set->size);
+
+	set->lastmod = jiffies;
+	return e;
+}
+
+static void
+ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
+{
+	struct ip_vs_dest_list *e, **ep;
+
+	for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
+		if (e->dest == dest) {
+			/* HIT */
+			*ep = e->next;
+			atomic_dec(&set->size);
+			set->lastmod = jiffies;
+			atomic_dec(&e->dest->refcnt);
+			kfree(e);
+			break;
+		}
+		ep = &e->next;
+	}
+}
+
+static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
+{
+	struct ip_vs_dest_list *e, **ep;
+
+	write_lock(&set->lock);
+	for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
+		*ep = e->next;
+		/*
+		 * We don't kfree dest because it is refered either
+		 * by its service or by the trash dest list.
+		 */
+		atomic_dec(&e->dest->refcnt);
+		kfree(e);
+	}
+	write_unlock(&set->lock);
+}
+
+/* get weighted least-connection node in the destination set */
+static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
+{
+	register struct ip_vs_dest_list *e;
+	struct ip_vs_dest *dest, *least;
+	int loh, doh;
+
+	if (set == NULL)
+		return NULL;
+
+	/* select the first destination server, whose weight > 0 */
+	for (e=set->list; e!=NULL; e=e->next) {
+		least = e->dest;
+		if (least->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		if ((atomic_read(&least->weight) > 0)
+		    && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
+			loh = atomic_read(&least->activeconns) * 50
+				+ atomic_read(&least->inactconns);
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/* find the destination with the weighted least load */
+  nextstage:
+	for (e=e->next; e!=NULL; e=e->next) {
+		dest = e->dest;
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		if ((loh * atomic_read(&dest->weight) >
+		     doh * atomic_read(&least->weight))
+		    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr.ip), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+	return least;
+}
+
+
+/* get weighted most-connection node in the destination set */
+static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
+{
+	register struct ip_vs_dest_list *e;
+	struct ip_vs_dest *dest, *most;
+	int moh, doh;
+
+	if (set == NULL)
+		return NULL;
+
+	/* select the first destination server, whose weight > 0 */
+	for (e=set->list; e!=NULL; e=e->next) {
+		most = e->dest;
+		if (atomic_read(&most->weight) > 0) {
+			moh = atomic_read(&most->activeconns) * 50
+				+ atomic_read(&most->inactconns);
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/* find the destination with the weighted most load */
+  nextstage:
+	for (e=e->next; e!=NULL; e=e->next) {
+		dest = e->dest;
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
+		if ((moh * atomic_read(&dest->weight) <
+		     doh * atomic_read(&most->weight))
+		    && (atomic_read(&dest->weight) > 0)) {
+			most = dest;
+			moh = doh;
+		}
+	}
+
+	IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(most->addr.ip), ntohs(most->port),
+		  atomic_read(&most->activeconns),
+		  atomic_read(&most->refcnt),
+		  atomic_read(&most->weight), moh);
+	return most;
+}
+
+
+/*
+ *      IPVS lblcr entry represents an association between destination
+ *      IP address and its destination server set
+ */
+struct ip_vs_lblcr_entry {
+	struct list_head        list;
+	__be32                   addr;           /* destination IP address */
+	struct ip_vs_dest_set   set;            /* destination server set */
+	unsigned long           lastuse;        /* last used time */
+};
+
+
+/*
+ *      IPVS lblcr hash table
+ */
+struct ip_vs_lblcr_table {
+	struct list_head        bucket[IP_VS_LBLCR_TAB_SIZE];  /* hash bucket */
+	atomic_t                entries;        /* number of entries */
+	int                     max_size;       /* maximum size of entries */
+	struct timer_list       periodic_timer; /* collect stale entries */
+	int                     rover;          /* rover for expire check */
+	int                     counter;        /* counter for no expire */
+};
+
+
+/*
+ *      IPVS LBLCR sysctl table
+ */
+
+static ctl_table vs_vars_table[] = {
+	{
+		.procname	= "lblcr_expiration",
+		.data		= &sysctl_ip_vs_lblcr_expiration,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{ .ctl_name = 0 }
+};
+
+static struct ctl_table_header * sysctl_header;
+
+static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
+{
+	list_del(&en->list);
+	ip_vs_dest_set_eraseall(&en->set);
+	kfree(en);
+}
+
+
+/*
+ *	Returns hash value for IPVS LBLCR entry
+ */
+static inline unsigned ip_vs_lblcr_hashkey(__be32 addr)
+{
+	return (ntohl(addr)*2654435761UL) & IP_VS_LBLCR_TAB_MASK;
+}
+
+
+/*
+ *	Hash an entry in the ip_vs_lblcr_table.
+ *	returns bool success.
+ */
+static void
+ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
+{
+	unsigned hash = ip_vs_lblcr_hashkey(en->addr);
+
+	list_add(&en->list, &tbl->bucket[hash]);
+	atomic_inc(&tbl->entries);
+}
+
+
+/*
+ *  Get ip_vs_lblcr_entry associated with supplied parameters. Called under
+ *  read lock.
+ */
+static inline struct ip_vs_lblcr_entry *
+ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr)
+{
+	unsigned hash = ip_vs_lblcr_hashkey(addr);
+	struct ip_vs_lblcr_entry *en;
+
+	list_for_each_entry(en, &tbl->bucket[hash], list)
+		if (en->addr == addr)
+			return en;
+
+	return NULL;
+}
+
+
+/*
+ * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
+ * IP address to a server. Called under write lock.
+ */
+static inline struct ip_vs_lblcr_entry *
+ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl,  __be32 daddr,
+		struct ip_vs_dest *dest)
+{
+	struct ip_vs_lblcr_entry *en;
+
+	en = ip_vs_lblcr_get(tbl, daddr);
+	if (!en) {
+		en = kmalloc(sizeof(*en), GFP_ATOMIC);
+		if (!en) {
+			IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
+			return NULL;
+		}
+
+		en->addr = daddr;
+		en->lastuse = jiffies;
+
+		/* initilize its dest set */
+		atomic_set(&(en->set.size), 0);
+		en->set.list = NULL;
+		rwlock_init(&en->set.lock);
+
+		ip_vs_lblcr_hash(tbl, en);
+	}
+
+	write_lock(&en->set.lock);
+	ip_vs_dest_set_insert(&en->set, dest);
+	write_unlock(&en->set.lock);
+
+	return en;
+}
+
+
+/*
+ *      Flush all the entries of the specified table.
+ */
+static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
+{
+	int i;
+	struct ip_vs_lblcr_entry *en, *nxt;
+
+	/* No locking required, only called during cleanup. */
+	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
+			ip_vs_lblcr_free(en);
+		}
+	}
+}
+
+
+static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
+{
+	struct ip_vs_lblcr_table *tbl = svc->sched_data;
+	unsigned long now = jiffies;
+	int i, j;
+	struct ip_vs_lblcr_entry *en, *nxt;
+
+	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
+
+		write_lock(&svc->sched_lock);
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+			if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
+				       now))
+				continue;
+
+			ip_vs_lblcr_free(en);
+			atomic_dec(&tbl->entries);
+		}
+		write_unlock(&svc->sched_lock);
+	}
+	tbl->rover = j;
+}
+
+
+/*
+ *      Periodical timer handler for IPVS lblcr table
+ *      It is used to collect stale entries when the number of entries
+ *      exceeds the maximum size of the table.
+ *
+ *      Fixme: we probably need more complicated algorithm to collect
+ *             entries that have not been used for a long time even
+ *             if the number of entries doesn't exceed the maximum size
+ *             of the table.
+ *      The full expiration check is for this purpose now.
+ */
+static void ip_vs_lblcr_check_expire(unsigned long data)
+{
+	struct ip_vs_service *svc = (struct ip_vs_service *) data;
+	struct ip_vs_lblcr_table *tbl = svc->sched_data;
+	unsigned long now = jiffies;
+	int goal;
+	int i, j;
+	struct ip_vs_lblcr_entry *en, *nxt;
+
+	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
+		/* do full expiration check */
+		ip_vs_lblcr_full_check(svc);
+		tbl->counter = 1;
+		goto out;
+	}
+
+	if (atomic_read(&tbl->entries) <= tbl->max_size) {
+		tbl->counter++;
+		goto out;
+	}
+
+	goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
+	if (goal > tbl->max_size/2)
+		goal = tbl->max_size/2;
+
+	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
+
+		write_lock(&svc->sched_lock);
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+			if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
+				continue;
+
+			ip_vs_lblcr_free(en);
+			atomic_dec(&tbl->entries);
+			goal--;
+		}
+		write_unlock(&svc->sched_lock);
+		if (goal <= 0)
+			break;
+	}
+	tbl->rover = j;
+
+  out:
+	mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
+}
+
+static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_lblcr_table *tbl;
+
+	/*
+	 *    Allocate the ip_vs_lblcr_table for this service
+	 */
+	tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for "
+		  "current service\n", sizeof(*tbl));
+
+	/*
+	 *    Initialize the hash buckets
+	 */
+	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		INIT_LIST_HEAD(&tbl->bucket[i]);
+	}
+	tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
+	tbl->rover = 0;
+	tbl->counter = 1;
+
+	/*
+	 *    Hook periodic timer for garbage collection
+	 */
+	setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
+			(unsigned long)svc);
+	mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
+
+	return 0;
+}
+
+
+static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_lblcr_table *tbl = svc->sched_data;
+
+	/* remove periodic timer */
+	del_timer_sync(&tbl->periodic_timer);
+
+	/* got to clean up table entries here */
+	ip_vs_lblcr_flush(tbl);
+
+	/* release the table itself */
+	kfree(tbl);
+	IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
+		  sizeof(*tbl));
+
+	return 0;
+}
+
+
+static inline struct ip_vs_dest *
+__ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct ip_vs_dest *dest, *least;
+	int loh, doh;
+
+	/*
+	 * We think the overhead of processing active connections is fifty
+	 * times higher than that of inactive connections in average. (This
+	 * fifty times might not be accurate, we will change it later.) We
+	 * use the following formula to estimate the overhead:
+	 *                dest->activeconns*50 + dest->inactconns
+	 * and the load:
+	 *                (dest overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *                h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connection.
+	 */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		if (atomic_read(&dest->weight) > 0) {
+			least = dest;
+			loh = atomic_read(&least->activeconns) * 50
+				+ atomic_read(&least->inactconns);
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/*
+	 *    Find the destination with the least load.
+	 */
+  nextstage:
+	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr.ip), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+/*
+ *   If this destination server is overloaded and there is a less loaded
+ *   server, then return true.
+ */
+static inline int
+is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+	if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
+		struct ip_vs_dest *d;
+
+		list_for_each_entry(d, &svc->destinations, n_list) {
+			if (atomic_read(&d->activeconns)*2
+			    < atomic_read(&d->weight)) {
+				return 1;
+			}
+		}
+	}
+	return 0;
+}
+
+
+/*
+ *    Locality-Based (weighted) Least-Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_lblcr_table *tbl = svc->sched_data;
+	struct iphdr *iph = ip_hdr(skb);
+	struct ip_vs_dest *dest = NULL;
+	struct ip_vs_lblcr_entry *en;
+
+	IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
+
+	/* First look in our cache */
+	read_lock(&svc->sched_lock);
+	en = ip_vs_lblcr_get(tbl, iph->daddr);
+	if (en) {
+		/* We only hold a read lock, but this is atomic */
+		en->lastuse = jiffies;
+
+		/* Get the least loaded destination */
+		read_lock(&en->set.lock);
+		dest = ip_vs_dest_set_min(&en->set);
+		read_unlock(&en->set.lock);
+
+		/* More than one destination + enough time passed by, cleanup */
+		if (atomic_read(&en->set.size) > 1 &&
+				time_after(jiffies, en->set.lastmod +
+				sysctl_ip_vs_lblcr_expiration)) {
+			struct ip_vs_dest *m;
+
+			write_lock(&en->set.lock);
+			m = ip_vs_dest_set_max(&en->set);
+			if (m)
+				ip_vs_dest_set_erase(&en->set, m);
+			write_unlock(&en->set.lock);
+		}
+
+		/* If the destination is not overloaded, use it */
+		if (dest && !is_overloaded(dest, svc)) {
+			read_unlock(&svc->sched_lock);
+			goto out;
+		}
+
+		/* The cache entry is invalid, time to schedule */
+		dest = __ip_vs_lblcr_schedule(svc, iph);
+		if (!dest) {
+			IP_VS_DBG(1, "no destination available\n");
+			read_unlock(&svc->sched_lock);
+			return NULL;
+		}
+
+		/* Update our cache entry */
+		write_lock(&en->set.lock);
+		ip_vs_dest_set_insert(&en->set, dest);
+		write_unlock(&en->set.lock);
+	}
+	read_unlock(&svc->sched_lock);
+
+	if (dest)
+		goto out;
+
+	/* No cache entry, time to schedule */
+	dest = __ip_vs_lblcr_schedule(svc, iph);
+	if (!dest) {
+		IP_VS_DBG(1, "no destination available\n");
+		return NULL;
+	}
+
+	/* If we fail to create a cache entry, we'll just use the valid dest */
+	write_lock(&svc->sched_lock);
+	ip_vs_lblcr_new(tbl, iph->daddr, dest);
+	write_unlock(&svc->sched_lock);
+
+out:
+	IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(iph->daddr),
+		  NIPQUAD(dest->addr.ip),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ *      IPVS LBLCR Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
+{
+	.name =			"lblcr",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	0,
+#endif
+	.init_service =		ip_vs_lblcr_init_svc,
+	.done_service =		ip_vs_lblcr_done_svc,
+	.schedule =		ip_vs_lblcr_schedule,
+};
+
+
+static int __init ip_vs_lblcr_init(void)
+{
+	int ret;
+
+	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+	ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+	if (ret)
+		unregister_sysctl_table(sysctl_header);
+	return ret;
+}
+
+
+static void __exit ip_vs_lblcr_cleanup(void)
+{
+	unregister_sysctl_table(sysctl_header);
+	unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+}
+
+
+module_init(ip_vs_lblcr_init);
+module_exit(ip_vs_lblcr_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
new file mode 100644
index 00000000000..b69f808ac46
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -0,0 +1,103 @@
+/*
+ * IPVS:        Least-Connection Scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Wensong Zhang            :     added the ip_vs_lc_update_svc
+ *     Wensong Zhang            :     added any dest with weight=0 is quiesced
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static inline unsigned int
+ip_vs_lc_dest_overhead(struct ip_vs_dest *dest)
+{
+	/*
+	 * We think the overhead of processing active connections is 256
+	 * times higher than that of inactive connections in average. (This
+	 * 256 times might not be accurate, we will change it later) We
+	 * use the following formula to estimate the overhead now:
+	 *		  dest->activeconns*256 + dest->inactconns
+	 */
+	return (atomic_read(&dest->activeconns) << 8) +
+		atomic_read(&dest->inactconns);
+}
+
+
+/*
+ *	Least Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest, *least = NULL;
+	unsigned int loh = 0, doh;
+
+	IP_VS_DBG(6, "ip_vs_lc_schedule(): Scheduling...\n");
+
+	/*
+	 * Simply select the server with the least number of
+	 *        (activeconns<<5) + inactconns
+	 * Except whose weight is equal to zero.
+	 * If the weight is equal to zero, it means that the server is
+	 * quiesced, the existing connections to the server still get
+	 * served, but no new connection is assigned to the server.
+	 */
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
+		    atomic_read(&dest->weight) == 0)
+			continue;
+		doh = ip_vs_lc_dest_overhead(dest);
+		if (!least || doh < loh) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	if (least)
+	IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d inactconns %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+		      atomic_read(&least->activeconns),
+		      atomic_read(&least->inactconns));
+
+	return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_lc_scheduler = {
+	.name =			"lc",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
+	.schedule =		ip_vs_lc_schedule,
+};
+
+
+static int __init ip_vs_lc_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ;
+}
+
+static void __exit ip_vs_lc_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_lc_scheduler);
+}
+
+module_init(ip_vs_lc_init);
+module_exit(ip_vs_lc_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
new file mode 100644
index 00000000000..9a2d8033f08
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -0,0 +1,138 @@
+/*
+ * IPVS:        Never Queue scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The NQ algorithm adopts a two-speed model. When there is an idle server
+ * available, the job will be sent to the idle server, instead of waiting
+ * for a fast one. When there is no idle server available, the job will be
+ * sent to the server that minimize its expected delay (The Shortest
+ * Expected Delay scheduling algorithm).
+ *
+ * See the following paper for more information:
+ * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
+ * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
+ * pages 986-994, 1988.
+ *
+ * Thanks must go to Marko Buuri <marko@buuri.name> for talking NQ to me.
+ *
+ * The difference between NQ and SED is that NQ can improve overall
+ * system utilization.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static inline unsigned int
+ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
+{
+	/*
+	 * We only use the active connection number in the cost
+	 * calculation here.
+	 */
+	return atomic_read(&dest->activeconns) + 1;
+}
+
+
+/*
+ *	Weighted Least Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest, *least = NULL;
+	unsigned int loh = 0, doh;
+
+	IP_VS_DBG(6, "ip_vs_nq_schedule(): Scheduling...\n");
+
+	/*
+	 * We calculate the load of each dest server as follows:
+	 *	(server expected overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *		  h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connections.
+	 */
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD ||
+		    !atomic_read(&dest->weight))
+			continue;
+
+		doh = ip_vs_nq_dest_overhead(dest);
+
+		/* return the server directly if it is idle */
+		if (atomic_read(&dest->activeconns) == 0) {
+			least = dest;
+			loh = doh;
+			goto out;
+		}
+
+		if (!least ||
+		    (loh * atomic_read(&dest->weight) >
+		     doh * atomic_read(&least->weight))) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	if (!least)
+		return NULL;
+
+  out:
+	IP_VS_DBG_BUF(6, "NQ: server %s:%u "
+		      "activeconns %d refcnt %d weight %d overhead %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+		      atomic_read(&least->activeconns),
+		      atomic_read(&least->refcnt),
+		      atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_nq_scheduler =
+{
+	.name =			"nq",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
+	.schedule =		ip_vs_nq_schedule,
+};
+
+
+static int __init ip_vs_nq_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_nq_scheduler);
+}
+
+static void __exit ip_vs_nq_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
+}
+
+module_init(ip_vs_nq_init);
+module_exit(ip_vs_nq_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
new file mode 100644
index 00000000000..0791f9e08fe
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -0,0 +1,288 @@
+/*
+ * ip_vs_proto.c: transport protocol load balancing support for IPVS
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <asm/system.h>
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * IPVS protocols can only be registered/unregistered when the ipvs
+ * module is loaded/unloaded, so no lock is needed in accessing the
+ * ipvs protocol table.
+ */
+
+#define IP_VS_PROTO_TAB_SIZE		32	/* must be power of 2 */
+#define IP_VS_PROTO_HASH(proto)		((proto) & (IP_VS_PROTO_TAB_SIZE-1))
+
+static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
+
+
+/*
+ *	register an ipvs protocol
+ */
+static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
+{
+	unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+
+	pp->next = ip_vs_proto_table[hash];
+	ip_vs_proto_table[hash] = pp;
+
+	if (pp->init != NULL)
+		pp->init(pp);
+
+	return 0;
+}
+
+
+/*
+ *	unregister an ipvs protocol
+ */
+static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
+{
+	struct ip_vs_protocol **pp_p;
+	unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+
+	pp_p = &ip_vs_proto_table[hash];
+	for (; *pp_p; pp_p = &(*pp_p)->next) {
+		if (*pp_p == pp) {
+			*pp_p = pp->next;
+			if (pp->exit != NULL)
+				pp->exit(pp);
+			return 0;
+		}
+	}
+
+	return -ESRCH;
+}
+
+
+/*
+ *	get ip_vs_protocol object by its proto.
+ */
+struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
+{
+	struct ip_vs_protocol *pp;
+	unsigned hash = IP_VS_PROTO_HASH(proto);
+
+	for (pp = ip_vs_proto_table[hash]; pp; pp = pp->next) {
+		if (pp->protocol == proto)
+			return pp;
+	}
+
+	return NULL;
+}
+
+
+/*
+ *	Propagate event for state change to all protocols
+ */
+void ip_vs_protocol_timeout_change(int flags)
+{
+	struct ip_vs_protocol *pp;
+	int i;
+
+	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+		for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) {
+			if (pp->timeout_change)
+				pp->timeout_change(pp, flags);
+		}
+	}
+}
+
+
+int *
+ip_vs_create_timeout_table(int *table, int size)
+{
+	return kmemdup(table, size, GFP_ATOMIC);
+}
+
+
+/*
+ *	Set timeout value for state specified by name
+ */
+int
+ip_vs_set_state_timeout(int *table, int num, char **names, char *name, int to)
+{
+	int i;
+
+	if (!table || !name || !to)
+		return -EINVAL;
+
+	for (i = 0; i < num; i++) {
+		if (strcmp(names[i], name))
+			continue;
+		table[i] = to * HZ;
+		return 0;
+	}
+	return -ENOENT;
+}
+
+
+const char * ip_vs_state_name(__u16 proto, int state)
+{
+	struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
+
+	if (pp == NULL || pp->state_name == NULL)
+		return (IPPROTO_IP == proto) ? "NONE" : "ERR!";
+	return pp->state_name(state);
+}
+
+
+static void
+ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
+			     const struct sk_buff *skb,
+			     int offset,
+			     const char *msg)
+{
+	char buf[128];
+	struct iphdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		sprintf(buf, "%s TRUNCATED", pp->name);
+	else if (ih->frag_off & htons(IP_OFFSET))
+		sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
+			pp->name, NIPQUAD(ih->saddr),
+			NIPQUAD(ih->daddr));
+	else {
+		__be16 _ports[2], *pptr
+;
+		pptr = skb_header_pointer(skb, offset + ih->ihl*4,
+					  sizeof(_ports), _ports);
+		if (pptr == NULL)
+			sprintf(buf, "%s TRUNCATED %u.%u.%u.%u->%u.%u.%u.%u",
+				pp->name,
+				NIPQUAD(ih->saddr),
+				NIPQUAD(ih->daddr));
+		else
+			sprintf(buf, "%s %u.%u.%u.%u:%u->%u.%u.%u.%u:%u",
+				pp->name,
+				NIPQUAD(ih->saddr),
+				ntohs(pptr[0]),
+				NIPQUAD(ih->daddr),
+				ntohs(pptr[1]));
+	}
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static void
+ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
+			     const struct sk_buff *skb,
+			     int offset,
+			     const char *msg)
+{
+	char buf[192];
+	struct ipv6hdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		sprintf(buf, "%s TRUNCATED", pp->name);
+	else if (ih->nexthdr == IPPROTO_FRAGMENT)
+		sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT " frag",
+			pp->name, NIP6(ih->saddr),
+			NIP6(ih->daddr));
+	else {
+		__be16 _ports[2], *pptr;
+
+		pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
+					  sizeof(_ports), _ports);
+		if (pptr == NULL)
+			sprintf(buf, "%s TRUNCATED " NIP6_FMT "->" NIP6_FMT,
+				pp->name,
+				NIP6(ih->saddr),
+				NIP6(ih->daddr));
+		else
+			sprintf(buf, "%s " NIP6_FMT ":%u->" NIP6_FMT ":%u",
+				pp->name,
+				NIP6(ih->saddr),
+				ntohs(pptr[0]),
+				NIP6(ih->daddr),
+				ntohs(pptr[1]));
+	}
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+#endif
+
+
+void
+ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
+			  const struct sk_buff *skb,
+			  int offset,
+			  const char *msg)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (skb->protocol == htons(ETH_P_IPV6))
+		ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg);
+	else
+#endif
+		ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
+}
+
+
+int __init ip_vs_protocol_init(void)
+{
+	char protocols[64];
+#define REGISTER_PROTOCOL(p)			\
+	do {					\
+		register_ip_vs_protocol(p);	\
+		strcat(protocols, ", ");	\
+		strcat(protocols, (p)->name);	\
+	} while (0)
+
+	protocols[0] = '\0';
+	protocols[2] = '\0';
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	REGISTER_PROTOCOL(&ip_vs_protocol_tcp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	REGISTER_PROTOCOL(&ip_vs_protocol_udp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_AH
+	REGISTER_PROTOCOL(&ip_vs_protocol_ah);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ESP
+	REGISTER_PROTOCOL(&ip_vs_protocol_esp);
+#endif
+	IP_VS_INFO("Registered protocols (%s)\n", &protocols[2]);
+
+	return 0;
+}
+
+
+void ip_vs_protocol_cleanup(void)
+{
+	struct ip_vs_protocol *pp;
+	int i;
+
+	/* unregister all the ipvs protocols */
+	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+		while ((pp = ip_vs_proto_table[i]) != NULL)
+			unregister_ip_vs_protocol(pp);
+	}
+}
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
new file mode 100644
index 00000000000..80ab0c8e5b4
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -0,0 +1,235 @@
+/*
+ * ip_vs_proto_ah_esp.c:	AH/ESP IPSec load balancing support for IPVS
+ *
+ * Authors:	Julian Anastasov <ja@ssi.bg>, February 2002
+ *		Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		version 2 as published by the Free Software Foundation;
+ *
+ */
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+/* TODO:
+
+struct isakmp_hdr {
+	__u8		icookie[8];
+	__u8		rcookie[8];
+	__u8		np;
+	__u8		version;
+	__u8		xchgtype;
+	__u8		flags;
+	__u32		msgid;
+	__u32		length;
+};
+
+*/
+
+#define PORT_ISAKMP	500
+
+
+static struct ip_vs_conn *
+ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		   const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		   int inverse)
+{
+	struct ip_vs_conn *cp;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
+				       &iph->saddr,
+				       htons(PORT_ISAKMP),
+				       &iph->daddr,
+				       htons(PORT_ISAKMP));
+	} else {
+		cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
+				       &iph->daddr,
+				       htons(PORT_ISAKMP),
+				       &iph->saddr,
+				       htons(PORT_ISAKMP));
+	}
+
+	if (!cp) {
+		/*
+		 * We are not sure if the packet is from our
+		 * service, so our conn_schedule hook should return NF_ACCEPT
+		 */
+		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
+			      "%s%s %s->%s\n",
+			      inverse ? "ICMP+" : "",
+			      pp->name,
+			      IP_VS_DBG_ADDR(af, &iph->saddr),
+			      IP_VS_DBG_ADDR(af, &iph->daddr));
+	}
+
+	return cp;
+}
+
+
+static struct ip_vs_conn *
+ah_esp_conn_out_get(int af, const struct sk_buff *skb,
+		    struct ip_vs_protocol *pp,
+		    const struct ip_vs_iphdr *iph,
+		    unsigned int proto_off,
+		    int inverse)
+{
+	struct ip_vs_conn *cp;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
+					&iph->saddr,
+					htons(PORT_ISAKMP),
+					&iph->daddr,
+					htons(PORT_ISAKMP));
+	} else {
+		cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
+					&iph->daddr,
+					htons(PORT_ISAKMP),
+					&iph->saddr,
+					htons(PORT_ISAKMP));
+	}
+
+	if (!cp) {
+		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
+			      "%s%s %s->%s\n",
+			      inverse ? "ICMP+" : "",
+			      pp->name,
+			      IP_VS_DBG_ADDR(af, &iph->saddr),
+			      IP_VS_DBG_ADDR(af, &iph->daddr));
+	}
+
+	return cp;
+}
+
+
+static int
+ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+		     int *verdict, struct ip_vs_conn **cpp)
+{
+	/*
+	 * AH/ESP is only related traffic. Pass the packet to IP stack.
+	 */
+	*verdict = NF_ACCEPT;
+	return 0;
+}
+
+
+static void
+ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		       int offset, const char *msg)
+{
+	char buf[256];
+	struct iphdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		sprintf(buf, "%s TRUNCATED", pp->name);
+	else
+		sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u",
+			pp->name, NIPQUAD(ih->saddr),
+			NIPQUAD(ih->daddr));
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static void
+ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		       int offset, const char *msg)
+{
+	char buf[256];
+	struct ipv6hdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		sprintf(buf, "%s TRUNCATED", pp->name);
+	else
+		sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT,
+			pp->name, NIP6(ih->saddr),
+			NIP6(ih->daddr));
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+#endif
+
+static void
+ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		    int offset, const char *msg)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (skb->protocol == htons(ETH_P_IPV6))
+		ah_esp_debug_packet_v6(pp, skb, offset, msg);
+	else
+#endif
+		ah_esp_debug_packet_v4(pp, skb, offset, msg);
+}
+
+
+static void ah_esp_init(struct ip_vs_protocol *pp)
+{
+	/* nothing to do now */
+}
+
+
+static void ah_esp_exit(struct ip_vs_protocol *pp)
+{
+	/* nothing to do now */
+}
+
+
+#ifdef CONFIG_IP_VS_PROTO_AH
+struct ip_vs_protocol ip_vs_protocol_ah = {
+	.name =			"AH",
+	.protocol =		IPPROTO_AH,
+	.num_states =		1,
+	.dont_defrag =		1,
+	.init =			ah_esp_init,
+	.exit =			ah_esp_exit,
+	.conn_schedule =	ah_esp_conn_schedule,
+	.conn_in_get =		ah_esp_conn_in_get,
+	.conn_out_get =		ah_esp_conn_out_get,
+	.snat_handler =		NULL,
+	.dnat_handler =		NULL,
+	.csum_check =		NULL,
+	.state_transition =	NULL,
+	.register_app =		NULL,
+	.unregister_app =	NULL,
+	.app_conn_bind =	NULL,
+	.debug_packet =		ah_esp_debug_packet,
+	.timeout_change =	NULL,		/* ISAKMP */
+	.set_state_timeout =	NULL,
+};
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_ESP
+struct ip_vs_protocol ip_vs_protocol_esp = {
+	.name =			"ESP",
+	.protocol =		IPPROTO_ESP,
+	.num_states =		1,
+	.dont_defrag =		1,
+	.init =			ah_esp_init,
+	.exit =			ah_esp_exit,
+	.conn_schedule =	ah_esp_conn_schedule,
+	.conn_in_get =		ah_esp_conn_in_get,
+	.conn_out_get =		ah_esp_conn_out_get,
+	.snat_handler =		NULL,
+	.dnat_handler =		NULL,
+	.csum_check =		NULL,
+	.state_transition =	NULL,
+	.register_app =		NULL,
+	.unregister_app =	NULL,
+	.app_conn_bind =	NULL,
+	.debug_packet =		ah_esp_debug_packet,
+	.timeout_change =	NULL,		/* ISAKMP */
+};
+#endif
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
new file mode 100644
index 00000000000..dd4566ea2bf
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -0,0 +1,732 @@
+/*
+ * ip_vs_proto_tcp.c:	TCP load balancing support for IPVS
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>                  /* for tcphdr */
+#include <net/ip.h>
+#include <net/tcp.h>                    /* for csum_tcpudp_magic */
+#include <net/ip6_checksum.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+static struct ip_vs_conn *
+tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		int inverse)
+{
+	__be16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse)) {
+		return ip_vs_conn_in_get(af, iph->protocol,
+					 &iph->saddr, pptr[0],
+					 &iph->daddr, pptr[1]);
+	} else {
+		return ip_vs_conn_in_get(af, iph->protocol,
+					 &iph->daddr, pptr[1],
+					 &iph->saddr, pptr[0]);
+	}
+}
+
+static struct ip_vs_conn *
+tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		 int inverse)
+{
+	__be16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse)) {
+		return ip_vs_conn_out_get(af, iph->protocol,
+					  &iph->saddr, pptr[0],
+					  &iph->daddr, pptr[1]);
+	} else {
+		return ip_vs_conn_out_get(af, iph->protocol,
+					  &iph->daddr, pptr[1],
+					  &iph->saddr, pptr[0]);
+	}
+}
+
+
+static int
+tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+		  int *verdict, struct ip_vs_conn **cpp)
+{
+	struct ip_vs_service *svc;
+	struct tcphdr _tcph, *th;
+	struct ip_vs_iphdr iph;
+
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+
+	th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
+	if (th == NULL) {
+		*verdict = NF_DROP;
+		return 0;
+	}
+
+	if (th->syn &&
+	    (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
+				     th->dest))) {
+		if (ip_vs_todrop()) {
+			/*
+			 * It seems that we are very loaded.
+			 * We have to drop this packet :(
+			 */
+			ip_vs_service_put(svc);
+			*verdict = NF_DROP;
+			return 0;
+		}
+
+		/*
+		 * Let the virtual server select a real server for the
+		 * incoming connection, and create a connection entry.
+		 */
+		*cpp = ip_vs_schedule(svc, skb);
+		if (!*cpp) {
+			*verdict = ip_vs_leave(svc, skb, pp);
+			return 0;
+		}
+		ip_vs_service_put(svc);
+	}
+	return 1;
+}
+
+
+static inline void
+tcp_fast_csum_update(int af, struct tcphdr *tcph,
+		     const union nf_inet_addr *oldip,
+		     const union nf_inet_addr *newip,
+		     __be16 oldport, __be16 newport)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		tcph->check =
+			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+					 ip_vs_check_diff2(oldport, newport,
+						~csum_unfold(tcph->check))));
+	else
+#endif
+	tcph->check =
+		csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+				 ip_vs_check_diff2(oldport, newport,
+						~csum_unfold(tcph->check))));
+}
+
+
+static inline void
+tcp_partial_csum_update(int af, struct tcphdr *tcph,
+		     const union nf_inet_addr *oldip,
+		     const union nf_inet_addr *newip,
+		     __be16 oldlen, __be16 newlen)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		tcph->check =
+			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+					 ip_vs_check_diff2(oldlen, newlen,
+						~csum_unfold(tcph->check))));
+	else
+#endif
+	tcph->check =
+		csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+				ip_vs_check_diff2(oldlen, newlen,
+						~csum_unfold(tcph->check))));
+}
+
+
+static int
+tcp_snat_handler(struct sk_buff *skb,
+		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	struct tcphdr *tcph;
+	unsigned int tcphoff;
+	int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		tcphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		tcphoff = ip_hdrlen(skb);
+	oldlen = skb->len - tcphoff;
+
+	/* csum_check requires unshared skb */
+	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+			return 0;
+
+		/* Call application helper if needed */
+		if (!ip_vs_app_pkt_out(cp, skb))
+			return 0;
+	}
+
+	tcph = (void *)skb_network_header(skb) + tcphoff;
+	tcph->source = cp->vport;
+
+	/* Adjust TCP checksums */
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
+					htonl(oldlen),
+					htonl(skb->len - tcphoff));
+	} else if (!cp->app) {
+		/* Only port and addr are changed, do fast csum update */
+		tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
+				     cp->dport, cp->vport);
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		tcph->check = 0;
+		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			tcph->check = csum_ipv6_magic(&cp->vaddr.in6,
+						      &cp->caddr.in6,
+						      skb->len - tcphoff,
+						      cp->protocol, skb->csum);
+		else
+#endif
+			tcph->check = csum_tcpudp_magic(cp->vaddr.ip,
+							cp->caddr.ip,
+							skb->len - tcphoff,
+							cp->protocol,
+							skb->csum);
+
+		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
+			  pp->name, tcph->check,
+			  (char*)&(tcph->check) - (char*)tcph);
+	}
+	return 1;
+}
+
+
+static int
+tcp_dnat_handler(struct sk_buff *skb,
+		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	struct tcphdr *tcph;
+	unsigned int tcphoff;
+	int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		tcphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		tcphoff = ip_hdrlen(skb);
+	oldlen = skb->len - tcphoff;
+
+	/* csum_check requires unshared skb */
+	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+			return 0;
+
+		/*
+		 *	Attempt ip_vs_app call.
+		 *	It will fix ip_vs_conn and iph ack_seq stuff
+		 */
+		if (!ip_vs_app_pkt_in(cp, skb))
+			return 0;
+	}
+
+	tcph = (void *)skb_network_header(skb) + tcphoff;
+	tcph->dest = cp->dport;
+
+	/*
+	 *	Adjust TCP checksums
+	 */
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
+					htonl(oldlen),
+					htonl(skb->len - tcphoff));
+	} else if (!cp->app) {
+		/* Only port and addr are changed, do fast csum update */
+		tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
+				     cp->vport, cp->dport);
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		tcph->check = 0;
+		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			tcph->check = csum_ipv6_magic(&cp->caddr.in6,
+						      &cp->daddr.in6,
+						      skb->len - tcphoff,
+						      cp->protocol, skb->csum);
+		else
+#endif
+			tcph->check = csum_tcpudp_magic(cp->caddr.ip,
+							cp->daddr.ip,
+							skb->len - tcphoff,
+							cp->protocol,
+							skb->csum);
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+	return 1;
+}
+
+
+static int
+tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+	unsigned int tcphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		tcphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		tcphoff = ip_hdrlen(skb);
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_NONE:
+		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
+	case CHECKSUM_COMPLETE:
+#ifdef CONFIG_IP_VS_IPV6
+		if (af == AF_INET6) {
+			if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+					    &ipv6_hdr(skb)->daddr,
+					    skb->len - tcphoff,
+					    ipv6_hdr(skb)->nexthdr,
+					    skb->csum)) {
+				IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+						 "Failed checksum for");
+				return 0;
+			}
+		} else
+#endif
+			if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+					      ip_hdr(skb)->daddr,
+					      skb->len - tcphoff,
+					      ip_hdr(skb)->protocol,
+					      skb->csum)) {
+				IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+						 "Failed checksum for");
+				return 0;
+			}
+		break;
+	default:
+		/* No need to checksum. */
+		break;
+	}
+
+	return 1;
+}
+
+
+#define TCP_DIR_INPUT		0
+#define TCP_DIR_OUTPUT		4
+#define TCP_DIR_INPUT_ONLY	8
+
+static const int tcp_state_off[IP_VS_DIR_LAST] = {
+	[IP_VS_DIR_INPUT]		=	TCP_DIR_INPUT,
+	[IP_VS_DIR_OUTPUT]		=	TCP_DIR_OUTPUT,
+	[IP_VS_DIR_INPUT_ONLY]		=	TCP_DIR_INPUT_ONLY,
+};
+
+/*
+ *	Timeout table[state]
+ */
+static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
+	[IP_VS_TCP_S_NONE]		=	2*HZ,
+	[IP_VS_TCP_S_ESTABLISHED]	=	15*60*HZ,
+	[IP_VS_TCP_S_SYN_SENT]		=	2*60*HZ,
+	[IP_VS_TCP_S_SYN_RECV]		=	1*60*HZ,
+	[IP_VS_TCP_S_FIN_WAIT]		=	2*60*HZ,
+	[IP_VS_TCP_S_TIME_WAIT]		=	2*60*HZ,
+	[IP_VS_TCP_S_CLOSE]		=	10*HZ,
+	[IP_VS_TCP_S_CLOSE_WAIT]	=	60*HZ,
+	[IP_VS_TCP_S_LAST_ACK]		=	30*HZ,
+	[IP_VS_TCP_S_LISTEN]		=	2*60*HZ,
+	[IP_VS_TCP_S_SYNACK]		=	120*HZ,
+	[IP_VS_TCP_S_LAST]		=	2*HZ,
+};
+
+static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
+	[IP_VS_TCP_S_NONE]		=	"NONE",
+	[IP_VS_TCP_S_ESTABLISHED]	=	"ESTABLISHED",
+	[IP_VS_TCP_S_SYN_SENT]		=	"SYN_SENT",
+	[IP_VS_TCP_S_SYN_RECV]		=	"SYN_RECV",
+	[IP_VS_TCP_S_FIN_WAIT]		=	"FIN_WAIT",
+	[IP_VS_TCP_S_TIME_WAIT]		=	"TIME_WAIT",
+	[IP_VS_TCP_S_CLOSE]		=	"CLOSE",
+	[IP_VS_TCP_S_CLOSE_WAIT]	=	"CLOSE_WAIT",
+	[IP_VS_TCP_S_LAST_ACK]		=	"LAST_ACK",
+	[IP_VS_TCP_S_LISTEN]		=	"LISTEN",
+	[IP_VS_TCP_S_SYNACK]		=	"SYNACK",
+	[IP_VS_TCP_S_LAST]		=	"BUG!",
+};
+
+#define sNO IP_VS_TCP_S_NONE
+#define sES IP_VS_TCP_S_ESTABLISHED
+#define sSS IP_VS_TCP_S_SYN_SENT
+#define sSR IP_VS_TCP_S_SYN_RECV
+#define sFW IP_VS_TCP_S_FIN_WAIT
+#define sTW IP_VS_TCP_S_TIME_WAIT
+#define sCL IP_VS_TCP_S_CLOSE
+#define sCW IP_VS_TCP_S_CLOSE_WAIT
+#define sLA IP_VS_TCP_S_LAST_ACK
+#define sLI IP_VS_TCP_S_LISTEN
+#define sSA IP_VS_TCP_S_SYNACK
+
+struct tcp_states_t {
+	int next_state[IP_VS_TCP_S_LAST];
+};
+
+static const char * tcp_state_name(int state)
+{
+	if (state >= IP_VS_TCP_S_LAST)
+		return "ERR!";
+	return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
+}
+
+static struct tcp_states_t tcp_states [] = {
+/*	INPUT */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
+/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
+
+/*	OUTPUT */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
+/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
+/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
+
+/*	INPUT-ONLY */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
+/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+};
+
+static struct tcp_states_t tcp_states_dos [] = {
+/*	INPUT */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
+/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
+/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+
+/*	OUTPUT */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
+/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
+/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
+
+/*	INPUT-ONLY */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
+/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+};
+
+static struct tcp_states_t *tcp_state_table = tcp_states;
+
+
+static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
+{
+	int on = (flags & 1);		/* secure_tcp */
+
+	/*
+	** FIXME: change secure_tcp to independent sysctl var
+	** or make it per-service or per-app because it is valid
+	** for most if not for all of the applications. Something
+	** like "capabilities" (flags) for each object.
+	*/
+	tcp_state_table = (on? tcp_states_dos : tcp_states);
+}
+
+static int
+tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
+{
+	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
+				       tcp_state_name_table, sname, to);
+}
+
+static inline int tcp_state_idx(struct tcphdr *th)
+{
+	if (th->rst)
+		return 3;
+	if (th->syn)
+		return 0;
+	if (th->fin)
+		return 1;
+	if (th->ack)
+		return 2;
+	return -1;
+}
+
+static inline void
+set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+	      int direction, struct tcphdr *th)
+{
+	int state_idx;
+	int new_state = IP_VS_TCP_S_CLOSE;
+	int state_off = tcp_state_off[direction];
+
+	/*
+	 *    Update state offset to INPUT_ONLY if necessary
+	 *    or delete NO_OUTPUT flag if output packet detected
+	 */
+	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
+		if (state_off == TCP_DIR_OUTPUT)
+			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
+		else
+			state_off = TCP_DIR_INPUT_ONLY;
+	}
+
+	if ((state_idx = tcp_state_idx(th)) < 0) {
+		IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
+		goto tcp_state_out;
+	}
+
+	new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
+
+  tcp_state_out:
+	if (new_state != cp->state) {
+		struct ip_vs_dest *dest = cp->dest;
+
+		IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
+			      "%s:%d state: %s->%s conn->refcnt:%d\n",
+			      pp->name,
+			      ((state_off == TCP_DIR_OUTPUT) ?
+			       "output " : "input "),
+			      th->syn ? 'S' : '.',
+			      th->fin ? 'F' : '.',
+			      th->ack ? 'A' : '.',
+			      th->rst ? 'R' : '.',
+			      IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+			      ntohs(cp->dport),
+			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+			      ntohs(cp->cport),
+			      tcp_state_name(cp->state),
+			      tcp_state_name(new_state),
+			      atomic_read(&cp->refcnt));
+
+		if (dest) {
+			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+			    (new_state != IP_VS_TCP_S_ESTABLISHED)) {
+				atomic_dec(&dest->activeconns);
+				atomic_inc(&dest->inactconns);
+				cp->flags |= IP_VS_CONN_F_INACTIVE;
+			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+				   (new_state == IP_VS_TCP_S_ESTABLISHED)) {
+				atomic_inc(&dest->activeconns);
+				atomic_dec(&dest->inactconns);
+				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+			}
+		}
+	}
+
+	cp->timeout = pp->timeout_table[cp->state = new_state];
+}
+
+
+/*
+ *	Handle state transitions
+ */
+static int
+tcp_state_transition(struct ip_vs_conn *cp, int direction,
+		     const struct sk_buff *skb,
+		     struct ip_vs_protocol *pp)
+{
+	struct tcphdr _tcph, *th;
+
+#ifdef CONFIG_IP_VS_IPV6
+	int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
+#else
+	int ihl = ip_hdrlen(skb);
+#endif
+
+	th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
+	if (th == NULL)
+		return 0;
+
+	spin_lock(&cp->lock);
+	set_tcp_state(pp, cp, direction, th);
+	spin_unlock(&cp->lock);
+
+	return 1;
+}
+
+
+/*
+ *	Hash table for TCP application incarnations
+ */
+#define	TCP_APP_TAB_BITS	4
+#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
+#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
+
+static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
+static DEFINE_SPINLOCK(tcp_app_lock);
+
+static inline __u16 tcp_app_hashkey(__be16 port)
+{
+	return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
+		& TCP_APP_TAB_MASK;
+}
+
+
+static int tcp_register_app(struct ip_vs_app *inc)
+{
+	struct ip_vs_app *i;
+	__u16 hash;
+	__be16 port = inc->port;
+	int ret = 0;
+
+	hash = tcp_app_hashkey(port);
+
+	spin_lock_bh(&tcp_app_lock);
+	list_for_each_entry(i, &tcp_apps[hash], p_list) {
+		if (i->port == port) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+	list_add(&inc->p_list, &tcp_apps[hash]);
+	atomic_inc(&ip_vs_protocol_tcp.appcnt);
+
+  out:
+	spin_unlock_bh(&tcp_app_lock);
+	return ret;
+}
+
+
+static void
+tcp_unregister_app(struct ip_vs_app *inc)
+{
+	spin_lock_bh(&tcp_app_lock);
+	atomic_dec(&ip_vs_protocol_tcp.appcnt);
+	list_del(&inc->p_list);
+	spin_unlock_bh(&tcp_app_lock);
+}
+
+
+static int
+tcp_app_conn_bind(struct ip_vs_conn *cp)
+{
+	int hash;
+	struct ip_vs_app *inc;
+	int result = 0;
+
+	/* Default binding: bind app only for NAT */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+		return 0;
+
+	/* Lookup application incarnations and bind the right one */
+	hash = tcp_app_hashkey(cp->vport);
+
+	spin_lock(&tcp_app_lock);
+	list_for_each_entry(inc, &tcp_apps[hash], p_list) {
+		if (inc->port == cp->vport) {
+			if (unlikely(!ip_vs_app_inc_get(inc)))
+				break;
+			spin_unlock(&tcp_app_lock);
+
+			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
+				      "%s:%u to app %s on port %u\n",
+				      __func__,
+				      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+				      ntohs(cp->cport),
+				      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+				      ntohs(cp->vport),
+				      inc->name, ntohs(inc->port));
+
+			cp->app = inc;
+			if (inc->init_conn)
+				result = inc->init_conn(inc, cp);
+			goto out;
+		}
+	}
+	spin_unlock(&tcp_app_lock);
+
+  out:
+	return result;
+}
+
+
+/*
+ *	Set LISTEN timeout. (ip_vs_conn_put will setup timer)
+ */
+void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
+{
+	spin_lock(&cp->lock);
+	cp->state = IP_VS_TCP_S_LISTEN;
+	cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
+	spin_unlock(&cp->lock);
+}
+
+
+static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
+{
+	IP_VS_INIT_HASH_TABLE(tcp_apps);
+	pp->timeout_table = tcp_timeouts;
+}
+
+
+static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
+{
+}
+
+
+struct ip_vs_protocol ip_vs_protocol_tcp = {
+	.name =			"TCP",
+	.protocol =		IPPROTO_TCP,
+	.num_states =		IP_VS_TCP_S_LAST,
+	.dont_defrag =		0,
+	.appcnt =		ATOMIC_INIT(0),
+	.init =			ip_vs_tcp_init,
+	.exit =			ip_vs_tcp_exit,
+	.register_app =		tcp_register_app,
+	.unregister_app =	tcp_unregister_app,
+	.conn_schedule =	tcp_conn_schedule,
+	.conn_in_get =		tcp_conn_in_get,
+	.conn_out_get =		tcp_conn_out_get,
+	.snat_handler =		tcp_snat_handler,
+	.dnat_handler =		tcp_dnat_handler,
+	.csum_check =		tcp_csum_check,
+	.state_name =		tcp_state_name,
+	.state_transition =	tcp_state_transition,
+	.app_conn_bind =	tcp_app_conn_bind,
+	.debug_packet =		ip_vs_tcpudp_debug_packet,
+	.timeout_change =	tcp_timeout_change,
+	.set_state_timeout =	tcp_set_state_timeout,
+};
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
new file mode 100644
index 00000000000..6eb6039d634
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -0,0 +1,533 @@
+/*
+ * ip_vs_proto_udp.c:	UDP load balancing support for IPVS
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/udp.h>
+
+#include <net/ip_vs.h>
+#include <net/ip.h>
+#include <net/ip6_checksum.h>
+
+static struct ip_vs_conn *
+udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		int inverse)
+{
+	struct ip_vs_conn *cp;
+	__be16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_in_get(af, iph->protocol,
+				       &iph->saddr, pptr[0],
+				       &iph->daddr, pptr[1]);
+	} else {
+		cp = ip_vs_conn_in_get(af, iph->protocol,
+				       &iph->daddr, pptr[1],
+				       &iph->saddr, pptr[0]);
+	}
+
+	return cp;
+}
+
+
+static struct ip_vs_conn *
+udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		 int inverse)
+{
+	struct ip_vs_conn *cp;
+	__be16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_out_get(af, iph->protocol,
+					&iph->saddr, pptr[0],
+					&iph->daddr, pptr[1]);
+	} else {
+		cp = ip_vs_conn_out_get(af, iph->protocol,
+					&iph->daddr, pptr[1],
+					&iph->saddr, pptr[0]);
+	}
+
+	return cp;
+}
+
+
+static int
+udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+		  int *verdict, struct ip_vs_conn **cpp)
+{
+	struct ip_vs_service *svc;
+	struct udphdr _udph, *uh;
+	struct ip_vs_iphdr iph;
+
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+
+	uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
+	if (uh == NULL) {
+		*verdict = NF_DROP;
+		return 0;
+	}
+
+	svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+				&iph.daddr, uh->dest);
+	if (svc) {
+		if (ip_vs_todrop()) {
+			/*
+			 * It seems that we are very loaded.
+			 * We have to drop this packet :(
+			 */
+			ip_vs_service_put(svc);
+			*verdict = NF_DROP;
+			return 0;
+		}
+
+		/*
+		 * Let the virtual server select a real server for the
+		 * incoming connection, and create a connection entry.
+		 */
+		*cpp = ip_vs_schedule(svc, skb);
+		if (!*cpp) {
+			*verdict = ip_vs_leave(svc, skb, pp);
+			return 0;
+		}
+		ip_vs_service_put(svc);
+	}
+	return 1;
+}
+
+
+static inline void
+udp_fast_csum_update(int af, struct udphdr *uhdr,
+		     const union nf_inet_addr *oldip,
+		     const union nf_inet_addr *newip,
+		     __be16 oldport, __be16 newport)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		uhdr->check =
+			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+					 ip_vs_check_diff2(oldport, newport,
+						~csum_unfold(uhdr->check))));
+	else
+#endif
+		uhdr->check =
+			csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+					 ip_vs_check_diff2(oldport, newport,
+						~csum_unfold(uhdr->check))));
+	if (!uhdr->check)
+		uhdr->check = CSUM_MANGLED_0;
+}
+
+static inline void
+udp_partial_csum_update(int af, struct udphdr *uhdr,
+		     const union nf_inet_addr *oldip,
+		     const union nf_inet_addr *newip,
+		     __be16 oldlen, __be16 newlen)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		uhdr->check =
+			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+					 ip_vs_check_diff2(oldlen, newlen,
+						~csum_unfold(uhdr->check))));
+	else
+#endif
+	uhdr->check =
+		csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+				ip_vs_check_diff2(oldlen, newlen,
+						~csum_unfold(uhdr->check))));
+}
+
+
+static int
+udp_snat_handler(struct sk_buff *skb,
+		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	struct udphdr *udph;
+	unsigned int udphoff;
+	int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		udphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		udphoff = ip_hdrlen(skb);
+	oldlen = skb->len - udphoff;
+
+	/* csum_check requires unshared skb */
+	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+			return 0;
+
+		/*
+		 *	Call application helper if needed
+		 */
+		if (!ip_vs_app_pkt_out(cp, skb))
+			return 0;
+	}
+
+	udph = (void *)skb_network_header(skb) + udphoff;
+	udph->source = cp->vport;
+
+	/*
+	 *	Adjust UDP checksums
+	 */
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+					htonl(oldlen),
+					htonl(skb->len - udphoff));
+	} else if (!cp->app && (udph->check != 0)) {
+		/* Only port and addr are changed, do fast csum update */
+		udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+				     cp->dport, cp->vport);
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		udph->check = 0;
+		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			udph->check = csum_ipv6_magic(&cp->vaddr.in6,
+						      &cp->caddr.in6,
+						      skb->len - udphoff,
+						      cp->protocol, skb->csum);
+		else
+#endif
+			udph->check = csum_tcpudp_magic(cp->vaddr.ip,
+							cp->caddr.ip,
+							skb->len - udphoff,
+							cp->protocol,
+							skb->csum);
+		if (udph->check == 0)
+			udph->check = CSUM_MANGLED_0;
+		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
+			  pp->name, udph->check,
+			  (char*)&(udph->check) - (char*)udph);
+	}
+	return 1;
+}
+
+
+static int
+udp_dnat_handler(struct sk_buff *skb,
+		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	struct udphdr *udph;
+	unsigned int udphoff;
+	int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		udphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		udphoff = ip_hdrlen(skb);
+	oldlen = skb->len - udphoff;
+
+	/* csum_check requires unshared skb */
+	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+			return 0;
+
+		/*
+		 *	Attempt ip_vs_app call.
+		 *	It will fix ip_vs_conn
+		 */
+		if (!ip_vs_app_pkt_in(cp, skb))
+			return 0;
+	}
+
+	udph = (void *)skb_network_header(skb) + udphoff;
+	udph->dest = cp->dport;
+
+	/*
+	 *	Adjust UDP checksums
+	 */
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+					htonl(oldlen),
+					htonl(skb->len - udphoff));
+	} else if (!cp->app && (udph->check != 0)) {
+		/* Only port and addr are changed, do fast csum update */
+		udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
+				     cp->vport, cp->dport);
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		udph->check = 0;
+		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			udph->check = csum_ipv6_magic(&cp->caddr.in6,
+						      &cp->daddr.in6,
+						      skb->len - udphoff,
+						      cp->protocol, skb->csum);
+		else
+#endif
+			udph->check = csum_tcpudp_magic(cp->caddr.ip,
+							cp->daddr.ip,
+							skb->len - udphoff,
+							cp->protocol,
+							skb->csum);
+		if (udph->check == 0)
+			udph->check = CSUM_MANGLED_0;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+	return 1;
+}
+
+
+static int
+udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+	struct udphdr _udph, *uh;
+	unsigned int udphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		udphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		udphoff = ip_hdrlen(skb);
+
+	uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
+	if (uh == NULL)
+		return 0;
+
+	if (uh->check != 0) {
+		switch (skb->ip_summed) {
+		case CHECKSUM_NONE:
+			skb->csum = skb_checksum(skb, udphoff,
+						 skb->len - udphoff, 0);
+		case CHECKSUM_COMPLETE:
+#ifdef CONFIG_IP_VS_IPV6
+			if (af == AF_INET6) {
+				if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+						    &ipv6_hdr(skb)->daddr,
+						    skb->len - udphoff,
+						    ipv6_hdr(skb)->nexthdr,
+						    skb->csum)) {
+					IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+							 "Failed checksum for");
+					return 0;
+				}
+			} else
+#endif
+				if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+						      ip_hdr(skb)->daddr,
+						      skb->len - udphoff,
+						      ip_hdr(skb)->protocol,
+						      skb->csum)) {
+					IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+							 "Failed checksum for");
+					return 0;
+				}
+			break;
+		default:
+			/* No need to checksum. */
+			break;
+		}
+	}
+	return 1;
+}
+
+
+/*
+ *	Note: the caller guarantees that only one of register_app,
+ *	unregister_app or app_conn_bind is called each time.
+ */
+
+#define	UDP_APP_TAB_BITS	4
+#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
+#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
+
+static struct list_head udp_apps[UDP_APP_TAB_SIZE];
+static DEFINE_SPINLOCK(udp_app_lock);
+
+static inline __u16 udp_app_hashkey(__be16 port)
+{
+	return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
+		& UDP_APP_TAB_MASK;
+}
+
+
+static int udp_register_app(struct ip_vs_app *inc)
+{
+	struct ip_vs_app *i;
+	__u16 hash;
+	__be16 port = inc->port;
+	int ret = 0;
+
+	hash = udp_app_hashkey(port);
+
+
+	spin_lock_bh(&udp_app_lock);
+	list_for_each_entry(i, &udp_apps[hash], p_list) {
+		if (i->port == port) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+	list_add(&inc->p_list, &udp_apps[hash]);
+	atomic_inc(&ip_vs_protocol_udp.appcnt);
+
+  out:
+	spin_unlock_bh(&udp_app_lock);
+	return ret;
+}
+
+
+static void
+udp_unregister_app(struct ip_vs_app *inc)
+{
+	spin_lock_bh(&udp_app_lock);
+	atomic_dec(&ip_vs_protocol_udp.appcnt);
+	list_del(&inc->p_list);
+	spin_unlock_bh(&udp_app_lock);
+}
+
+
+static int udp_app_conn_bind(struct ip_vs_conn *cp)
+{
+	int hash;
+	struct ip_vs_app *inc;
+	int result = 0;
+
+	/* Default binding: bind app only for NAT */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+		return 0;
+
+	/* Lookup application incarnations and bind the right one */
+	hash = udp_app_hashkey(cp->vport);
+
+	spin_lock(&udp_app_lock);
+	list_for_each_entry(inc, &udp_apps[hash], p_list) {
+		if (inc->port == cp->vport) {
+			if (unlikely(!ip_vs_app_inc_get(inc)))
+				break;
+			spin_unlock(&udp_app_lock);
+
+			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
+				      "%s:%u to app %s on port %u\n",
+				      __func__,
+				      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+				      ntohs(cp->cport),
+				      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+				      ntohs(cp->vport),
+				      inc->name, ntohs(inc->port));
+
+			cp->app = inc;
+			if (inc->init_conn)
+				result = inc->init_conn(inc, cp);
+			goto out;
+		}
+	}
+	spin_unlock(&udp_app_lock);
+
+  out:
+	return result;
+}
+
+
+static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
+	[IP_VS_UDP_S_NORMAL]		=	5*60*HZ,
+	[IP_VS_UDP_S_LAST]		=	2*HZ,
+};
+
+static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
+	[IP_VS_UDP_S_NORMAL]		=	"UDP",
+	[IP_VS_UDP_S_LAST]		=	"BUG!",
+};
+
+
+static int
+udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
+{
+	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
+				       udp_state_name_table, sname, to);
+}
+
+static const char * udp_state_name(int state)
+{
+	if (state >= IP_VS_UDP_S_LAST)
+		return "ERR!";
+	return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
+}
+
+static int
+udp_state_transition(struct ip_vs_conn *cp, int direction,
+		     const struct sk_buff *skb,
+		     struct ip_vs_protocol *pp)
+{
+	cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
+	return 1;
+}
+
+static void udp_init(struct ip_vs_protocol *pp)
+{
+	IP_VS_INIT_HASH_TABLE(udp_apps);
+	pp->timeout_table = udp_timeouts;
+}
+
+static void udp_exit(struct ip_vs_protocol *pp)
+{
+}
+
+
+struct ip_vs_protocol ip_vs_protocol_udp = {
+	.name =			"UDP",
+	.protocol =		IPPROTO_UDP,
+	.num_states =		IP_VS_UDP_S_LAST,
+	.dont_defrag =		0,
+	.init =			udp_init,
+	.exit =			udp_exit,
+	.conn_schedule =	udp_conn_schedule,
+	.conn_in_get =		udp_conn_in_get,
+	.conn_out_get =		udp_conn_out_get,
+	.snat_handler =		udp_snat_handler,
+	.dnat_handler =		udp_dnat_handler,
+	.csum_check =		udp_csum_check,
+	.state_transition =	udp_state_transition,
+	.state_name =		udp_state_name,
+	.register_app =		udp_register_app,
+	.unregister_app =	udp_unregister_app,
+	.app_conn_bind =	udp_app_conn_bind,
+	.debug_packet =		ip_vs_tcpudp_debug_packet,
+	.timeout_change =	NULL,
+	.set_state_timeout =	udp_set_state_timeout,
+};
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
new file mode 100644
index 00000000000..a22195f68ac
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -0,0 +1,112 @@
+/*
+ * IPVS:        Round-Robin Scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Fixes/Changes:
+ *     Wensong Zhang            :     changed the ip_vs_rr_schedule to return dest
+ *     Julian Anastasov         :     fixed the NULL pointer access bug in debugging
+ *     Wensong Zhang            :     changed some comestics things for debugging
+ *     Wensong Zhang            :     changed for the d-linked destination list
+ *     Wensong Zhang            :     added the ip_vs_rr_update_svc
+ *     Wensong Zhang            :     added any dest with weight=0 is quiesced
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
+{
+	svc->sched_data = &svc->destinations;
+	return 0;
+}
+
+
+static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
+{
+	svc->sched_data = &svc->destinations;
+	return 0;
+}
+
+
+/*
+ * Round-Robin Scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct list_head *p, *q;
+	struct ip_vs_dest *dest;
+
+	IP_VS_DBG(6, "ip_vs_rr_schedule(): Scheduling...\n");
+
+	write_lock(&svc->sched_lock);
+	p = (struct list_head *)svc->sched_data;
+	p = p->next;
+	q = p;
+	do {
+		/* skip list head */
+		if (q == &svc->destinations) {
+			q = q->next;
+			continue;
+		}
+
+		dest = list_entry(q, struct ip_vs_dest, n_list);
+		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+		    atomic_read(&dest->weight) > 0)
+			/* HIT */
+			goto out;
+		q = q->next;
+	} while (q != p);
+	write_unlock(&svc->sched_lock);
+	return NULL;
+
+  out:
+	svc->sched_data = q;
+	write_unlock(&svc->sched_lock);
+	IP_VS_DBG_BUF(6, "RR: server %s:%u "
+		      "activeconns %d refcnt %d weight %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+		      atomic_read(&dest->activeconns),
+		      atomic_read(&dest->refcnt), atomic_read(&dest->weight));
+
+	return dest;
+}
+
+
+static struct ip_vs_scheduler ip_vs_rr_scheduler = {
+	.name =			"rr",			/* name */
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
+	.init_service =		ip_vs_rr_init_svc,
+	.update_service =	ip_vs_rr_update_svc,
+	.schedule =		ip_vs_rr_schedule,
+};
+
+static int __init ip_vs_rr_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_rr_scheduler);
+}
+
+static void __exit ip_vs_rr_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_rr_scheduler);
+}
+
+module_init(ip_vs_rr_init);
+module_exit(ip_vs_rr_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
new file mode 100644
index 00000000000..a46ad9e3501
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -0,0 +1,251 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the Netfilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <asm/string.h>
+#include <linux/kmod.h>
+#include <linux/sysctl.h>
+
+#include <net/ip_vs.h>
+
+/*
+ *  IPVS scheduler list
+ */
+static LIST_HEAD(ip_vs_schedulers);
+
+/* lock for service table */
+static DEFINE_RWLOCK(__ip_vs_sched_lock);
+
+
+/*
+ *  Bind a service with a scheduler
+ */
+int ip_vs_bind_scheduler(struct ip_vs_service *svc,
+			 struct ip_vs_scheduler *scheduler)
+{
+	int ret;
+
+	if (svc == NULL) {
+		IP_VS_ERR("ip_vs_bind_scheduler(): svc arg NULL\n");
+		return -EINVAL;
+	}
+	if (scheduler == NULL) {
+		IP_VS_ERR("ip_vs_bind_scheduler(): scheduler arg NULL\n");
+		return -EINVAL;
+	}
+
+	svc->scheduler = scheduler;
+
+	if (scheduler->init_service) {
+		ret = scheduler->init_service(svc);
+		if (ret) {
+			IP_VS_ERR("ip_vs_bind_scheduler(): init error\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+ *  Unbind a service with its scheduler
+ */
+int ip_vs_unbind_scheduler(struct ip_vs_service *svc)
+{
+	struct ip_vs_scheduler *sched;
+
+	if (svc == NULL) {
+		IP_VS_ERR("ip_vs_unbind_scheduler(): svc arg NULL\n");
+		return -EINVAL;
+	}
+
+	sched = svc->scheduler;
+	if (sched == NULL) {
+		IP_VS_ERR("ip_vs_unbind_scheduler(): svc isn't bound\n");
+		return -EINVAL;
+	}
+
+	if (sched->done_service) {
+		if (sched->done_service(svc) != 0) {
+			IP_VS_ERR("ip_vs_unbind_scheduler(): done error\n");
+			return -EINVAL;
+		}
+	}
+
+	svc->scheduler = NULL;
+	return 0;
+}
+
+
+/*
+ *  Get scheduler in the scheduler list by name
+ */
+static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
+{
+	struct ip_vs_scheduler *sched;
+
+	IP_VS_DBG(2, "ip_vs_sched_getbyname(): sched_name \"%s\"\n",
+		  sched_name);
+
+	read_lock_bh(&__ip_vs_sched_lock);
+
+	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
+		/*
+		 * Test and get the modules atomically
+		 */
+		if (sched->module && !try_module_get(sched->module)) {
+			/*
+			 * This scheduler is just deleted
+			 */
+			continue;
+		}
+		if (strcmp(sched_name, sched->name)==0) {
+			/* HIT */
+			read_unlock_bh(&__ip_vs_sched_lock);
+			return sched;
+		}
+		if (sched->module)
+			module_put(sched->module);
+	}
+
+	read_unlock_bh(&__ip_vs_sched_lock);
+	return NULL;
+}
+
+
+/*
+ *  Lookup scheduler and try to load it if it doesn't exist
+ */
+struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
+{
+	struct ip_vs_scheduler *sched;
+
+	/*
+	 *  Search for the scheduler by sched_name
+	 */
+	sched = ip_vs_sched_getbyname(sched_name);
+
+	/*
+	 *  If scheduler not found, load the module and search again
+	 */
+	if (sched == NULL) {
+		request_module("ip_vs_%s", sched_name);
+		sched = ip_vs_sched_getbyname(sched_name);
+	}
+
+	return sched;
+}
+
+void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
+{
+	if (scheduler->module)
+		module_put(scheduler->module);
+}
+
+
+/*
+ *  Register a scheduler in the scheduler list
+ */
+int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
+{
+	struct ip_vs_scheduler *sched;
+
+	if (!scheduler) {
+		IP_VS_ERR("register_ip_vs_scheduler(): NULL arg\n");
+		return -EINVAL;
+	}
+
+	if (!scheduler->name) {
+		IP_VS_ERR("register_ip_vs_scheduler(): NULL scheduler_name\n");
+		return -EINVAL;
+	}
+
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	write_lock_bh(&__ip_vs_sched_lock);
+
+	if (!list_empty(&scheduler->n_list)) {
+		write_unlock_bh(&__ip_vs_sched_lock);
+		ip_vs_use_count_dec();
+		IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
+			  "already linked\n", scheduler->name);
+		return -EINVAL;
+	}
+
+	/*
+	 *  Make sure that the scheduler with this name doesn't exist
+	 *  in the scheduler list.
+	 */
+	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
+		if (strcmp(scheduler->name, sched->name) == 0) {
+			write_unlock_bh(&__ip_vs_sched_lock);
+			ip_vs_use_count_dec();
+			IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
+					"already existed in the system\n",
+					scheduler->name);
+			return -EINVAL;
+		}
+	}
+	/*
+	 *	Add it into the d-linked scheduler list
+	 */
+	list_add(&scheduler->n_list, &ip_vs_schedulers);
+	write_unlock_bh(&__ip_vs_sched_lock);
+
+	IP_VS_INFO("[%s] scheduler registered.\n", scheduler->name);
+
+	return 0;
+}
+
+
+/*
+ *  Unregister a scheduler from the scheduler list
+ */
+int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
+{
+	if (!scheduler) {
+		IP_VS_ERR( "unregister_ip_vs_scheduler(): NULL arg\n");
+		return -EINVAL;
+	}
+
+	write_lock_bh(&__ip_vs_sched_lock);
+	if (list_empty(&scheduler->n_list)) {
+		write_unlock_bh(&__ip_vs_sched_lock);
+		IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler "
+			  "is not in the list. failed\n", scheduler->name);
+		return -EINVAL;
+	}
+
+	/*
+	 *	Remove it from the d-linked scheduler list
+	 */
+	list_del(&scheduler->n_list);
+	write_unlock_bh(&__ip_vs_sched_lock);
+
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+
+	IP_VS_INFO("[%s] scheduler unregistered.\n", scheduler->name);
+
+	return 0;
+}
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
new file mode 100644
index 00000000000..7d2f22f04b8
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -0,0 +1,140 @@
+/*
+ * IPVS:        Shortest Expected Delay scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The SED algorithm attempts to minimize each job's expected delay until
+ * completion. The expected delay that the job will experience is
+ * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of
+ * jobs on the ith server and Ui is the fixed service rate (weight) of
+ * the ith server. The SED algorithm adopts a greedy policy that each does
+ * what is in its own best interest, i.e. to join the queue which would
+ * minimize its expected delay of completion.
+ *
+ * See the following paper for more information:
+ * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
+ * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
+ * pages 986-994, 1988.
+ *
+ * Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me.
+ *
+ * The difference between SED and WLC is that SED includes the incoming
+ * job in the cost function (the increment of 1). SED may outperform
+ * WLC, while scheduling big jobs under larger heterogeneous systems
+ * (the server weight varies a lot).
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static inline unsigned int
+ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
+{
+	/*
+	 * We only use the active connection number in the cost
+	 * calculation here.
+	 */
+	return atomic_read(&dest->activeconns) + 1;
+}
+
+
+/*
+ *	Weighted Least Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest, *least;
+	unsigned int loh, doh;
+
+	IP_VS_DBG(6, "ip_vs_sed_schedule(): Scheduling...\n");
+
+	/*
+	 * We calculate the load of each dest server as follows:
+	 *	(server expected overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *		  h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connections.
+	 */
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+		    atomic_read(&dest->weight) > 0) {
+			least = dest;
+			loh = ip_vs_sed_dest_overhead(least);
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/*
+	 *    Find the destination with the least load.
+	 */
+  nextstage:
+	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+		doh = ip_vs_sed_dest_overhead(dest);
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG_BUF(6, "SED: server %s:%u "
+		      "activeconns %d refcnt %d weight %d overhead %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+		      atomic_read(&least->activeconns),
+		      atomic_read(&least->refcnt),
+		      atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_sed_scheduler =
+{
+	.name =			"sed",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
+	.schedule =		ip_vs_sed_schedule,
+};
+
+
+static int __init ip_vs_sed_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_sed_scheduler);
+}
+
+static void __exit ip_vs_sed_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
+}
+
+module_init(ip_vs_sed_init);
+module_exit(ip_vs_sed_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
new file mode 100644
index 00000000000..1d96de27fef
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -0,0 +1,258 @@
+/*
+ * IPVS:        Source Hashing scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@gnuchina.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The sh algorithm is to select server by the hash key of source IP
+ * address. The pseudo code is as follows:
+ *
+ *       n <- servernode[src_ip];
+ *       if (n is dead) OR
+ *          (n is overloaded) or (n.weight <= 0) then
+ *                 return NULL;
+ *
+ *       return n;
+ *
+ * Notes that servernode is a 256-bucket hash table that maps the hash
+ * index derived from packet source IP address to the current server
+ * array. If the sh scheduler is used in cache cluster, it is good to
+ * combine it with cache_bypass feature. When the statically assigned
+ * server is dead or overloaded, the load balancer can bypass the cache
+ * server and send requests to the original server directly.
+ *
+ */
+
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *      IPVS SH bucket
+ */
+struct ip_vs_sh_bucket {
+	struct ip_vs_dest       *dest;          /* real server (cache) */
+};
+
+/*
+ *     for IPVS SH entry hash table
+ */
+#ifndef CONFIG_IP_VS_SH_TAB_BITS
+#define CONFIG_IP_VS_SH_TAB_BITS        8
+#endif
+#define IP_VS_SH_TAB_BITS               CONFIG_IP_VS_SH_TAB_BITS
+#define IP_VS_SH_TAB_SIZE               (1 << IP_VS_SH_TAB_BITS)
+#define IP_VS_SH_TAB_MASK               (IP_VS_SH_TAB_SIZE - 1)
+
+
+/*
+ *	Returns hash value for IPVS SH entry
+ */
+static inline unsigned ip_vs_sh_hashkey(__be32 addr)
+{
+	return (ntohl(addr)*2654435761UL) & IP_VS_SH_TAB_MASK;
+}
+
+
+/*
+ *      Get ip_vs_dest associated with supplied parameters.
+ */
+static inline struct ip_vs_dest *
+ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __be32 addr)
+{
+	return (tbl[ip_vs_sh_hashkey(addr)]).dest;
+}
+
+
+/*
+ *      Assign all the hash buckets of the specified table with the service.
+ */
+static int
+ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_sh_bucket *b;
+	struct list_head *p;
+	struct ip_vs_dest *dest;
+
+	b = tbl;
+	p = &svc->destinations;
+	for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
+		if (list_empty(p)) {
+			b->dest = NULL;
+		} else {
+			if (p == &svc->destinations)
+				p = p->next;
+
+			dest = list_entry(p, struct ip_vs_dest, n_list);
+			atomic_inc(&dest->refcnt);
+			b->dest = dest;
+
+			p = p->next;
+		}
+		b++;
+	}
+	return 0;
+}
+
+
+/*
+ *      Flush all the hash buckets of the specified table.
+ */
+static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
+{
+	int i;
+	struct ip_vs_sh_bucket *b;
+
+	b = tbl;
+	for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
+		if (b->dest) {
+			atomic_dec(&b->dest->refcnt);
+			b->dest = NULL;
+		}
+		b++;
+	}
+}
+
+
+static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_sh_bucket *tbl;
+
+	/* allocate the SH table for this service */
+	tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
+		      GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_sh_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
+		  "current service\n",
+		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
+
+	/* assign the hash buckets with the updated service */
+	ip_vs_sh_assign(tbl, svc);
+
+	return 0;
+}
+
+
+static int ip_vs_sh_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_sh_bucket *tbl = svc->sched_data;
+
+	/* got to clean up hash buckets here */
+	ip_vs_sh_flush(tbl);
+
+	/* release the table itself */
+	kfree(svc->sched_data);
+	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
+		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
+
+	return 0;
+}
+
+
+static int ip_vs_sh_update_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_sh_bucket *tbl = svc->sched_data;
+
+	/* got to clean up hash buckets here */
+	ip_vs_sh_flush(tbl);
+
+	/* assign the hash buckets with the updated service */
+	ip_vs_sh_assign(tbl, svc);
+
+	return 0;
+}
+
+
+/*
+ *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
+ *      consider that the server is overloaded here.
+ */
+static inline int is_overloaded(struct ip_vs_dest *dest)
+{
+	return dest->flags & IP_VS_DEST_F_OVERLOAD;
+}
+
+
+/*
+ *      Source Hashing scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_sh_bucket *tbl;
+	struct iphdr *iph = ip_hdr(skb);
+
+	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
+
+	tbl = (struct ip_vs_sh_bucket *)svc->sched_data;
+	dest = ip_vs_sh_get(tbl, iph->saddr);
+	if (!dest
+	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
+	    || atomic_read(&dest->weight) <= 0
+	    || is_overloaded(dest)) {
+		return NULL;
+	}
+
+	IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(iph->saddr),
+		  NIPQUAD(dest->addr.ip),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ *      IPVS SH Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_sh_scheduler =
+{
+	.name =			"sh",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list	 =		LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	0,
+#endif
+	.init_service =		ip_vs_sh_init_svc,
+	.done_service =		ip_vs_sh_done_svc,
+	.update_service =	ip_vs_sh_update_svc,
+	.schedule =		ip_vs_sh_schedule,
+};
+
+
+static int __init ip_vs_sh_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_sh_scheduler);
+}
+
+
+static void __exit ip_vs_sh_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
+}
+
+
+module_init(ip_vs_sh_init);
+module_exit(ip_vs_sh_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
new file mode 100644
index 00000000000..de5e7e118ee
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -0,0 +1,942 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the NetFilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * ip_vs_sync:  sync connection info from master load balancer to backups
+ *              through multicast
+ *
+ * Changes:
+ *	Alexandre Cassen	:	Added master & backup support at a time.
+ *	Alexandre Cassen	:	Added SyncID support for incoming sync
+ *					messages filtering.
+ *	Justin Ossevoort	:	Fix endian problem on sync message size.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/inetdevice.h>
+#include <linux/net.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/igmp.h>                 /* for ip_mc_join_group */
+#include <linux/udp.h>
+#include <linux/err.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/kernel.h>
+
+#include <net/ip.h>
+#include <net/sock.h>
+
+#include <net/ip_vs.h>
+
+#define IP_VS_SYNC_GROUP 0xe0000051    /* multicast addr - 224.0.0.81 */
+#define IP_VS_SYNC_PORT  8848          /* multicast port */
+
+
+/*
+ *	IPVS sync connection entry
+ */
+struct ip_vs_sync_conn {
+	__u8			reserved;
+
+	/* Protocol, addresses and port numbers */
+	__u8			protocol;       /* Which protocol (TCP/UDP) */
+	__be16			cport;
+	__be16                  vport;
+	__be16                  dport;
+	__be32                  caddr;          /* client address */
+	__be32                  vaddr;          /* virtual address */
+	__be32                  daddr;          /* destination address */
+
+	/* Flags and state transition */
+	__be16                  flags;          /* status flags */
+	__be16                  state;          /* state info */
+
+	/* The sequence options start here */
+};
+
+struct ip_vs_sync_conn_options {
+	struct ip_vs_seq        in_seq;         /* incoming seq. struct */
+	struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
+};
+
+struct ip_vs_sync_thread_data {
+	struct socket *sock;
+	char *buf;
+};
+
+#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn))
+#define FULL_CONN_SIZE  \
+(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
+
+
+/*
+  The master mulitcasts messages to the backup load balancers in the
+  following format.
+
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |  Count Conns  |    SyncID     |            Size               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                                                               |
+      |                    IPVS Sync Connection (1)                   |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                            .                                  |
+      |                            .                                  |
+      |                            .                                  |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                                                               |
+      |                    IPVS Sync Connection (n)                   |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+#define SYNC_MESG_HEADER_LEN	4
+#define MAX_CONNS_PER_SYNCBUFF	255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
+
+struct ip_vs_sync_mesg {
+	__u8                    nr_conns;
+	__u8                    syncid;
+	__u16                   size;
+
+	/* ip_vs_sync_conn entries start here */
+};
+
+/* the maximum length of sync (sending/receiving) message */
+static int sync_send_mesg_maxlen;
+static int sync_recv_mesg_maxlen;
+
+struct ip_vs_sync_buff {
+	struct list_head        list;
+	unsigned long           firstuse;
+
+	/* pointers for the message data */
+	struct ip_vs_sync_mesg  *mesg;
+	unsigned char           *head;
+	unsigned char           *end;
+};
+
+
+/* the sync_buff list head and the lock */
+static LIST_HEAD(ip_vs_sync_queue);
+static DEFINE_SPINLOCK(ip_vs_sync_lock);
+
+/* current sync_buff for accepting new conn entries */
+static struct ip_vs_sync_buff   *curr_sb = NULL;
+static DEFINE_SPINLOCK(curr_sb_lock);
+
+/* ipvs sync daemon state */
+volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
+volatile int ip_vs_master_syncid = 0;
+volatile int ip_vs_backup_syncid = 0;
+
+/* multicast interface name */
+char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+
+/* sync daemon tasks */
+static struct task_struct *sync_master_thread;
+static struct task_struct *sync_backup_thread;
+
+/* multicast addr */
+static struct sockaddr_in mcast_addr = {
+	.sin_family		= AF_INET,
+	.sin_port		= __constant_htons(IP_VS_SYNC_PORT),
+	.sin_addr.s_addr	= __constant_htonl(IP_VS_SYNC_GROUP),
+};
+
+
+static inline struct ip_vs_sync_buff *sb_dequeue(void)
+{
+	struct ip_vs_sync_buff *sb;
+
+	spin_lock_bh(&ip_vs_sync_lock);
+	if (list_empty(&ip_vs_sync_queue)) {
+		sb = NULL;
+	} else {
+		sb = list_entry(ip_vs_sync_queue.next,
+				struct ip_vs_sync_buff,
+				list);
+		list_del(&sb->list);
+	}
+	spin_unlock_bh(&ip_vs_sync_lock);
+
+	return sb;
+}
+
+static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
+{
+	struct ip_vs_sync_buff *sb;
+
+	if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
+		return NULL;
+
+	if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+		kfree(sb);
+		return NULL;
+	}
+	sb->mesg->nr_conns = 0;
+	sb->mesg->syncid = ip_vs_master_syncid;
+	sb->mesg->size = 4;
+	sb->head = (unsigned char *)sb->mesg + 4;
+	sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+	sb->firstuse = jiffies;
+	return sb;
+}
+
+static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
+{
+	kfree(sb->mesg);
+	kfree(sb);
+}
+
+static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+{
+	spin_lock(&ip_vs_sync_lock);
+	if (ip_vs_sync_state & IP_VS_STATE_MASTER)
+		list_add_tail(&sb->list, &ip_vs_sync_queue);
+	else
+		ip_vs_sync_buff_release(sb);
+	spin_unlock(&ip_vs_sync_lock);
+}
+
+/*
+ *	Get the current sync buffer if it has been created for more
+ *	than the specified time or the specified time is zero.
+ */
+static inline struct ip_vs_sync_buff *
+get_curr_sync_buff(unsigned long time)
+{
+	struct ip_vs_sync_buff *sb;
+
+	spin_lock_bh(&curr_sb_lock);
+	if (curr_sb && (time == 0 ||
+			time_before(jiffies - curr_sb->firstuse, time))) {
+		sb = curr_sb;
+		curr_sb = NULL;
+	} else
+		sb = NULL;
+	spin_unlock_bh(&curr_sb_lock);
+	return sb;
+}
+
+
+/*
+ *      Add an ip_vs_conn information into the current sync_buff.
+ *      Called by ip_vs_in.
+ */
+void ip_vs_sync_conn(struct ip_vs_conn *cp)
+{
+	struct ip_vs_sync_mesg *m;
+	struct ip_vs_sync_conn *s;
+	int len;
+
+	spin_lock(&curr_sb_lock);
+	if (!curr_sb) {
+		if (!(curr_sb=ip_vs_sync_buff_create())) {
+			spin_unlock(&curr_sb_lock);
+			IP_VS_ERR("ip_vs_sync_buff_create failed.\n");
+			return;
+		}
+	}
+
+	len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
+		SIMPLE_CONN_SIZE;
+	m = curr_sb->mesg;
+	s = (struct ip_vs_sync_conn *)curr_sb->head;
+
+	/* copy members */
+	s->protocol = cp->protocol;
+	s->cport = cp->cport;
+	s->vport = cp->vport;
+	s->dport = cp->dport;
+	s->caddr = cp->caddr.ip;
+	s->vaddr = cp->vaddr.ip;
+	s->daddr = cp->daddr.ip;
+	s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
+	s->state = htons(cp->state);
+	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+		struct ip_vs_sync_conn_options *opt =
+			(struct ip_vs_sync_conn_options *)&s[1];
+		memcpy(opt, &cp->in_seq, sizeof(*opt));
+	}
+
+	m->nr_conns++;
+	m->size += len;
+	curr_sb->head += len;
+
+	/* check if there is a space for next one */
+	if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
+		sb_queue_tail(curr_sb);
+		curr_sb = NULL;
+	}
+	spin_unlock(&curr_sb_lock);
+
+	/* synchronize its controller if it has */
+	if (cp->control)
+		ip_vs_sync_conn(cp->control);
+}
+
+
+/*
+ *      Process received multicast message and create the corresponding
+ *      ip_vs_conn entries.
+ */
+static void ip_vs_process_message(const char *buffer, const size_t buflen)
+{
+	struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
+	struct ip_vs_sync_conn *s;
+	struct ip_vs_sync_conn_options *opt;
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp;
+	struct ip_vs_dest *dest;
+	char *p;
+	int i;
+
+	if (buflen < sizeof(struct ip_vs_sync_mesg)) {
+		IP_VS_ERR_RL("sync message header too short\n");
+		return;
+	}
+
+	/* Convert size back to host byte order */
+	m->size = ntohs(m->size);
+
+	if (buflen != m->size) {
+		IP_VS_ERR_RL("bogus sync message size\n");
+		return;
+	}
+
+	/* SyncID sanity check */
+	if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {
+		IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
+			  m->syncid);
+		return;
+	}
+
+	p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
+	for (i=0; i<m->nr_conns; i++) {
+		unsigned flags, state;
+
+		if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
+			IP_VS_ERR_RL("bogus conn in sync message\n");
+			return;
+		}
+		s = (struct ip_vs_sync_conn *) p;
+		flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
+		flags &= ~IP_VS_CONN_F_HASHED;
+		if (flags & IP_VS_CONN_F_SEQ_MASK) {
+			opt = (struct ip_vs_sync_conn_options *)&s[1];
+			p += FULL_CONN_SIZE;
+			if (p > buffer+buflen) {
+				IP_VS_ERR_RL("bogus conn options in sync message\n");
+				return;
+			}
+		} else {
+			opt = NULL;
+			p += SIMPLE_CONN_SIZE;
+		}
+
+		state = ntohs(s->state);
+		if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+			pp = ip_vs_proto_get(s->protocol);
+			if (!pp) {
+				IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
+					s->protocol);
+				continue;
+			}
+			if (state >= pp->num_states) {
+				IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
+					pp->name, state);
+				continue;
+			}
+		} else {
+			/* protocol in templates is not used for state/timeout */
+			pp = NULL;
+			if (state > 0) {
+				IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
+					state);
+				state = 0;
+			}
+		}
+
+		if (!(flags & IP_VS_CONN_F_TEMPLATE))
+			cp = ip_vs_conn_in_get(AF_INET, s->protocol,
+					       (union nf_inet_addr *)&s->caddr,
+					       s->cport,
+					       (union nf_inet_addr *)&s->vaddr,
+					       s->vport);
+		else
+			cp = ip_vs_ct_in_get(AF_INET, s->protocol,
+					     (union nf_inet_addr *)&s->caddr,
+					     s->cport,
+					     (union nf_inet_addr *)&s->vaddr,
+					     s->vport);
+		if (!cp) {
+			/*
+			 * Find the appropriate destination for the connection.
+			 * If it is not found the connection will remain unbound
+			 * but still handled.
+			 */
+			dest = ip_vs_find_dest(AF_INET,
+					       (union nf_inet_addr *)&s->daddr,
+					       s->dport,
+					       (union nf_inet_addr *)&s->vaddr,
+					       s->vport,
+					       s->protocol);
+			/*  Set the approprite ativity flag */
+			if (s->protocol == IPPROTO_TCP) {
+				if (state != IP_VS_TCP_S_ESTABLISHED)
+					flags |= IP_VS_CONN_F_INACTIVE;
+				else
+					flags &= ~IP_VS_CONN_F_INACTIVE;
+			}
+			cp = ip_vs_conn_new(AF_INET, s->protocol,
+					    (union nf_inet_addr *)&s->caddr,
+					    s->cport,
+					    (union nf_inet_addr *)&s->vaddr,
+					    s->vport,
+					    (union nf_inet_addr *)&s->daddr,
+					    s->dport,
+					    flags, dest);
+			if (dest)
+				atomic_dec(&dest->refcnt);
+			if (!cp) {
+				IP_VS_ERR("ip_vs_conn_new failed\n");
+				return;
+			}
+		} else if (!cp->dest) {
+			dest = ip_vs_try_bind_dest(cp);
+			if (dest)
+				atomic_dec(&dest->refcnt);
+		} else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
+			   (cp->state != state)) {
+			/* update active/inactive flag for the connection */
+			dest = cp->dest;
+			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+				(state != IP_VS_TCP_S_ESTABLISHED)) {
+				atomic_dec(&dest->activeconns);
+				atomic_inc(&dest->inactconns);
+				cp->flags |= IP_VS_CONN_F_INACTIVE;
+			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+				(state == IP_VS_TCP_S_ESTABLISHED)) {
+				atomic_inc(&dest->activeconns);
+				atomic_dec(&dest->inactconns);
+				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+			}
+		}
+
+		if (opt)
+			memcpy(&cp->in_seq, opt, sizeof(*opt));
+		atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
+		cp->state = state;
+		cp->old_state = cp->state;
+		/*
+		 * We can not recover the right timeout for templates
+		 * in all cases, we can not find the right fwmark
+		 * virtual service. If needed, we can do it for
+		 * non-fwmark persistent services.
+		 */
+		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
+			cp->timeout = pp->timeout_table[state];
+		else
+			cp->timeout = (3*60*HZ);
+		ip_vs_conn_put(cp);
+	}
+}
+
+
+/*
+ *      Setup loopback of outgoing multicasts on a sending socket
+ */
+static void set_mcast_loop(struct sock *sk, u_char loop)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
+	lock_sock(sk);
+	inet->mc_loop = loop ? 1 : 0;
+	release_sock(sk);
+}
+
+/*
+ *      Specify TTL for outgoing multicasts on a sending socket
+ */
+static void set_mcast_ttl(struct sock *sk, u_char ttl)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	/* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
+	lock_sock(sk);
+	inet->mc_ttl = ttl;
+	release_sock(sk);
+}
+
+/*
+ *      Specifiy default interface for outgoing multicasts
+ */
+static int set_mcast_if(struct sock *sk, char *ifname)
+{
+	struct net_device *dev;
+	struct inet_sock *inet = inet_sk(sk);
+
+	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+		return -ENODEV;
+
+	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+		return -EINVAL;
+
+	lock_sock(sk);
+	inet->mc_index = dev->ifindex;
+	/*  inet->mc_addr  = 0; */
+	release_sock(sk);
+
+	return 0;
+}
+
+
+/*
+ *	Set the maximum length of sync message according to the
+ *	specified interface's MTU.
+ */
+static int set_sync_mesg_maxlen(int sync_state)
+{
+	struct net_device *dev;
+	int num;
+
+	if (sync_state == IP_VS_STATE_MASTER) {
+		if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
+			return -ENODEV;
+
+		num = (dev->mtu - sizeof(struct iphdr) -
+		       sizeof(struct udphdr) -
+		       SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
+		sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
+			SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
+		IP_VS_DBG(7, "setting the maximum length of sync sending "
+			  "message %d.\n", sync_send_mesg_maxlen);
+	} else if (sync_state == IP_VS_STATE_BACKUP) {
+		if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
+			return -ENODEV;
+
+		sync_recv_mesg_maxlen = dev->mtu -
+			sizeof(struct iphdr) - sizeof(struct udphdr);
+		IP_VS_DBG(7, "setting the maximum length of sync receiving "
+			  "message %d.\n", sync_recv_mesg_maxlen);
+	}
+
+	return 0;
+}
+
+
+/*
+ *      Join a multicast group.
+ *      the group is specified by a class D multicast address 224.0.0.0/8
+ *      in the in_addr structure passed in as a parameter.
+ */
+static int
+join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
+{
+	struct ip_mreqn mreq;
+	struct net_device *dev;
+	int ret;
+
+	memset(&mreq, 0, sizeof(mreq));
+	memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
+
+	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+		return -ENODEV;
+	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+		return -EINVAL;
+
+	mreq.imr_ifindex = dev->ifindex;
+
+	lock_sock(sk);
+	ret = ip_mc_join_group(sk, &mreq);
+	release_sock(sk);
+
+	return ret;
+}
+
+
+static int bind_mcastif_addr(struct socket *sock, char *ifname)
+{
+	struct net_device *dev;
+	__be32 addr;
+	struct sockaddr_in sin;
+
+	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+		return -ENODEV;
+
+	addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
+	if (!addr)
+		IP_VS_ERR("You probably need to specify IP address on "
+			  "multicast interface.\n");
+
+	IP_VS_DBG(7, "binding socket with (%s) %u.%u.%u.%u\n",
+		  ifname, NIPQUAD(addr));
+
+	/* Now bind the socket with the address of multicast interface */
+	sin.sin_family	     = AF_INET;
+	sin.sin_addr.s_addr  = addr;
+	sin.sin_port         = 0;
+
+	return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
+}
+
+/*
+ *      Set up sending multicast socket over UDP
+ */
+static struct socket * make_send_sock(void)
+{
+	struct socket *sock;
+	int result;
+
+	/* First create a socket */
+	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	if (result < 0) {
+		IP_VS_ERR("Error during creation of socket; terminating\n");
+		return ERR_PTR(result);
+	}
+
+	result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+	if (result < 0) {
+		IP_VS_ERR("Error setting outbound mcast interface\n");
+		goto error;
+	}
+
+	set_mcast_loop(sock->sk, 0);
+	set_mcast_ttl(sock->sk, 1);
+
+	result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+	if (result < 0) {
+		IP_VS_ERR("Error binding address of the mcast interface\n");
+		goto error;
+	}
+
+	result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
+			sizeof(struct sockaddr), 0);
+	if (result < 0) {
+		IP_VS_ERR("Error connecting to the multicast addr\n");
+		goto error;
+	}
+
+	return sock;
+
+  error:
+	sock_release(sock);
+	return ERR_PTR(result);
+}
+
+
+/*
+ *      Set up receiving multicast socket over UDP
+ */
+static struct socket * make_receive_sock(void)
+{
+	struct socket *sock;
+	int result;
+
+	/* First create a socket */
+	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	if (result < 0) {
+		IP_VS_ERR("Error during creation of socket; terminating\n");
+		return ERR_PTR(result);
+	}
+
+	/* it is equivalent to the REUSEADDR option in user-space */
+	sock->sk->sk_reuse = 1;
+
+	result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
+			sizeof(struct sockaddr));
+	if (result < 0) {
+		IP_VS_ERR("Error binding to the multicast addr\n");
+		goto error;
+	}
+
+	/* join the multicast group */
+	result = join_mcast_group(sock->sk,
+			(struct in_addr *) &mcast_addr.sin_addr,
+			ip_vs_backup_mcast_ifn);
+	if (result < 0) {
+		IP_VS_ERR("Error joining to the multicast group\n");
+		goto error;
+	}
+
+	return sock;
+
+  error:
+	sock_release(sock);
+	return ERR_PTR(result);
+}
+
+
+static int
+ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
+{
+	struct msghdr	msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
+	struct kvec	iov;
+	int		len;
+
+	EnterFunction(7);
+	iov.iov_base     = (void *)buffer;
+	iov.iov_len      = length;
+
+	len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
+
+	LeaveFunction(7);
+	return len;
+}
+
+static void
+ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
+{
+	int msize;
+
+	msize = msg->size;
+
+	/* Put size in network byte order */
+	msg->size = htons(msg->size);
+
+	if (ip_vs_send_async(sock, (char *)msg, msize) != msize)
+		IP_VS_ERR("ip_vs_send_async error\n");
+}
+
+static int
+ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
+{
+	struct msghdr		msg = {NULL,};
+	struct kvec		iov;
+	int			len;
+
+	EnterFunction(7);
+
+	/* Receive a packet */
+	iov.iov_base     = buffer;
+	iov.iov_len      = (size_t)buflen;
+
+	len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, 0);
+
+	if (len < 0)
+		return -1;
+
+	LeaveFunction(7);
+	return len;
+}
+
+
+static int sync_thread_master(void *data)
+{
+	struct ip_vs_sync_thread_data *tinfo = data;
+	struct ip_vs_sync_buff *sb;
+
+	IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, "
+		   "syncid = %d\n",
+		   ip_vs_master_mcast_ifn, ip_vs_master_syncid);
+
+	while (!kthread_should_stop()) {
+		while ((sb = sb_dequeue())) {
+			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
+			ip_vs_sync_buff_release(sb);
+		}
+
+		/* check if entries stay in curr_sb for 2 seconds */
+		sb = get_curr_sync_buff(2 * HZ);
+		if (sb) {
+			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
+			ip_vs_sync_buff_release(sb);
+		}
+
+		schedule_timeout_interruptible(HZ);
+	}
+
+	/* clean up the sync_buff queue */
+	while ((sb=sb_dequeue())) {
+		ip_vs_sync_buff_release(sb);
+	}
+
+	/* clean up the current sync_buff */
+	if ((sb = get_curr_sync_buff(0))) {
+		ip_vs_sync_buff_release(sb);
+	}
+
+	/* release the sending multicast socket */
+	sock_release(tinfo->sock);
+	kfree(tinfo);
+
+	return 0;
+}
+
+
+static int sync_thread_backup(void *data)
+{
+	struct ip_vs_sync_thread_data *tinfo = data;
+	int len;
+
+	IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, "
+		   "syncid = %d\n",
+		   ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
+
+	while (!kthread_should_stop()) {
+		wait_event_interruptible(*tinfo->sock->sk->sk_sleep,
+			 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
+			 || kthread_should_stop());
+
+		/* do we have data now? */
+		while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
+			len = ip_vs_receive(tinfo->sock, tinfo->buf,
+					sync_recv_mesg_maxlen);
+			if (len <= 0) {
+				IP_VS_ERR("receiving message error\n");
+				break;
+			}
+
+			/* disable bottom half, because it accesses the data
+			   shared by softirq while getting/creating conns */
+			local_bh_disable();
+			ip_vs_process_message(tinfo->buf, len);
+			local_bh_enable();
+		}
+	}
+
+	/* release the sending multicast socket */
+	sock_release(tinfo->sock);
+	kfree(tinfo->buf);
+	kfree(tinfo);
+
+	return 0;
+}
+
+
+int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+{
+	struct ip_vs_sync_thread_data *tinfo;
+	struct task_struct **realtask, *task;
+	struct socket *sock;
+	char *name, *buf = NULL;
+	int (*threadfn)(void *data);
+	int result = -ENOMEM;
+
+	IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
+	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
+		  sizeof(struct ip_vs_sync_conn));
+
+	if (state == IP_VS_STATE_MASTER) {
+		if (sync_master_thread)
+			return -EEXIST;
+
+		strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
+			sizeof(ip_vs_master_mcast_ifn));
+		ip_vs_master_syncid = syncid;
+		realtask = &sync_master_thread;
+		name = "ipvs_syncmaster";
+		threadfn = sync_thread_master;
+		sock = make_send_sock();
+	} else if (state == IP_VS_STATE_BACKUP) {
+		if (sync_backup_thread)
+			return -EEXIST;
+
+		strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
+			sizeof(ip_vs_backup_mcast_ifn));
+		ip_vs_backup_syncid = syncid;
+		realtask = &sync_backup_thread;
+		name = "ipvs_syncbackup";
+		threadfn = sync_thread_backup;
+		sock = make_receive_sock();
+	} else {
+		return -EINVAL;
+	}
+
+	if (IS_ERR(sock)) {
+		result = PTR_ERR(sock);
+		goto out;
+	}
+
+	set_sync_mesg_maxlen(state);
+	if (state == IP_VS_STATE_BACKUP) {
+		buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+		if (!buf)
+			goto outsocket;
+	}
+
+	tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
+	if (!tinfo)
+		goto outbuf;
+
+	tinfo->sock = sock;
+	tinfo->buf = buf;
+
+	task = kthread_run(threadfn, tinfo, name);
+	if (IS_ERR(task)) {
+		result = PTR_ERR(task);
+		goto outtinfo;
+	}
+
+	/* mark as active */
+	*realtask = task;
+	ip_vs_sync_state |= state;
+
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	return 0;
+
+outtinfo:
+	kfree(tinfo);
+outbuf:
+	kfree(buf);
+outsocket:
+	sock_release(sock);
+out:
+	return result;
+}
+
+
+int stop_sync_thread(int state)
+{
+	IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
+
+	if (state == IP_VS_STATE_MASTER) {
+		if (!sync_master_thread)
+			return -ESRCH;
+
+		IP_VS_INFO("stopping master sync thread %d ...\n",
+			   task_pid_nr(sync_master_thread));
+
+		/*
+		 * The lock synchronizes with sb_queue_tail(), so that we don't
+		 * add sync buffers to the queue, when we are already in
+		 * progress of stopping the master sync daemon.
+		 */
+
+		spin_lock_bh(&ip_vs_sync_lock);
+		ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
+		spin_unlock_bh(&ip_vs_sync_lock);
+		kthread_stop(sync_master_thread);
+		sync_master_thread = NULL;
+	} else if (state == IP_VS_STATE_BACKUP) {
+		if (!sync_backup_thread)
+			return -ESRCH;
+
+		IP_VS_INFO("stopping backup sync thread %d ...\n",
+			   task_pid_nr(sync_backup_thread));
+
+		ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
+		kthread_stop(sync_backup_thread);
+		sync_backup_thread = NULL;
+	} else {
+		return -EINVAL;
+	}
+
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+
+	return 0;
+}
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
new file mode 100644
index 00000000000..8c596e71259
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -0,0 +1,128 @@
+/*
+ * IPVS:        Weighted Least-Connection Scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Wensong Zhang            :     changed the ip_vs_wlc_schedule to return dest
+ *     Wensong Zhang            :     changed to use the inactconns in scheduling
+ *     Wensong Zhang            :     changed some comestics things for debugging
+ *     Wensong Zhang            :     changed for the d-linked destination list
+ *     Wensong Zhang            :     added the ip_vs_wlc_update_svc
+ *     Wensong Zhang            :     added any dest with weight=0 is quiesced
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static inline unsigned int
+ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest)
+{
+	/*
+	 * We think the overhead of processing active connections is 256
+	 * times higher than that of inactive connections in average. (This
+	 * 256 times might not be accurate, we will change it later) We
+	 * use the following formula to estimate the overhead now:
+	 *		  dest->activeconns*256 + dest->inactconns
+	 */
+	return (atomic_read(&dest->activeconns) << 8) +
+		atomic_read(&dest->inactconns);
+}
+
+
+/*
+ *	Weighted Least Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest, *least;
+	unsigned int loh, doh;
+
+	IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n");
+
+	/*
+	 * We calculate the load of each dest server as follows:
+	 *		  (dest overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *		  h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connections.
+	 */
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+		    atomic_read(&dest->weight) > 0) {
+			least = dest;
+			loh = ip_vs_wlc_dest_overhead(least);
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/*
+	 *    Find the destination with the least load.
+	 */
+  nextstage:
+	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+		doh = ip_vs_wlc_dest_overhead(dest);
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG_BUF(6, "WLC: server %s:%u "
+		      "activeconns %d refcnt %d weight %d overhead %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+		      atomic_read(&least->activeconns),
+		      atomic_read(&least->refcnt),
+		      atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_wlc_scheduler =
+{
+	.name =			"wlc",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
+	.schedule =		ip_vs_wlc_schedule,
+};
+
+
+static int __init ip_vs_wlc_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_wlc_scheduler);
+}
+
+static void __exit ip_vs_wlc_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler);
+}
+
+module_init(ip_vs_wlc_init);
+module_exit(ip_vs_wlc_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
new file mode 100644
index 00000000000..7ea92fed50b
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -0,0 +1,237 @@
+/*
+ * IPVS:        Weighted Round-Robin Scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Wensong Zhang            :     changed the ip_vs_wrr_schedule to return dest
+ *     Wensong Zhang            :     changed some comestics things for debugging
+ *     Wensong Zhang            :     changed for the d-linked destination list
+ *     Wensong Zhang            :     added the ip_vs_wrr_update_svc
+ *     Julian Anastasov         :     fixed the bug of returning destination
+ *                                    with weight 0 when all weights are zero
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/net.h>
+
+#include <net/ip_vs.h>
+
+/*
+ * current destination pointer for weighted round-robin scheduling
+ */
+struct ip_vs_wrr_mark {
+	struct list_head *cl;	/* current list head */
+	int cw;			/* current weight */
+	int mw;			/* maximum weight */
+	int di;			/* decreasing interval */
+};
+
+
+/*
+ *    Get the gcd of server weights
+ */
+static int gcd(int a, int b)
+{
+	int c;
+
+	while ((c = a % b)) {
+		a = b;
+		b = c;
+	}
+	return b;
+}
+
+static int ip_vs_wrr_gcd_weight(struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *dest;
+	int weight;
+	int g = 0;
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		weight = atomic_read(&dest->weight);
+		if (weight > 0) {
+			if (g > 0)
+				g = gcd(weight, g);
+			else
+				g = weight;
+		}
+	}
+	return g ? g : 1;
+}
+
+
+/*
+ *    Get the maximum weight of the service destinations.
+ */
+static int ip_vs_wrr_max_weight(struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *dest;
+	int weight = 0;
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (atomic_read(&dest->weight) > weight)
+			weight = atomic_read(&dest->weight);
+	}
+
+	return weight;
+}
+
+
+static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_wrr_mark *mark;
+
+	/*
+	 *    Allocate the mark variable for WRR scheduling
+	 */
+	mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC);
+	if (mark == NULL) {
+		IP_VS_ERR("ip_vs_wrr_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	mark->cl = &svc->destinations;
+	mark->cw = 0;
+	mark->mw = ip_vs_wrr_max_weight(svc);
+	mark->di = ip_vs_wrr_gcd_weight(svc);
+	svc->sched_data = mark;
+
+	return 0;
+}
+
+
+static int ip_vs_wrr_done_svc(struct ip_vs_service *svc)
+{
+	/*
+	 *    Release the mark variable
+	 */
+	kfree(svc->sched_data);
+
+	return 0;
+}
+
+
+static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_wrr_mark *mark = svc->sched_data;
+
+	mark->cl = &svc->destinations;
+	mark->mw = ip_vs_wrr_max_weight(svc);
+	mark->di = ip_vs_wrr_gcd_weight(svc);
+	if (mark->cw > mark->mw)
+		mark->cw = 0;
+	return 0;
+}
+
+
+/*
+ *    Weighted Round-Robin Scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_wrr_mark *mark = svc->sched_data;
+	struct list_head *p;
+
+	IP_VS_DBG(6, "ip_vs_wrr_schedule(): Scheduling...\n");
+
+	/*
+	 * This loop will always terminate, because mark->cw in (0, max_weight]
+	 * and at least one server has its weight equal to max_weight.
+	 */
+	write_lock(&svc->sched_lock);
+	p = mark->cl;
+	while (1) {
+		if (mark->cl == &svc->destinations) {
+			/* it is at the head of the destination list */
+
+			if (mark->cl == mark->cl->next) {
+				/* no dest entry */
+				dest = NULL;
+				goto out;
+			}
+
+			mark->cl = svc->destinations.next;
+			mark->cw -= mark->di;
+			if (mark->cw <= 0) {
+				mark->cw = mark->mw;
+				/*
+				 * Still zero, which means no available servers.
+				 */
+				if (mark->cw == 0) {
+					mark->cl = &svc->destinations;
+					IP_VS_ERR_RL("ip_vs_wrr_schedule(): "
+						   "no available servers\n");
+					dest = NULL;
+					goto out;
+				}
+			}
+		} else
+			mark->cl = mark->cl->next;
+
+		if (mark->cl != &svc->destinations) {
+			/* not at the head of the list */
+			dest = list_entry(mark->cl, struct ip_vs_dest, n_list);
+			if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+			    atomic_read(&dest->weight) >= mark->cw) {
+				/* got it */
+				break;
+			}
+		}
+
+		if (mark->cl == p && mark->cw == mark->di) {
+			/* back to the start, and no dest is found.
+			   It is only possible when all dests are OVERLOADED */
+			dest = NULL;
+			goto out;
+		}
+	}
+
+	IP_VS_DBG_BUF(6, "WRR: server %s:%u "
+		      "activeconns %d refcnt %d weight %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+		      atomic_read(&dest->activeconns),
+		      atomic_read(&dest->refcnt),
+		      atomic_read(&dest->weight));
+
+  out:
+	write_unlock(&svc->sched_lock);
+	return dest;
+}
+
+
+static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
+	.name =			"wrr",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
+	.init_service =		ip_vs_wrr_init_svc,
+	.done_service =		ip_vs_wrr_done_svc,
+	.update_service =	ip_vs_wrr_update_svc,
+	.schedule =		ip_vs_wrr_schedule,
+};
+
+static int __init ip_vs_wrr_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ;
+}
+
+static void __exit ip_vs_wrr_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler);
+}
+
+module_init(ip_vs_wrr_init);
+module_exit(ip_vs_wrr_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
new file mode 100644
index 00000000000..02ddc2b3ce2
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -0,0 +1,1004 @@
+/*
+ * ip_vs_xmit.c: various packet transmitters for IPVS
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/tcp.h>                  /* for tcphdr */
+#include <net/ip.h>
+#include <net/tcp.h>                    /* for csum_tcpudp_magic */
+#include <net/udp.h>
+#include <net/icmp.h>                   /* for icmp_send */
+#include <net/route.h>                  /* for ip_route_output */
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <linux/icmpv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *      Destination cache to speed up outgoing route lookup
+ */
+static inline void
+__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
+{
+	struct dst_entry *old_dst;
+
+	old_dst = dest->dst_cache;
+	dest->dst_cache = dst;
+	dest->dst_rtos = rtos;
+	dst_release(old_dst);
+}
+
+static inline struct dst_entry *
+__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
+{
+	struct dst_entry *dst = dest->dst_cache;
+
+	if (!dst)
+		return NULL;
+	if ((dst->obsolete
+	     || (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
+	    dst->ops->check(dst, cookie) == NULL) {
+		dest->dst_cache = NULL;
+		dst_release(dst);
+		return NULL;
+	}
+	dst_hold(dst);
+	return dst;
+}
+
+static struct rtable *
+__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct ip_vs_dest *dest = cp->dest;
+
+	if (dest) {
+		spin_lock(&dest->dst_lock);
+		if (!(rt = (struct rtable *)
+		      __ip_vs_dst_check(dest, rtos, 0))) {
+			struct flowi fl = {
+				.oif = 0,
+				.nl_u = {
+					.ip4_u = {
+						.daddr = dest->addr.ip,
+						.saddr = 0,
+						.tos = rtos, } },
+			};
+
+			if (ip_route_output_key(&init_net, &rt, &fl)) {
+				spin_unlock(&dest->dst_lock);
+				IP_VS_DBG_RL("ip_route_output error, "
+					     "dest: %u.%u.%u.%u\n",
+					     NIPQUAD(dest->addr.ip));
+				return NULL;
+			}
+			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
+			IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n",
+				  NIPQUAD(dest->addr.ip),
+				  atomic_read(&rt->u.dst.__refcnt), rtos);
+		}
+		spin_unlock(&dest->dst_lock);
+	} else {
+		struct flowi fl = {
+			.oif = 0,
+			.nl_u = {
+				.ip4_u = {
+					.daddr = cp->daddr.ip,
+					.saddr = 0,
+					.tos = rtos, } },
+		};
+
+		if (ip_route_output_key(&init_net, &rt, &fl)) {
+			IP_VS_DBG_RL("ip_route_output error, dest: "
+				     "%u.%u.%u.%u\n", NIPQUAD(cp->daddr.ip));
+			return NULL;
+		}
+	}
+
+	return rt;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static struct rt6_info *
+__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
+{
+	struct rt6_info *rt;			/* Route to the other host */
+	struct ip_vs_dest *dest = cp->dest;
+
+	if (dest) {
+		spin_lock(&dest->dst_lock);
+		rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
+		if (!rt) {
+			struct flowi fl = {
+				.oif = 0,
+				.nl_u = {
+					.ip6_u = {
+						.daddr = dest->addr.in6,
+						.saddr = {
+							.s6_addr32 =
+								{ 0, 0, 0, 0 },
+						},
+					},
+				},
+			};
+
+			rt = (struct rt6_info *)ip6_route_output(&init_net,
+								 NULL, &fl);
+			if (!rt) {
+				spin_unlock(&dest->dst_lock);
+				IP_VS_DBG_RL("ip6_route_output error, "
+					     "dest: " NIP6_FMT "\n",
+					     NIP6(dest->addr.in6));
+				return NULL;
+			}
+			__ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
+			IP_VS_DBG(10, "new dst " NIP6_FMT ", refcnt=%d\n",
+				  NIP6(dest->addr.in6),
+				  atomic_read(&rt->u.dst.__refcnt));
+		}
+		spin_unlock(&dest->dst_lock);
+	} else {
+		struct flowi fl = {
+			.oif = 0,
+			.nl_u = {
+				.ip6_u = {
+					.daddr = cp->daddr.in6,
+					.saddr = {
+						.s6_addr32 = { 0, 0, 0, 0 },
+					},
+				},
+			},
+		};
+
+		rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+		if (!rt) {
+			IP_VS_DBG_RL("ip6_route_output error, dest: "
+				     NIP6_FMT "\n", NIP6(cp->daddr.in6));
+			return NULL;
+		}
+	}
+
+	return rt;
+}
+#endif
+
+
+/*
+ *	Release dest->dst_cache before a dest is removed
+ */
+void
+ip_vs_dst_reset(struct ip_vs_dest *dest)
+{
+	struct dst_entry *old_dst;
+
+	old_dst = dest->dst_cache;
+	dest->dst_cache = NULL;
+	dst_release(old_dst);
+}
+
+#define IP_VS_XMIT(pf, skb, rt)				\
+do {							\
+	(skb)->ipvs_property = 1;			\
+	skb_forward_csum(skb);				\
+	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
+		(rt)->u.dst.dev, dst_output);		\
+} while (0)
+
+
+/*
+ *      NULL transmitter (do nothing except return NF_ACCEPT)
+ */
+int
+ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+		struct ip_vs_protocol *pp)
+{
+	/* we do not touch skb and do not need pskb ptr */
+	return NF_ACCEPT;
+}
+
+
+/*
+ *      Bypass transmitter
+ *      Let packets bypass the destination when the destination is not
+ *      available, it may be only used in transparent cache cluster.
+ */
+int
+ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+		  struct ip_vs_protocol *pp)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct iphdr  *iph = ip_hdr(skb);
+	u8     tos = iph->tos;
+	int    mtu;
+	struct flowi fl = {
+		.oif = 0,
+		.nl_u = {
+			.ip4_u = {
+				.daddr = iph->daddr,
+				.saddr = 0,
+				.tos = RT_TOS(tos), } },
+	};
+
+	EnterFunction(10);
+
+	if (ip_route_output_key(&init_net, &rt, &fl)) {
+		IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
+			     "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
+		goto tx_error_icmp;
+	}
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+		ip_rt_put(rt);
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Call ip_send_check because we are not sure it is called
+	 * after ip_defrag. Is copy-on-write needed?
+	 */
+	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
+		ip_rt_put(rt);
+		return NF_STOLEN;
+	}
+	ip_send_check(ip_hdr(skb));
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET, skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+ tx_error_icmp:
+	dst_link_failure(skb);
+ tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		     struct ip_vs_protocol *pp)
+{
+	struct rt6_info *rt;			/* Route to the other host */
+	struct ipv6hdr  *iph = ipv6_hdr(skb);
+	int    mtu;
+	struct flowi fl = {
+		.oif = 0,
+		.nl_u = {
+			.ip6_u = {
+				.daddr = iph->daddr,
+				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
+	};
+
+	EnterFunction(10);
+
+	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+	if (!rt) {
+		IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, "
+			     "dest: " NIP6_FMT "\n", NIP6(iph->daddr));
+		goto tx_error_icmp;
+	}
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if (skb->len > mtu) {
+		dst_release(&rt->u.dst);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Call ip_send_check because we are not sure it is called
+	 * after ip_defrag. Is copy-on-write needed?
+	 */
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(skb == NULL)) {
+		dst_release(&rt->u.dst);
+		return NF_STOLEN;
+	}
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET6, skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+ tx_error_icmp:
+	dst_link_failure(skb);
+ tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+#endif
+
+/*
+ *      NAT transmitter (only for outside-to-inside nat forwarding)
+ *      Not used for related ICMP
+ */
+int
+ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+	       struct ip_vs_protocol *pp)
+{
+	struct rtable *rt;		/* Route to the other host */
+	int mtu;
+	struct iphdr *iph = ip_hdr(skb);
+
+	EnterFunction(10);
+
+	/* check if it is a connection of no-client-port */
+	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+		__be16 _pt, *p;
+		p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
+		if (p == NULL)
+			goto tx_error;
+		ip_vs_conn_fill_cport(cp, *p);
+		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
+	}
+
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+		ip_rt_put(rt);
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
+		goto tx_error;
+	}
+
+	/* copy-on-write the packet before mangling it */
+	if (!skb_make_writable(skb, sizeof(struct iphdr)))
+		goto tx_error_put;
+
+	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+		goto tx_error_put;
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* mangle the packet */
+	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
+		goto tx_error;
+	ip_hdr(skb)->daddr = cp->daddr.ip;
+	ip_send_check(ip_hdr(skb));
+
+	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
+
+	/* FIXME: when application helper enlarges the packet and the length
+	   is larger than the MTU of outgoing device, there will be still
+	   MTU problem. */
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET, skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	LeaveFunction(10);
+	kfree_skb(skb);
+	return NF_STOLEN;
+  tx_error_put:
+	ip_rt_put(rt);
+	goto tx_error;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		  struct ip_vs_protocol *pp)
+{
+	struct rt6_info *rt;		/* Route to the other host */
+	int mtu;
+
+	EnterFunction(10);
+
+	/* check if it is a connection of no-client-port */
+	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+		__be16 _pt, *p;
+		p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
+				       sizeof(_pt), &_pt);
+		if (p == NULL)
+			goto tx_error;
+		ip_vs_conn_fill_cport(cp, *p);
+		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
+	}
+
+	rt = __ip_vs_get_out_rt_v6(cp);
+	if (!rt)
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if (skb->len > mtu) {
+		dst_release(&rt->u.dst);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+				 "ip_vs_nat_xmit_v6(): frag needed for");
+		goto tx_error;
+	}
+
+	/* copy-on-write the packet before mangling it */
+	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+		goto tx_error_put;
+
+	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+		goto tx_error_put;
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* mangle the packet */
+	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
+		goto tx_error;
+	ipv6_hdr(skb)->daddr = cp->daddr.in6;
+
+	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
+
+	/* FIXME: when application helper enlarges the packet and the length
+	   is larger than the MTU of outgoing device, there will be still
+	   MTU problem. */
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET6, skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	LeaveFunction(10);
+	kfree_skb(skb);
+	return NF_STOLEN;
+tx_error_put:
+	dst_release(&rt->u.dst);
+	goto tx_error;
+}
+#endif
+
+
+/*
+ *   IP Tunneling transmitter
+ *
+ *   This function encapsulates the packet in a new IP packet, its
+ *   destination will be set to cp->daddr. Most code of this function
+ *   is taken from ipip.c.
+ *
+ *   It is used in VS/TUN cluster. The load balancer selects a real
+ *   server from a cluster based on a scheduling algorithm,
+ *   encapsulates the request packet and forwards it to the selected
+ *   server. For example, all real servers are configured with
+ *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
+ *   the encapsulated packet, it will decapsulate the packet, processe
+ *   the request and return the response packets directly to the client
+ *   without passing the load balancer. This can greatly increase the
+ *   scalability of virtual server.
+ *
+ *   Used for ANY protocol
+ */
+int
+ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+		  struct ip_vs_protocol *pp)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct net_device *tdev;		/* Device to other host */
+	struct iphdr  *old_iph = ip_hdr(skb);
+	u8     tos = old_iph->tos;
+	__be16 df = old_iph->frag_off;
+	sk_buff_data_t old_transport_header = skb->transport_header;
+	struct iphdr  *iph;			/* Our new IP header */
+	unsigned int max_headroom;		/* The extra header space needed */
+	int    mtu;
+
+	EnterFunction(10);
+
+	if (skb->protocol != htons(ETH_P_IP)) {
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
+			     "ETH_P_IP: %d, skb protocol: %d\n",
+			     htons(ETH_P_IP), skb->protocol);
+		goto tx_error;
+	}
+
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
+		goto tx_error_icmp;
+
+	tdev = rt->u.dst.dev;
+
+	mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+	if (mtu < 68) {
+		ip_rt_put(rt);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
+		goto tx_error;
+	}
+	if (skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
+
+	df |= (old_iph->frag_off & htons(IP_DF));
+
+	if ((old_iph->frag_off & htons(IP_DF))
+	    && mtu < ntohs(old_iph->tot_len)) {
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		ip_rt_put(rt);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Okay, now see if we can stuff it in the buffer as-is.
+	 */
+	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
+
+	if (skb_headroom(skb) < max_headroom
+	    || skb_cloned(skb) || skb_shared(skb)) {
+		struct sk_buff *new_skb =
+			skb_realloc_headroom(skb, max_headroom);
+		if (!new_skb) {
+			ip_rt_put(rt);
+			kfree_skb(skb);
+			IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n");
+			return NF_STOLEN;
+		}
+		kfree_skb(skb);
+		skb = new_skb;
+		old_iph = ip_hdr(skb);
+	}
+
+	skb->transport_header = old_transport_header;
+
+	/* fix old IP header checksum */
+	ip_send_check(old_iph);
+
+	skb_push(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/*
+	 *	Push down and install the IPIP header.
+	 */
+	iph			=	ip_hdr(skb);
+	iph->version		=	4;
+	iph->ihl		=	sizeof(struct iphdr)>>2;
+	iph->frag_off		=	df;
+	iph->protocol		=	IPPROTO_IPIP;
+	iph->tos		=	tos;
+	iph->daddr		=	rt->rt_dst;
+	iph->saddr		=	rt->rt_src;
+	iph->ttl		=	old_iph->ttl;
+	ip_select_ident(iph, &rt->u.dst, NULL);
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	ip_local_out(skb);
+
+	LeaveFunction(10);
+
+	return NF_STOLEN;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		     struct ip_vs_protocol *pp)
+{
+	struct rt6_info *rt;		/* Route to the other host */
+	struct net_device *tdev;	/* Device to other host */
+	struct ipv6hdr  *old_iph = ipv6_hdr(skb);
+	sk_buff_data_t old_transport_header = skb->transport_header;
+	struct ipv6hdr  *iph;		/* Our new IP header */
+	unsigned int max_headroom;	/* The extra header space needed */
+	int    mtu;
+
+	EnterFunction(10);
+
+	if (skb->protocol != htons(ETH_P_IPV6)) {
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, "
+			     "ETH_P_IPV6: %d, skb protocol: %d\n",
+			     htons(ETH_P_IPV6), skb->protocol);
+		goto tx_error;
+	}
+
+	rt = __ip_vs_get_out_rt_v6(cp);
+	if (!rt)
+		goto tx_error_icmp;
+
+	tdev = rt->u.dst.dev;
+
+	mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
+	/* TODO IPv6: do we need this check in IPv6? */
+	if (mtu < 1280) {
+		dst_release(&rt->u.dst);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n");
+		goto tx_error;
+	}
+	if (skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
+
+	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		dst_release(&rt->u.dst);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Okay, now see if we can stuff it in the buffer as-is.
+	 */
+	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
+
+	if (skb_headroom(skb) < max_headroom
+	    || skb_cloned(skb) || skb_shared(skb)) {
+		struct sk_buff *new_skb =
+			skb_realloc_headroom(skb, max_headroom);
+		if (!new_skb) {
+			dst_release(&rt->u.dst);
+			kfree_skb(skb);
+			IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n");
+			return NF_STOLEN;
+		}
+		kfree_skb(skb);
+		skb = new_skb;
+		old_iph = ipv6_hdr(skb);
+	}
+
+	skb->transport_header = old_transport_header;
+
+	skb_push(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/*
+	 *	Push down and install the IPIP header.
+	 */
+	iph			=	ipv6_hdr(skb);
+	iph->version		=	6;
+	iph->nexthdr		=	IPPROTO_IPV6;
+	iph->payload_len	=	old_iph->payload_len + sizeof(old_iph);
+	iph->priority		=	old_iph->priority;
+	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
+	iph->daddr		=	rt->rt6i_dst.addr;
+	iph->saddr		=	cp->vaddr.in6; /* rt->rt6i_src.addr; */
+	iph->hop_limit		=	old_iph->hop_limit;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	ip6_local_out(skb);
+
+	LeaveFunction(10);
+
+	return NF_STOLEN;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+#endif
+
+
+/*
+ *      Direct Routing transmitter
+ *      Used for ANY protocol
+ */
+int
+ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+	      struct ip_vs_protocol *pp)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct iphdr  *iph = ip_hdr(skb);
+	int    mtu;
+
+	EnterFunction(10);
+
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		ip_rt_put(rt);
+		IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Call ip_send_check because we are not sure it is called
+	 * after ip_defrag. Is copy-on-write needed?
+	 */
+	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
+		ip_rt_put(rt);
+		return NF_STOLEN;
+	}
+	ip_send_check(ip_hdr(skb));
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET, skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		 struct ip_vs_protocol *pp)
+{
+	struct rt6_info *rt;			/* Route to the other host */
+	int    mtu;
+
+	EnterFunction(10);
+
+	rt = __ip_vs_get_out_rt_v6(cp);
+	if (!rt)
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if (skb->len > mtu) {
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		dst_release(&rt->u.dst);
+		IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Call ip_send_check because we are not sure it is called
+	 * after ip_defrag. Is copy-on-write needed?
+	 */
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(skb == NULL)) {
+		dst_release(&rt->u.dst);
+		return NF_STOLEN;
+	}
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET6, skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+#endif
+
+
+/*
+ *	ICMP packet transmitter
+ *	called by the ip_vs_in_icmp
+ */
+int
+ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+		struct ip_vs_protocol *pp, int offset)
+{
+	struct rtable	*rt;	/* Route to the other host */
+	int mtu;
+	int rc;
+
+	EnterFunction(10);
+
+	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
+	   forwarded directly here, because there is no need to
+	   translate address/port back */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
+		if (cp->packet_xmit)
+			rc = cp->packet_xmit(skb, cp, pp);
+		else
+			rc = NF_ACCEPT;
+		/* do not touch skb anymore */
+		atomic_inc(&cp->in_pkts);
+		goto out;
+	}
+
+	/*
+	 * mangle and send the packet here (only for VS/NAT)
+	 */
+
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
+		ip_rt_put(rt);
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
+		goto tx_error;
+	}
+
+	/* copy-on-write the packet before mangling it */
+	if (!skb_make_writable(skb, offset))
+		goto tx_error_put;
+
+	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+		goto tx_error_put;
+
+	/* drop the old route when skb is not shared */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	ip_vs_nat_icmp(skb, pp, cp, 0);
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET, skb, rt);
+
+	rc = NF_STOLEN;
+	goto out;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	dev_kfree_skb(skb);
+	rc = NF_STOLEN;
+  out:
+	LeaveFunction(10);
+	return rc;
+  tx_error_put:
+	ip_rt_put(rt);
+	goto tx_error;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		struct ip_vs_protocol *pp, int offset)
+{
+	struct rt6_info	*rt;	/* Route to the other host */
+	int mtu;
+	int rc;
+
+	EnterFunction(10);
+
+	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
+	   forwarded directly here, because there is no need to
+	   translate address/port back */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
+		if (cp->packet_xmit)
+			rc = cp->packet_xmit(skb, cp, pp);
+		else
+			rc = NF_ACCEPT;
+		/* do not touch skb anymore */
+		atomic_inc(&cp->in_pkts);
+		goto out;
+	}
+
+	/*
+	 * mangle and send the packet here (only for VS/NAT)
+	 */
+
+	rt = __ip_vs_get_out_rt_v6(cp);
+	if (!rt)
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if (skb->len > mtu) {
+		dst_release(&rt->u.dst);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
+		goto tx_error;
+	}
+
+	/* copy-on-write the packet before mangling it */
+	if (!skb_make_writable(skb, offset))
+		goto tx_error_put;
+
+	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+		goto tx_error_put;
+
+	/* drop the old route when skb is not shared */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	ip_vs_nat_icmp_v6(skb, pp, cp, 0);
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET6, skb, rt);
+
+	rc = NF_STOLEN;
+	goto out;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	dev_kfree_skb(skb);
+	rc = NF_STOLEN;
+out:
+	LeaveFunction(10);
+	return rc;
+tx_error_put:
+	dst_release(&rt->u.dst);
+	goto tx_error;
+}
+#endif
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 59bd8b903a1..03591d37b9c 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -22,19 +22,17 @@
 #define NF_CT_ACCT_DEFAULT 0
 #endif
 
-int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT;
-EXPORT_SYMBOL_GPL(nf_ct_acct);
+static int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT;
 
 module_param_named(acct, nf_ct_acct, bool, 0644);
 MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");
 
 #ifdef CONFIG_SYSCTL
-static struct ctl_table_header *acct_sysctl_header;
 static struct ctl_table acct_sysctl_table[] = {
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "nf_conntrack_acct",
-		.data		= &nf_ct_acct,
+		.data		= &init_net.ct.sysctl_acct,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
@@ -64,41 +62,87 @@ static struct nf_ct_ext_type acct_extend __read_mostly = {
 	.id	= NF_CT_EXT_ACCT,
 };
 
-int nf_conntrack_acct_init(void)
+#ifdef CONFIG_SYSCTL
+static int nf_conntrack_acct_init_sysctl(struct net *net)
 {
-	int ret;
+	struct ctl_table *table;
 
-#ifdef CONFIG_NF_CT_ACCT
-	printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n");
-	printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n");
-	printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
-#endif
+	table = kmemdup(acct_sysctl_table, sizeof(acct_sysctl_table),
+			GFP_KERNEL);
+	if (!table)
+		goto out;
+
+	table[0].data = &net->ct.sysctl_acct;
 
-	ret = nf_ct_extend_register(&acct_extend);
-	if (ret < 0) {
-		printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n");
-		return ret;
+	net->ct.acct_sysctl_header = register_net_sysctl_table(net,
+			nf_net_netfilter_sysctl_path, table);
+	if (!net->ct.acct_sysctl_header) {
+		printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n");
+		goto out_register;
 	}
+	return 0;
 
-#ifdef CONFIG_SYSCTL
-	acct_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path,
-				acct_sysctl_table);
+out_register:
+	kfree(table);
+out:
+	return -ENOMEM;
+}
 
-	if (!acct_sysctl_header) {
-		nf_ct_extend_unregister(&acct_extend);
+static void nf_conntrack_acct_fini_sysctl(struct net *net)
+{
+	struct ctl_table *table;
 
-		printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n");
-		return -ENOMEM;
-	}
+	table = net->ct.acct_sysctl_header->ctl_table_arg;
+	unregister_net_sysctl_table(net->ct.acct_sysctl_header);
+	kfree(table);
+}
+#else
+static int nf_conntrack_acct_init_sysctl(struct net *net)
+{
+	return 0;
+}
+
+static void nf_conntrack_acct_fini_sysctl(struct net *net)
+{
+}
+#endif
+
+int nf_conntrack_acct_init(struct net *net)
+{
+	int ret;
+
+	net->ct.sysctl_acct = nf_ct_acct;
+
+	if (net_eq(net, &init_net)) {
+#ifdef CONFIG_NF_CT_ACCT
+		printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n");
+		printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n");
+		printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
 #endif
 
+		ret = nf_ct_extend_register(&acct_extend);
+		if (ret < 0) {
+			printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n");
+			goto out_extend_register;
+		}
+	}
+
+	ret = nf_conntrack_acct_init_sysctl(net);
+	if (ret < 0)
+		goto out_sysctl;
+
 	return 0;
+
+out_sysctl:
+	if (net_eq(net, &init_net))
+		nf_ct_extend_unregister(&acct_extend);
+out_extend_register:
+	return ret;
 }
 
-void nf_conntrack_acct_fini(void)
+void nf_conntrack_acct_fini(struct net *net)
 {
-#ifdef CONFIG_SYSCTL
-	unregister_sysctl_table(acct_sysctl_header);
-#endif
-	nf_ct_extend_unregister(&acct_extend);
+	nf_conntrack_acct_fini_sysctl(net);
+	if (net_eq(net, &init_net))
+		nf_ct_extend_unregister(&acct_extend);
 }
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9d1830da8e8..27de3c7b006 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -44,30 +44,17 @@
 DEFINE_SPINLOCK(nf_conntrack_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_lock);
 
-/* nf_conntrack_standalone needs this */
-atomic_t nf_conntrack_count = ATOMIC_INIT(0);
-EXPORT_SYMBOL_GPL(nf_conntrack_count);
-
 unsigned int nf_conntrack_htable_size __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 
 int nf_conntrack_max __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_max);
 
-struct hlist_head *nf_conntrack_hash __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_hash);
-
 struct nf_conn nf_conntrack_untracked __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
 
-unsigned int nf_ct_log_invalid __read_mostly;
-HLIST_HEAD(unconfirmed);
-static int nf_conntrack_vmalloc __read_mostly;
 static struct kmem_cache *nf_conntrack_cachep __read_mostly;
 
-DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
-EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
-
 static int nf_conntrack_hash_rnd_initted;
 static unsigned int nf_conntrack_hash_rnd;
 
@@ -180,6 +167,7 @@ static void
 destroy_conntrack(struct nf_conntrack *nfct)
 {
 	struct nf_conn *ct = (struct nf_conn *)nfct;
+	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_l4proto *l4proto;
 
 	pr_debug("destroy_conntrack(%p)\n", ct);
@@ -212,7 +200,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 		hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
 	}
 
-	NF_CT_STAT_INC(delete);
+	NF_CT_STAT_INC(net, delete);
 	spin_unlock_bh(&nf_conntrack_lock);
 
 	if (ct->master)
@@ -225,6 +213,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 static void death_by_timeout(unsigned long ul_conntrack)
 {
 	struct nf_conn *ct = (void *)ul_conntrack;
+	struct net *net = nf_ct_net(ct);
 	struct nf_conn_help *help = nfct_help(ct);
 	struct nf_conntrack_helper *helper;
 
@@ -239,14 +228,14 @@ static void death_by_timeout(unsigned long ul_conntrack)
 	spin_lock_bh(&nf_conntrack_lock);
 	/* Inside lock so preempt is disabled on module removal path.
 	 * Otherwise we can get spurious warnings. */
-	NF_CT_STAT_INC(delete_list);
+	NF_CT_STAT_INC(net, delete_list);
 	clean_from_lists(ct);
 	spin_unlock_bh(&nf_conntrack_lock);
 	nf_ct_put(ct);
 }
 
 struct nf_conntrack_tuple_hash *
-__nf_conntrack_find(const struct nf_conntrack_tuple *tuple)
+__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_node *n;
@@ -256,13 +245,13 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple)
 	 * at least once for the stats anyway.
 	 */
 	local_bh_disable();
-	hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) {
+	hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) {
 		if (nf_ct_tuple_equal(tuple, &h->tuple)) {
-			NF_CT_STAT_INC(found);
+			NF_CT_STAT_INC(net, found);
 			local_bh_enable();
 			return h;
 		}
-		NF_CT_STAT_INC(searched);
+		NF_CT_STAT_INC(net, searched);
 	}
 	local_bh_enable();
 
@@ -272,13 +261,13 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find);
 
 /* Find a connection corresponding to a tuple. */
 struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple)
+nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 
 	rcu_read_lock();
-	h = __nf_conntrack_find(tuple);
+	h = __nf_conntrack_find(net, tuple);
 	if (h) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
@@ -294,10 +283,12 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
 				       unsigned int hash,
 				       unsigned int repl_hash)
 {
+	struct net *net = nf_ct_net(ct);
+
 	hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
-			   &nf_conntrack_hash[hash]);
+			   &net->ct.hash[hash]);
 	hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode,
-			   &nf_conntrack_hash[repl_hash]);
+			   &net->ct.hash[repl_hash]);
 }
 
 void nf_conntrack_hash_insert(struct nf_conn *ct)
@@ -323,8 +314,10 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	struct nf_conn_help *help;
 	struct hlist_node *n;
 	enum ip_conntrack_info ctinfo;
+	struct net *net;
 
 	ct = nf_ct_get(skb, &ctinfo);
+	net = nf_ct_net(ct);
 
 	/* ipt_REJECT uses nf_conntrack_attach to attach related
 	   ICMP/TCP RST packets in other direction.  Actual packet
@@ -351,11 +344,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	/* See if there's one in the list already, including reverse:
 	   NAT could have grabbed it without realizing, since we're
 	   not in the hash.  If there is, we lost race. */
-	hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode)
+	hlist_for_each_entry(h, n, &net->ct.hash[hash], hnode)
 		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
 				      &h->tuple))
 			goto out;
-	hlist_for_each_entry(h, n, &nf_conntrack_hash[repl_hash], hnode)
+	hlist_for_each_entry(h, n, &net->ct.hash[repl_hash], hnode)
 		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
 				      &h->tuple))
 			goto out;
@@ -371,22 +364,22 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	add_timer(&ct->timeout);
 	atomic_inc(&ct->ct_general.use);
 	set_bit(IPS_CONFIRMED_BIT, &ct->status);
-	NF_CT_STAT_INC(insert);
+	NF_CT_STAT_INC(net, insert);
 	spin_unlock_bh(&nf_conntrack_lock);
 	help = nfct_help(ct);
 	if (help && help->helper)
-		nf_conntrack_event_cache(IPCT_HELPER, skb);
+		nf_conntrack_event_cache(IPCT_HELPER, ct);
 #ifdef CONFIG_NF_NAT_NEEDED
 	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
 	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
-		nf_conntrack_event_cache(IPCT_NATINFO, skb);
+		nf_conntrack_event_cache(IPCT_NATINFO, ct);
 #endif
 	nf_conntrack_event_cache(master_ct(ct) ?
-				 IPCT_RELATED : IPCT_NEW, skb);
+				 IPCT_RELATED : IPCT_NEW, ct);
 	return NF_ACCEPT;
 
 out:
-	NF_CT_STAT_INC(insert_failed);
+	NF_CT_STAT_INC(net, insert_failed);
 	spin_unlock_bh(&nf_conntrack_lock);
 	return NF_DROP;
 }
@@ -398,6 +391,7 @@ int
 nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 			 const struct nf_conn *ignored_conntrack)
 {
+	struct net *net = nf_ct_net(ignored_conntrack);
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_node *n;
 	unsigned int hash = hash_conntrack(tuple);
@@ -406,14 +400,14 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 	 * least once for the stats anyway.
 	 */
 	rcu_read_lock_bh();
-	hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) {
+	hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) {
 		if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
 		    nf_ct_tuple_equal(tuple, &h->tuple)) {
-			NF_CT_STAT_INC(found);
+			NF_CT_STAT_INC(net, found);
 			rcu_read_unlock_bh();
 			return 1;
 		}
-		NF_CT_STAT_INC(searched);
+		NF_CT_STAT_INC(net, searched);
 	}
 	rcu_read_unlock_bh();
 
@@ -425,7 +419,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
 
 /* There's a small race here where we may free a just-assured
    connection.  Too bad: we're in trouble anyway. */
-static noinline int early_drop(unsigned int hash)
+static noinline int early_drop(struct net *net, unsigned int hash)
 {
 	/* Use oldest entry, which is roughly LRU */
 	struct nf_conntrack_tuple_hash *h;
@@ -436,7 +430,7 @@ static noinline int early_drop(unsigned int hash)
 
 	rcu_read_lock();
 	for (i = 0; i < nf_conntrack_htable_size; i++) {
-		hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
+		hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash],
 					 hnode) {
 			tmp = nf_ct_tuplehash_to_ctrack(h);
 			if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
@@ -458,13 +452,14 @@ static noinline int early_drop(unsigned int hash)
 	if (del_timer(&ct->timeout)) {
 		death_by_timeout((unsigned long)ct);
 		dropped = 1;
-		NF_CT_STAT_INC_ATOMIC(early_drop);
+		NF_CT_STAT_INC_ATOMIC(net, early_drop);
 	}
 	nf_ct_put(ct);
 	return dropped;
 }
 
-struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
+struct nf_conn *nf_conntrack_alloc(struct net *net,
+				   const struct nf_conntrack_tuple *orig,
 				   const struct nf_conntrack_tuple *repl,
 				   gfp_t gfp)
 {
@@ -476,13 +471,13 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 	}
 
 	/* We don't want any race condition at early drop stage */
-	atomic_inc(&nf_conntrack_count);
+	atomic_inc(&net->ct.count);
 
 	if (nf_conntrack_max &&
-	    unlikely(atomic_read(&nf_conntrack_count) > nf_conntrack_max)) {
+	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
 		unsigned int hash = hash_conntrack(orig);
-		if (!early_drop(hash)) {
-			atomic_dec(&nf_conntrack_count);
+		if (!early_drop(net, hash)) {
+			atomic_dec(&net->ct.count);
 			if (net_ratelimit())
 				printk(KERN_WARNING
 				       "nf_conntrack: table full, dropping"
@@ -494,7 +489,7 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 	ct = kmem_cache_zalloc(nf_conntrack_cachep, gfp);
 	if (ct == NULL) {
 		pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n");
-		atomic_dec(&nf_conntrack_count);
+		atomic_dec(&net->ct.count);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -503,6 +498,9 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
 	/* Don't set timer yet: wait for confirmation */
 	setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
+#ifdef CONFIG_NET_NS
+	ct->ct_net = net;
+#endif
 	INIT_RCU_HEAD(&ct->rcu);
 
 	return ct;
@@ -512,10 +510,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
 static void nf_conntrack_free_rcu(struct rcu_head *head)
 {
 	struct nf_conn *ct = container_of(head, struct nf_conn, rcu);
+	struct net *net = nf_ct_net(ct);
 
 	nf_ct_ext_free(ct);
 	kmem_cache_free(nf_conntrack_cachep, ct);
-	atomic_dec(&nf_conntrack_count);
+	atomic_dec(&net->ct.count);
 }
 
 void nf_conntrack_free(struct nf_conn *ct)
@@ -528,7 +527,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
 /* Allocate a new conntrack: we return -ENOMEM if classification
    failed due to stress.  Otherwise it really is unclassifiable. */
 static struct nf_conntrack_tuple_hash *
-init_conntrack(const struct nf_conntrack_tuple *tuple,
+init_conntrack(struct net *net,
+	       const struct nf_conntrack_tuple *tuple,
 	       struct nf_conntrack_l3proto *l3proto,
 	       struct nf_conntrack_l4proto *l4proto,
 	       struct sk_buff *skb,
@@ -544,7 +544,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 		return NULL;
 	}
 
-	ct = nf_conntrack_alloc(tuple, &repl_tuple, GFP_ATOMIC);
+	ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC);
 	if (ct == NULL || IS_ERR(ct)) {
 		pr_debug("Can't allocate conntrack.\n");
 		return (struct nf_conntrack_tuple_hash *)ct;
@@ -559,7 +559,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
 
 	spin_lock_bh(&nf_conntrack_lock);
-	exp = nf_ct_find_expectation(tuple);
+	exp = nf_ct_find_expectation(net, tuple);
 	if (exp) {
 		pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
 			 ct, exp);
@@ -579,7 +579,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 		ct->secmark = exp->master->secmark;
 #endif
 		nf_conntrack_get(&ct->master->ct_general);
-		NF_CT_STAT_INC(expect_new);
+		NF_CT_STAT_INC(net, expect_new);
 	} else {
 		struct nf_conntrack_helper *helper;
 
@@ -589,11 +589,12 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 			if (help)
 				rcu_assign_pointer(help->helper, helper);
 		}
-		NF_CT_STAT_INC(new);
+		NF_CT_STAT_INC(net, new);
 	}
 
 	/* Overload tuple linked list to put us in unconfirmed list. */
-	hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, &unconfirmed);
+	hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
+		       &net->ct.unconfirmed);
 
 	spin_unlock_bh(&nf_conntrack_lock);
 
@@ -608,7 +609,8 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 
 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
 static inline struct nf_conn *
-resolve_normal_ct(struct sk_buff *skb,
+resolve_normal_ct(struct net *net,
+		  struct sk_buff *skb,
 		  unsigned int dataoff,
 		  u_int16_t l3num,
 		  u_int8_t protonum,
@@ -629,9 +631,9 @@ resolve_normal_ct(struct sk_buff *skb,
 	}
 
 	/* look for tuple match */
-	h = nf_conntrack_find_get(&tuple);
+	h = nf_conntrack_find_get(net, &tuple);
 	if (!h) {
-		h = init_conntrack(&tuple, l3proto, l4proto, skb, dataoff);
+		h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff);
 		if (!h)
 			return NULL;
 		if (IS_ERR(h))
@@ -665,7 +667,8 @@ resolve_normal_ct(struct sk_buff *skb,
 }
 
 unsigned int
-nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb)
+nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
+		struct sk_buff *skb)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -678,44 +681,46 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb)
 
 	/* Previously seen (loopback or untracked)?  Ignore. */
 	if (skb->nfct) {
-		NF_CT_STAT_INC_ATOMIC(ignore);
+		NF_CT_STAT_INC_ATOMIC(net, ignore);
 		return NF_ACCEPT;
 	}
 
 	/* rcu_read_lock()ed by nf_hook_slow */
-	l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
+	l3proto = __nf_ct_l3proto_find(pf);
 	ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
 				   &dataoff, &protonum);
 	if (ret <= 0) {
 		pr_debug("not prepared to track yet or error occured\n");
-		NF_CT_STAT_INC_ATOMIC(error);
-		NF_CT_STAT_INC_ATOMIC(invalid);
+		NF_CT_STAT_INC_ATOMIC(net, error);
+		NF_CT_STAT_INC_ATOMIC(net, invalid);
 		return -ret;
 	}
 
-	l4proto = __nf_ct_l4proto_find((u_int16_t)pf, protonum);
+	l4proto = __nf_ct_l4proto_find(pf, protonum);
 
 	/* It may be an special packet, error, unclean...
 	 * inverse of the return code tells to the netfilter
 	 * core what to do with the packet. */
-	if (l4proto->error != NULL &&
-	    (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
-		NF_CT_STAT_INC_ATOMIC(error);
-		NF_CT_STAT_INC_ATOMIC(invalid);
-		return -ret;
+	if (l4proto->error != NULL) {
+		ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum);
+		if (ret <= 0) {
+			NF_CT_STAT_INC_ATOMIC(net, error);
+			NF_CT_STAT_INC_ATOMIC(net, invalid);
+			return -ret;
+		}
 	}
 
-	ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto,
-			       &set_reply, &ctinfo);
+	ct = resolve_normal_ct(net, skb, dataoff, pf, protonum,
+			       l3proto, l4proto, &set_reply, &ctinfo);
 	if (!ct) {
 		/* Not valid part of a connection */
-		NF_CT_STAT_INC_ATOMIC(invalid);
+		NF_CT_STAT_INC_ATOMIC(net, invalid);
 		return NF_ACCEPT;
 	}
 
 	if (IS_ERR(ct)) {
 		/* Too stressed to deal. */
-		NF_CT_STAT_INC_ATOMIC(drop);
+		NF_CT_STAT_INC_ATOMIC(net, drop);
 		return NF_DROP;
 	}
 
@@ -728,12 +733,12 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb)
 		pr_debug("nf_conntrack_in: Can't track with proto module\n");
 		nf_conntrack_put(skb->nfct);
 		skb->nfct = NULL;
-		NF_CT_STAT_INC_ATOMIC(invalid);
+		NF_CT_STAT_INC_ATOMIC(net, invalid);
 		return -ret;
 	}
 
 	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
-		nf_conntrack_event_cache(IPCT_STATUS, skb);
+		nf_conntrack_event_cache(IPCT_STATUS, ct);
 
 	return ret;
 }
@@ -846,7 +851,7 @@ acct:
 
 	/* must be unlocked when calling event cache */
 	if (event)
-		nf_conntrack_event_cache(event, skb);
+		nf_conntrack_event_cache(event, ct);
 }
 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
 
@@ -938,7 +943,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
 
 /* Bring out ya dead! */
 static struct nf_conn *
-get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
+get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 		void *data, unsigned int *bucket)
 {
 	struct nf_conntrack_tuple_hash *h;
@@ -947,13 +952,13 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
 
 	spin_lock_bh(&nf_conntrack_lock);
 	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
-		hlist_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnode) {
+		hlist_for_each_entry(h, n, &net->ct.hash[*bucket], hnode) {
 			ct = nf_ct_tuplehash_to_ctrack(h);
 			if (iter(ct, data))
 				goto found;
 		}
 	}
-	hlist_for_each_entry(h, n, &unconfirmed, hnode) {
+	hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		if (iter(ct, data))
 			set_bit(IPS_DYING_BIT, &ct->status);
@@ -966,13 +971,14 @@ found:
 	return ct;
 }
 
-void
-nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
+void nf_ct_iterate_cleanup(struct net *net,
+			   int (*iter)(struct nf_conn *i, void *data),
+			   void *data)
 {
 	struct nf_conn *ct;
 	unsigned int bucket = 0;
 
-	while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
+	while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
 		/* Time to push up daises... */
 		if (del_timer(&ct->timeout))
 			death_by_timeout((unsigned long)ct);
@@ -998,27 +1004,26 @@ void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int s
 }
 EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
 
-void nf_conntrack_flush(void)
+void nf_conntrack_flush(struct net *net)
 {
-	nf_ct_iterate_cleanup(kill_all, NULL);
+	nf_ct_iterate_cleanup(net, kill_all, NULL);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_flush);
 
-/* Mishearing the voices in his head, our hero wonders how he's
-   supposed to kill the mall. */
-void nf_conntrack_cleanup(void)
+static void nf_conntrack_cleanup_init_net(void)
 {
-	rcu_assign_pointer(ip_ct_attach, NULL);
-
-	/* This makes sure all current packets have passed through
-	   netfilter framework.  Roll on, two-stage module
-	   delete... */
-	synchronize_net();
+	nf_conntrack_helper_fini();
+	nf_conntrack_proto_fini();
+	kmem_cache_destroy(nf_conntrack_cachep);
+}
 
-	nf_ct_event_cache_flush();
+static void nf_conntrack_cleanup_net(struct net *net)
+{
+	nf_ct_event_cache_flush(net);
+	nf_conntrack_ecache_fini(net);
  i_see_dead_people:
-	nf_conntrack_flush();
-	if (atomic_read(&nf_conntrack_count) != 0) {
+	nf_conntrack_flush(net);
+	if (atomic_read(&net->ct.count) != 0) {
 		schedule();
 		goto i_see_dead_people;
 	}
@@ -1026,16 +1031,31 @@ void nf_conntrack_cleanup(void)
 	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
 		schedule();
 
-	rcu_assign_pointer(nf_ct_destroy, NULL);
-
-	kmem_cache_destroy(nf_conntrack_cachep);
-	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc,
+	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
+	nf_conntrack_acct_fini(net);
+	nf_conntrack_expect_fini(net);
+	free_percpu(net->ct.stat);
+}
 
-	nf_conntrack_acct_fini();
-	nf_conntrack_expect_fini();
-	nf_conntrack_helper_fini();
-	nf_conntrack_proto_fini();
+/* Mishearing the voices in his head, our hero wonders how he's
+   supposed to kill the mall. */
+void nf_conntrack_cleanup(struct net *net)
+{
+	if (net_eq(net, &init_net))
+		rcu_assign_pointer(ip_ct_attach, NULL);
+
+	/* This makes sure all current packets have passed through
+	   netfilter framework.  Roll on, two-stage module
+	   delete... */
+	synchronize_net();
+
+	nf_conntrack_cleanup_net(net);
+
+	if (net_eq(net, &init_net)) {
+		rcu_assign_pointer(nf_ct_destroy, NULL);
+		nf_conntrack_cleanup_init_net();
+	}
 }
 
 struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced)
@@ -1094,8 +1114,8 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	 */
 	spin_lock_bh(&nf_conntrack_lock);
 	for (i = 0; i < nf_conntrack_htable_size; i++) {
-		while (!hlist_empty(&nf_conntrack_hash[i])) {
-			h = hlist_entry(nf_conntrack_hash[i].first,
+		while (!hlist_empty(&init_net.ct.hash[i])) {
+			h = hlist_entry(init_net.ct.hash[i].first,
 					struct nf_conntrack_tuple_hash, hnode);
 			hlist_del_rcu(&h->hnode);
 			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
@@ -1103,12 +1123,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 		}
 	}
 	old_size = nf_conntrack_htable_size;
-	old_vmalloced = nf_conntrack_vmalloc;
-	old_hash = nf_conntrack_hash;
+	old_vmalloced = init_net.ct.hash_vmalloc;
+	old_hash = init_net.ct.hash;
 
 	nf_conntrack_htable_size = hashsize;
-	nf_conntrack_vmalloc = vmalloced;
-	nf_conntrack_hash = hash;
+	init_net.ct.hash_vmalloc = vmalloced;
+	init_net.ct.hash = hash;
 	nf_conntrack_hash_rnd = rnd;
 	spin_unlock_bh(&nf_conntrack_lock);
 
@@ -1120,7 +1140,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
 		  &nf_conntrack_htable_size, 0600);
 
-int __init nf_conntrack_init(void)
+static int nf_conntrack_init_init_net(void)
 {
 	int max_factor = 8;
 	int ret;
@@ -1142,13 +1162,6 @@ int __init nf_conntrack_init(void)
 		 * entries. */
 		max_factor = 4;
 	}
-	nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
-						  &nf_conntrack_vmalloc);
-	if (!nf_conntrack_hash) {
-		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
-		goto err_out;
-	}
-
 	nf_conntrack_max = max_factor * nf_conntrack_htable_size;
 
 	printk("nf_conntrack version %s (%u buckets, %d max)\n",
@@ -1160,48 +1173,103 @@ int __init nf_conntrack_init(void)
 						0, 0, NULL);
 	if (!nf_conntrack_cachep) {
 		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
-		goto err_free_hash;
+		ret = -ENOMEM;
+		goto err_cache;
 	}
 
 	ret = nf_conntrack_proto_init();
 	if (ret < 0)
-		goto err_free_conntrack_slab;
-
-	ret = nf_conntrack_expect_init();
-	if (ret < 0)
-		goto out_fini_proto;
+		goto err_proto;
 
 	ret = nf_conntrack_helper_init();
 	if (ret < 0)
-		goto out_fini_expect;
+		goto err_helper;
 
-	ret = nf_conntrack_acct_init();
-	if (ret < 0)
-		goto out_fini_helper;
+	return 0;
 
-	/* For use by REJECT target */
-	rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
-	rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
+err_helper:
+	nf_conntrack_proto_fini();
+err_proto:
+	kmem_cache_destroy(nf_conntrack_cachep);
+err_cache:
+	return ret;
+}
+
+static int nf_conntrack_init_net(struct net *net)
+{
+	int ret;
+
+	atomic_set(&net->ct.count, 0);
+	INIT_HLIST_HEAD(&net->ct.unconfirmed);
+	net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
+	if (!net->ct.stat) {
+		ret = -ENOMEM;
+		goto err_stat;
+	}
+	ret = nf_conntrack_ecache_init(net);
+	if (ret < 0)
+		goto err_ecache;
+	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
+						  &net->ct.hash_vmalloc);
+	if (!net->ct.hash) {
+		ret = -ENOMEM;
+		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
+		goto err_hash;
+	}
+	ret = nf_conntrack_expect_init(net);
+	if (ret < 0)
+		goto err_expect;
+	ret = nf_conntrack_acct_init(net);
+	if (ret < 0)
+		goto err_acct;
 
 	/* Set up fake conntrack:
 	    - to never be deleted, not in any hashes */
+#ifdef CONFIG_NET_NS
+	nf_conntrack_untracked.ct_net = &init_net;
+#endif
 	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
 	/*  - and look it like as a confirmed connection */
 	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
 
-	return ret;
+	return 0;
 
-out_fini_helper:
-	nf_conntrack_helper_fini();
-out_fini_expect:
-	nf_conntrack_expect_fini();
-out_fini_proto:
-	nf_conntrack_proto_fini();
-err_free_conntrack_slab:
-	kmem_cache_destroy(nf_conntrack_cachep);
-err_free_hash:
-	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc,
+err_acct:
+	nf_conntrack_expect_fini(net);
+err_expect:
+	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
-err_out:
-	return -ENOMEM;
+err_hash:
+	nf_conntrack_ecache_fini(net);
+err_ecache:
+	free_percpu(net->ct.stat);
+err_stat:
+	return ret;
+}
+
+int nf_conntrack_init(struct net *net)
+{
+	int ret;
+
+	if (net_eq(net, &init_net)) {
+		ret = nf_conntrack_init_init_net();
+		if (ret < 0)
+			goto out_init_net;
+	}
+	ret = nf_conntrack_init_net(net);
+	if (ret < 0)
+		goto out_net;
+
+	if (net_eq(net, &init_net)) {
+		/* For use by REJECT target */
+		rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
+		rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
+	}
+	return 0;
+
+out_net:
+	if (net_eq(net, &init_net))
+		nf_conntrack_cleanup_init_net();
+out_init_net:
+	return ret;
 }
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 83c41ac3505..a5f5e2e65d1 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -29,9 +29,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_chain);
 ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain);
 EXPORT_SYMBOL_GPL(nf_ct_expect_chain);
 
-DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
-EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache);
-
 /* deliver cached events and clear cache entry - must be called with locally
  * disabled softirqs */
 static inline void
@@ -51,10 +48,11 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
  * by code prior to async packet handling for freeing the skb */
 void nf_ct_deliver_cached_events(const struct nf_conn *ct)
 {
+	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_ecache *ecache;
 
 	local_bh_disable();
-	ecache = &__get_cpu_var(nf_conntrack_ecache);
+	ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
 	if (ecache->ct == ct)
 		__nf_ct_deliver_cached_events(ecache);
 	local_bh_enable();
@@ -64,10 +62,11 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
 /* Deliver cached events for old pending events, if current conntrack != old */
 void __nf_ct_event_cache_init(struct nf_conn *ct)
 {
+	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_ecache *ecache;
 
 	/* take care of delivering potentially old events */
-	ecache = &__get_cpu_var(nf_conntrack_ecache);
+	ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
 	BUG_ON(ecache->ct == ct);
 	if (ecache->ct)
 		__nf_ct_deliver_cached_events(ecache);
@@ -79,18 +78,31 @@ EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init);
 
 /* flush the event cache - touches other CPU's data and must not be called
  * while packets are still passing through the code */
-void nf_ct_event_cache_flush(void)
+void nf_ct_event_cache_flush(struct net *net)
 {
 	struct nf_conntrack_ecache *ecache;
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		ecache = &per_cpu(nf_conntrack_ecache, cpu);
+		ecache = per_cpu_ptr(net->ct.ecache, cpu);
 		if (ecache->ct)
 			nf_ct_put(ecache->ct);
 	}
 }
 
+int nf_conntrack_ecache_init(struct net *net)
+{
+	net->ct.ecache = alloc_percpu(struct nf_conntrack_ecache);
+	if (!net->ct.ecache)
+		return -ENOMEM;
+	return 0;
+}
+
+void nf_conntrack_ecache_fini(struct net *net)
+{
+	free_percpu(net->ct.ecache);
+}
+
 int nf_conntrack_register_notifier(struct notifier_block *nb)
 {
 	return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index e8f0dead267..37a703bc3b8 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -28,17 +28,12 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 
-struct hlist_head *nf_ct_expect_hash __read_mostly;
-EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
-
 unsigned int nf_ct_expect_hsize __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
 
 static unsigned int nf_ct_expect_hash_rnd __read_mostly;
-static unsigned int nf_ct_expect_count;
 unsigned int nf_ct_expect_max __read_mostly;
 static int nf_ct_expect_hash_rnd_initted __read_mostly;
-static int nf_ct_expect_vmalloc;
 
 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
 
@@ -46,18 +41,19 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
 void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
 {
 	struct nf_conn_help *master_help = nfct_help(exp->master);
+	struct net *net = nf_ct_exp_net(exp);
 
 	NF_CT_ASSERT(master_help);
 	NF_CT_ASSERT(!timer_pending(&exp->timeout));
 
 	hlist_del_rcu(&exp->hnode);
-	nf_ct_expect_count--;
+	net->ct.expect_count--;
 
 	hlist_del(&exp->lnode);
 	master_help->expecting[exp->class]--;
 	nf_ct_expect_put(exp);
 
-	NF_CT_STAT_INC(expect_delete);
+	NF_CT_STAT_INC(net, expect_delete);
 }
 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
 
@@ -87,17 +83,17 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple
 }
 
 struct nf_conntrack_expect *
-__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple)
+__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i;
 	struct hlist_node *n;
 	unsigned int h;
 
-	if (!nf_ct_expect_count)
+	if (!net->ct.expect_count)
 		return NULL;
 
 	h = nf_ct_expect_dst_hash(tuple);
-	hlist_for_each_entry_rcu(i, n, &nf_ct_expect_hash[h], hnode) {
+	hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
 		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
 			return i;
 	}
@@ -107,12 +103,12 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
 
 /* Just find a expectation corresponding to a tuple. */
 struct nf_conntrack_expect *
-nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple)
+nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i;
 
 	rcu_read_lock();
-	i = __nf_ct_expect_find(tuple);
+	i = __nf_ct_expect_find(net, tuple);
 	if (i && !atomic_inc_not_zero(&i->use))
 		i = NULL;
 	rcu_read_unlock();
@@ -124,17 +120,17 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
 /* If an expectation for this connection is found, it gets delete from
  * global list then returned. */
 struct nf_conntrack_expect *
-nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple)
+nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i, *exp = NULL;
 	struct hlist_node *n;
 	unsigned int h;
 
-	if (!nf_ct_expect_count)
+	if (!net->ct.expect_count)
 		return NULL;
 
 	h = nf_ct_expect_dst_hash(tuple);
-	hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
+	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
 		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
 		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
 			exp = i;
@@ -241,7 +237,7 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
 
 void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
-		       int family,
+		       u_int8_t family,
 		       const union nf_inet_addr *saddr,
 		       const union nf_inet_addr *daddr,
 		       u_int8_t proto, const __be16 *src, const __be16 *dst)
@@ -311,6 +307,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_put);
 static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 {
 	struct nf_conn_help *master_help = nfct_help(exp->master);
+	struct net *net = nf_ct_exp_net(exp);
 	const struct nf_conntrack_expect_policy *p;
 	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
 
@@ -319,8 +316,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 	hlist_add_head(&exp->lnode, &master_help->expectations);
 	master_help->expecting[exp->class]++;
 
-	hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
-	nf_ct_expect_count++;
+	hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
+	net->ct.expect_count++;
 
 	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
 		    (unsigned long)exp);
@@ -329,7 +326,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 	add_timer(&exp->timeout);
 
 	atomic_inc(&exp->use);
-	NF_CT_STAT_INC(expect_create);
+	NF_CT_STAT_INC(net, expect_create);
 }
 
 /* Race with expectations being used means we could have none to find; OK. */
@@ -371,6 +368,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
 	struct nf_conntrack_expect *i;
 	struct nf_conn *master = expect->master;
 	struct nf_conn_help *master_help = nfct_help(master);
+	struct net *net = nf_ct_exp_net(expect);
 	struct hlist_node *n;
 	unsigned int h;
 	int ret;
@@ -383,7 +381,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
 		goto out;
 	}
 	h = nf_ct_expect_dst_hash(&expect->tuple);
-	hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
+	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
 		if (expect_matches(i, expect)) {
 			/* Refresh timer: if it's dying, ignore.. */
 			if (refresh_timer(i)) {
@@ -406,7 +404,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
 		}
 	}
 
-	if (nf_ct_expect_count >= nf_ct_expect_max) {
+	if (net->ct.expect_count >= nf_ct_expect_max) {
 		if (net_ratelimit())
 			printk(KERN_WARNING
 			       "nf_conntrack: expectation table full\n");
@@ -425,16 +423,18 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_related);
 
 #ifdef CONFIG_PROC_FS
 struct ct_expect_iter_state {
+	struct seq_net_private p;
 	unsigned int bucket;
 };
 
 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 {
+	struct net *net = seq_file_net(seq);
 	struct ct_expect_iter_state *st = seq->private;
 	struct hlist_node *n;
 
 	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-		n = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
+		n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
 		if (n)
 			return n;
 	}
@@ -444,13 +444,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
 					     struct hlist_node *head)
 {
+	struct net *net = seq_file_net(seq);
 	struct ct_expect_iter_state *st = seq->private;
 
 	head = rcu_dereference(head->next);
 	while (head == NULL) {
 		if (++st->bucket >= nf_ct_expect_hsize)
 			return NULL;
-		head = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
+		head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
 	}
 	return head;
 }
@@ -524,7 +525,7 @@ static const struct seq_operations exp_seq_ops = {
 
 static int exp_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &exp_seq_ops,
+	return seq_open_net(inode, file, &exp_seq_ops,
 			sizeof(struct ct_expect_iter_state));
 }
 
@@ -533,72 +534,79 @@ static const struct file_operations exp_file_ops = {
 	.open    = exp_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = seq_release_private,
+	.release = seq_release_net,
 };
 #endif /* CONFIG_PROC_FS */
 
-static int __init exp_proc_init(void)
+static int exp_proc_init(struct net *net)
 {
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry *proc;
 
-	proc = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440, &exp_file_ops);
+	proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
 	if (!proc)
 		return -ENOMEM;
 #endif /* CONFIG_PROC_FS */
 	return 0;
 }
 
-static void exp_proc_remove(void)
+static void exp_proc_remove(struct net *net)
 {
 #ifdef CONFIG_PROC_FS
-	proc_net_remove(&init_net, "nf_conntrack_expect");
+	proc_net_remove(net, "nf_conntrack_expect");
 #endif /* CONFIG_PROC_FS */
 }
 
 module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
 
-int __init nf_conntrack_expect_init(void)
+int nf_conntrack_expect_init(struct net *net)
 {
 	int err = -ENOMEM;
 
-	if (!nf_ct_expect_hsize) {
-		nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
-		if (!nf_ct_expect_hsize)
-			nf_ct_expect_hsize = 1;
+	if (net_eq(net, &init_net)) {
+		if (!nf_ct_expect_hsize) {
+			nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
+			if (!nf_ct_expect_hsize)
+				nf_ct_expect_hsize = 1;
+		}
+		nf_ct_expect_max = nf_ct_expect_hsize * 4;
 	}
-	nf_ct_expect_max = nf_ct_expect_hsize * 4;
 
-	nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
-						  &nf_ct_expect_vmalloc);
-	if (nf_ct_expect_hash == NULL)
+	net->ct.expect_count = 0;
+	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
+						  &net->ct.expect_vmalloc);
+	if (net->ct.expect_hash == NULL)
 		goto err1;
 
-	nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
+	if (net_eq(net, &init_net)) {
+		nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
 					sizeof(struct nf_conntrack_expect),
 					0, 0, NULL);
-	if (!nf_ct_expect_cachep)
-		goto err2;
+		if (!nf_ct_expect_cachep)
+			goto err2;
+	}
 
-	err = exp_proc_init();
+	err = exp_proc_init(net);
 	if (err < 0)
 		goto err3;
 
 	return 0;
 
 err3:
-	kmem_cache_destroy(nf_ct_expect_cachep);
+	if (net_eq(net, &init_net))
+		kmem_cache_destroy(nf_ct_expect_cachep);
 err2:
-	nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
+	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
 			     nf_ct_expect_hsize);
 err1:
 	return err;
 }
 
-void nf_conntrack_expect_fini(void)
+void nf_conntrack_expect_fini(struct net *net)
 {
-	exp_proc_remove();
-	kmem_cache_destroy(nf_ct_expect_cachep);
-	nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
+	exp_proc_remove(net);
+	if (net_eq(net, &init_net))
+		kmem_cache_destroy(nf_ct_expect_cachep);
+	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
 			     nf_ct_expect_hsize);
 }
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index bb20672fe03..4f7107107e9 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -318,7 +318,8 @@ static int find_nl_seq(u32 seq, const struct nf_ct_ftp_master *info, int dir)
 }
 
 /* We don't update if it's older than what we have. */
-static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir,
+static void update_nl_seq(struct nf_conn *ct, u32 nl_seq,
+			  struct nf_ct_ftp_master *info, int dir,
 			  struct sk_buff *skb)
 {
 	unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
@@ -336,11 +337,11 @@ static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir,
 
 	if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
 		info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
-		nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+		nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
 	} else if (oldest != NUM_SEQ_TO_REMEMBER &&
 		   after(nl_seq, info->seq_aft_nl[dir][oldest])) {
 		info->seq_aft_nl[dir][oldest] = nl_seq;
-		nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+		nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
 	}
 }
 
@@ -509,7 +510,7 @@ out_update_nl:
 	/* Now if this ends in \n, update ftp info.  Seq may have been
 	 * adjusted by NAT code. */
 	if (ends_in_nl)
-		update_nl_seq(seq, ct_ftp_info, dir, skb);
+		update_nl_seq(ct, seq, ct_ftp_info, dir, skb);
  out:
 	spin_unlock_bh(&nf_ftp_lock);
 	return ret;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 2f83c158934..c1504f71cdf 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -709,7 +709,8 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
 /* If the calling party is on the same side of the forward-to party,
  * we don't need to track the second call */
 static int callforward_do_filter(const union nf_inet_addr *src,
-                                 const union nf_inet_addr *dst, int family)
+				 const union nf_inet_addr *dst,
+				 u_int8_t family)
 {
 	const struct nf_afinfo *afinfo;
 	struct flowi fl1, fl2;
@@ -1209,6 +1210,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
 					       union nf_inet_addr *addr,
 					       __be16 port)
 {
+	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_expect *exp;
 	struct nf_conntrack_tuple tuple;
 
@@ -1218,7 +1220,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
 	tuple.dst.u.tcp.port = port;
 	tuple.dst.protonum = IPPROTO_TCP;
 
-	exp = __nf_ct_expect_find(&tuple);
+	exp = __nf_ct_expect_find(net, &tuple);
 	if (exp && exp->master == ct)
 		return exp;
 	return NULL;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 8e0b4c8f62a..9c06b9f86ad 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -123,29 +123,18 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
 
-void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
+					     struct net *net)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_expect *exp;
 	const struct hlist_node *n, *next;
 	unsigned int i;
 
-	mutex_lock(&nf_ct_helper_mutex);
-	hlist_del_rcu(&me->hnode);
-	nf_ct_helper_count--;
-	mutex_unlock(&nf_ct_helper_mutex);
-
-	/* Make sure every nothing is still using the helper unless its a
-	 * connection in the hash.
-	 */
-	synchronize_rcu();
-
-	spin_lock_bh(&nf_conntrack_lock);
-
 	/* Get rid of expectations */
 	for (i = 0; i < nf_ct_expect_hsize; i++) {
 		hlist_for_each_entry_safe(exp, n, next,
-					  &nf_ct_expect_hash[i], hnode) {
+					  &net->ct.expect_hash[i], hnode) {
 			struct nf_conn_help *help = nfct_help(exp->master);
 			if ((help->helper == me || exp->helper == me) &&
 			    del_timer(&exp->timeout)) {
@@ -156,12 +145,31 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 	}
 
 	/* Get rid of expecteds, set helpers to NULL. */
-	hlist_for_each_entry(h, n, &unconfirmed, hnode)
+	hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode)
 		unhelp(h, me);
 	for (i = 0; i < nf_conntrack_htable_size; i++) {
-		hlist_for_each_entry(h, n, &nf_conntrack_hash[i], hnode)
+		hlist_for_each_entry(h, n, &net->ct.hash[i], hnode)
 			unhelp(h, me);
 	}
+}
+
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
+	struct net *net;
+
+	mutex_lock(&nf_ct_helper_mutex);
+	hlist_del_rcu(&me->hnode);
+	nf_ct_helper_count--;
+	mutex_unlock(&nf_ct_helper_mutex);
+
+	/* Make sure every nothing is still using the helper unless its a
+	 * connection in the hash.
+	 */
+	synchronize_rcu();
+
+	spin_lock_bh(&nf_conntrack_lock);
+	for_each_net(net)
+		__nf_conntrack_helper_unregister(me, net);
 	spin_unlock_bh(&nf_conntrack_lock);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index a8752031adc..cadfd15b44f 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -549,7 +549,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	last = (struct nf_conn *)cb->args[1];
 	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
 restart:
-		hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[cb->args[0]],
+		hlist_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]],
 					 hnode) {
 			if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
@@ -794,14 +794,14 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
 	else {
 		/* Flush the whole table */
-		nf_conntrack_flush();
+		nf_conntrack_flush(&init_net);
 		return 0;
 	}
 
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(&tuple);
+	h = nf_conntrack_find_get(&init_net, &tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -847,7 +847,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(&tuple);
+	h = nf_conntrack_find_get(&init_net, &tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -1125,7 +1125,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 	struct nf_conn_help *help;
 	struct nf_conntrack_helper *helper;
 
-	ct = nf_conntrack_alloc(otuple, rtuple, GFP_KERNEL);
+	ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_KERNEL);
 	if (ct == NULL || IS_ERR(ct))
 		return -ENOMEM;
 
@@ -1213,9 +1213,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 
 	spin_lock_bh(&nf_conntrack_lock);
 	if (cda[CTA_TUPLE_ORIG])
-		h = __nf_conntrack_find(&otuple);
+		h = __nf_conntrack_find(&init_net, &otuple);
 	else if (cda[CTA_TUPLE_REPLY])
-		h = __nf_conntrack_find(&rtuple);
+		h = __nf_conntrack_find(&init_net, &rtuple);
 
 	if (h == NULL) {
 		struct nf_conntrack_tuple master;
@@ -1230,7 +1230,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			if (err < 0)
 				goto out_unlock;
 
-			master_h = __nf_conntrack_find(&master);
+			master_h = __nf_conntrack_find(&init_net, &master);
 			if (master_h == NULL) {
 				err = -ENOENT;
 				goto out_unlock;
@@ -1458,6 +1458,7 @@ static int ctnetlink_exp_done(struct netlink_callback *cb)
 static int
 ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = &init_net;
 	struct nf_conntrack_expect *exp, *last;
 	struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
 	struct hlist_node *n;
@@ -1467,7 +1468,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	last = (struct nf_conntrack_expect *)cb->args[1];
 	for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
 restart:
-		hlist_for_each_entry(exp, n, &nf_ct_expect_hash[cb->args[0]],
+		hlist_for_each_entry(exp, n, &net->ct.expect_hash[cb->args[0]],
 				     hnode) {
 			if (l3proto && exp->tuple.src.l3num != l3proto)
 				continue;
@@ -1529,7 +1530,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	exp = nf_ct_expect_find_get(&tuple);
+	exp = nf_ct_expect_find_get(&init_net, &tuple);
 	if (!exp)
 		return -ENOENT;
 
@@ -1583,7 +1584,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 			return err;
 
 		/* bump usage count to 2 */
-		exp = nf_ct_expect_find_get(&tuple);
+		exp = nf_ct_expect_find_get(&init_net, &tuple);
 		if (!exp)
 			return -ENOENT;
 
@@ -1613,7 +1614,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 		}
 		for (i = 0; i < nf_ct_expect_hsize; i++) {
 			hlist_for_each_entry_safe(exp, n, next,
-						  &nf_ct_expect_hash[i],
+						  &init_net.ct.expect_hash[i],
 						  hnode) {
 				m_help = nfct_help(exp->master);
 				if (m_help->helper == h
@@ -1629,7 +1630,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 		spin_lock_bh(&nf_conntrack_lock);
 		for (i = 0; i < nf_ct_expect_hsize; i++) {
 			hlist_for_each_entry_safe(exp, n, next,
-						  &nf_ct_expect_hash[i],
+						  &init_net.ct.expect_hash[i],
 						  hnode) {
 				if (del_timer(&exp->timeout)) {
 					nf_ct_unlink_expect(exp);
@@ -1670,7 +1671,7 @@ ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3)
 		return err;
 
 	/* Look for master conntrack of this expectation */
-	h = nf_conntrack_find_get(&master_tuple);
+	h = nf_conntrack_find_get(&init_net, &master_tuple);
 	if (!h)
 		return -ENOENT;
 	ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1724,7 +1725,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 		return err;
 
 	spin_lock_bh(&nf_conntrack_lock);
-	exp = __nf_ct_expect_find(&tuple);
+	exp = __nf_ct_expect_find(&init_net, &tuple);
 
 	if (!exp) {
 		spin_unlock_bh(&nf_conntrack_lock);
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 97e54b0e43a..373e51e91ce 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -98,6 +98,7 @@ EXPORT_SYMBOL(pptp_msg_name);
 static void pptp_expectfn(struct nf_conn *ct,
 			 struct nf_conntrack_expect *exp)
 {
+	struct net *net = nf_ct_net(ct);
 	typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn;
 	pr_debug("increasing timeouts\n");
 
@@ -121,7 +122,7 @@ static void pptp_expectfn(struct nf_conn *ct,
 		pr_debug("trying to unexpect other dir: ");
 		nf_ct_dump_tuple(&inv_t);
 
-		exp_other = nf_ct_expect_find_get(&inv_t);
+		exp_other = nf_ct_expect_find_get(net, &inv_t);
 		if (exp_other) {
 			/* delete other expectation.  */
 			pr_debug("found\n");
@@ -134,7 +135,8 @@ static void pptp_expectfn(struct nf_conn *ct,
 	rcu_read_unlock();
 }
 
-static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
+static int destroy_sibling_or_exp(struct net *net,
+				  const struct nf_conntrack_tuple *t)
 {
 	const struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_expect *exp;
@@ -143,7 +145,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
 	pr_debug("trying to timeout ct or exp for tuple ");
 	nf_ct_dump_tuple(t);
 
-	h = nf_conntrack_find_get(t);
+	h = nf_conntrack_find_get(net, t);
 	if (h)  {
 		sibling = nf_ct_tuplehash_to_ctrack(h);
 		pr_debug("setting timeout of conntrack %p to 0\n", sibling);
@@ -154,7 +156,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
 		nf_ct_put(sibling);
 		return 1;
 	} else {
-		exp = nf_ct_expect_find_get(t);
+		exp = nf_ct_expect_find_get(net, t);
 		if (exp) {
 			pr_debug("unexpect_related of expect %p\n", exp);
 			nf_ct_unexpect_related(exp);
@@ -168,6 +170,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
 /* timeout GRE data connections */
 static void pptp_destroy_siblings(struct nf_conn *ct)
 {
+	struct net *net = nf_ct_net(ct);
 	const struct nf_conn_help *help = nfct_help(ct);
 	struct nf_conntrack_tuple t;
 
@@ -178,7 +181,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct)
 	t.dst.protonum = IPPROTO_GRE;
 	t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id;
 	t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id;
-	if (!destroy_sibling_or_exp(&t))
+	if (!destroy_sibling_or_exp(net, &t))
 		pr_debug("failed to timeout original pns->pac ct/exp\n");
 
 	/* try reply (pac->pns) tuple */
@@ -186,7 +189,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct)
 	t.dst.protonum = IPPROTO_GRE;
 	t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id;
 	t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id;
-	if (!destroy_sibling_or_exp(&t))
+	if (!destroy_sibling_or_exp(net, &t))
 		pr_debug("failed to timeout reply pac->pns ct/exp\n");
 }
 
@@ -594,15 +597,32 @@ static struct nf_conntrack_helper pptp __read_mostly = {
 	.expect_policy		= &pptp_exp_policy,
 };
 
+static void nf_conntrack_pptp_net_exit(struct net *net)
+{
+	nf_ct_gre_keymap_flush(net);
+}
+
+static struct pernet_operations nf_conntrack_pptp_net_ops = {
+	.exit = nf_conntrack_pptp_net_exit,
+};
+
 static int __init nf_conntrack_pptp_init(void)
 {
-	return nf_conntrack_helper_register(&pptp);
+	int rv;
+
+	rv = nf_conntrack_helper_register(&pptp);
+	if (rv < 0)
+		return rv;
+	rv = register_pernet_subsys(&nf_conntrack_pptp_net_ops);
+	if (rv < 0)
+		nf_conntrack_helper_unregister(&pptp);
+	return rv;
 }
 
 static void __exit nf_conntrack_pptp_fini(void)
 {
 	nf_conntrack_helper_unregister(&pptp);
-	nf_ct_gre_keymap_flush();
+	unregister_pernet_subsys(&nf_conntrack_pptp_net_ops);
 }
 
 module_init(nf_conntrack_pptp_init);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index a49fc932629..a59a307e685 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -207,6 +207,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
 
 void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 {
+	struct net *net;
+
 	BUG_ON(proto->l3proto >= AF_MAX);
 
 	mutex_lock(&nf_ct_proto_mutex);
@@ -219,7 +221,8 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 	synchronize_rcu();
 
 	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_cleanup(kill_l3proto, proto);
+	for_each_net(net)
+		nf_ct_iterate_cleanup(net, kill_l3proto, proto);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
 
@@ -316,6 +319,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
 
 void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
 {
+	struct net *net;
+
 	BUG_ON(l4proto->l3proto >= PF_MAX);
 
 	mutex_lock(&nf_ct_proto_mutex);
@@ -328,7 +333,8 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
 	synchronize_rcu();
 
 	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_cleanup(kill_l4proto, l4proto);
+	for_each_net(net)
+		nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
 
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index e7866dd3cde..8fcf1762fab 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -418,6 +418,7 @@ static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv,
 static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 		     unsigned int dataoff)
 {
+	struct net *net = nf_ct_net(ct);
 	struct dccp_hdr _dh, *dh;
 	const char *msg;
 	u_int8_t state;
@@ -445,7 +446,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	return true;
 
 out_invalid:
-	if (LOG_INVALID(IPPROTO_DCCP))
+	if (LOG_INVALID(net, IPPROTO_DCCP))
 		nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg);
 	return false;
 }
@@ -461,8 +462,9 @@ static u64 dccp_ack_seq(const struct dccp_hdr *dh)
 
 static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 		       unsigned int dataoff, enum ip_conntrack_info ctinfo,
-		       int pf, unsigned int hooknum)
+		       u_int8_t pf, unsigned int hooknum)
 {
+	struct net *net = nf_ct_net(ct);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	struct dccp_hdr _dh, *dh;
 	u_int8_t type, old_state, new_state;
@@ -524,13 +526,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 		ct->proto.dccp.last_pkt = type;
 
 		write_unlock_bh(&dccp_lock);
-		if (LOG_INVALID(IPPROTO_DCCP))
+		if (LOG_INVALID(net, IPPROTO_DCCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_dccp: invalid packet ignored ");
 		return NF_ACCEPT;
 	case CT_DCCP_INVALID:
 		write_unlock_bh(&dccp_lock);
-		if (LOG_INVALID(IPPROTO_DCCP))
+		if (LOG_INVALID(net, IPPROTO_DCCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_dccp: invalid state transition ");
 		return -NF_ACCEPT;
@@ -545,9 +547,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 	return NF_ACCEPT;
 }
 
-static int dccp_error(struct sk_buff *skb, unsigned int dataoff,
-		      enum ip_conntrack_info *ctinfo, int pf,
-		      unsigned int hooknum)
+static int dccp_error(struct net *net, struct sk_buff *skb,
+		      unsigned int dataoff, enum ip_conntrack_info *ctinfo,
+		      u_int8_t pf, unsigned int hooknum)
 {
 	struct dccp_hdr _dh, *dh;
 	unsigned int dccp_len = skb->len - dataoff;
@@ -575,7 +577,7 @@ static int dccp_error(struct sk_buff *skb, unsigned int dataoff,
 		}
 	}
 
-	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP,
 				pf)) {
 		msg = "nf_ct_dccp: bad checksum ";
@@ -590,7 +592,7 @@ static int dccp_error(struct sk_buff *skb, unsigned int dataoff,
 	return NF_ACCEPT;
 
 out_invalid:
-	if (LOG_INVALID(IPPROTO_DCCP))
+	if (LOG_INVALID(net, IPPROTO_DCCP))
 		nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg);
 	return -NF_ACCEPT;
 }
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index e31b0e7bd0b..dbe680af85d 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -45,7 +45,7 @@ static int packet(struct nf_conn *ct,
 		  const struct sk_buff *skb,
 		  unsigned int dataoff,
 		  enum ip_conntrack_info ctinfo,
-		  int pf,
+		  u_int8_t pf,
 		  unsigned int hooknum)
 {
 	nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_generic_timeout);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 9bd03967fea..a2cdbcbf64c 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -29,8 +29,11 @@
 #include <linux/list.h>
 #include <linux/seq_file.h>
 #include <linux/in.h>
+#include <linux/netdevice.h>
 #include <linux/skbuff.h>
-
+#include <net/dst.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_core.h>
@@ -40,19 +43,23 @@
 #define GRE_TIMEOUT		(30 * HZ)
 #define GRE_STREAM_TIMEOUT	(180 * HZ)
 
-static DEFINE_RWLOCK(nf_ct_gre_lock);
-static LIST_HEAD(gre_keymap_list);
+static int proto_gre_net_id;
+struct netns_proto_gre {
+	rwlock_t		keymap_lock;
+	struct list_head	keymap_list;
+};
 
-void nf_ct_gre_keymap_flush(void)
+void nf_ct_gre_keymap_flush(struct net *net)
 {
+	struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
 	struct nf_ct_gre_keymap *km, *tmp;
 
-	write_lock_bh(&nf_ct_gre_lock);
-	list_for_each_entry_safe(km, tmp, &gre_keymap_list, list) {
+	write_lock_bh(&net_gre->keymap_lock);
+	list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) {
 		list_del(&km->list);
 		kfree(km);
 	}
-	write_unlock_bh(&nf_ct_gre_lock);
+	write_unlock_bh(&net_gre->keymap_lock);
 }
 EXPORT_SYMBOL(nf_ct_gre_keymap_flush);
 
@@ -67,19 +74,20 @@ static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
 }
 
 /* look up the source key for a given tuple */
-static __be16 gre_keymap_lookup(struct nf_conntrack_tuple *t)
+static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t)
 {
+	struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
 	struct nf_ct_gre_keymap *km;
 	__be16 key = 0;
 
-	read_lock_bh(&nf_ct_gre_lock);
-	list_for_each_entry(km, &gre_keymap_list, list) {
+	read_lock_bh(&net_gre->keymap_lock);
+	list_for_each_entry(km, &net_gre->keymap_list, list) {
 		if (gre_key_cmpfn(km, t)) {
 			key = km->tuple.src.u.gre.key;
 			break;
 		}
 	}
-	read_unlock_bh(&nf_ct_gre_lock);
+	read_unlock_bh(&net_gre->keymap_lock);
 
 	pr_debug("lookup src key 0x%x for ", key);
 	nf_ct_dump_tuple(t);
@@ -91,20 +99,22 @@ static __be16 gre_keymap_lookup(struct nf_conntrack_tuple *t)
 int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 			 struct nf_conntrack_tuple *t)
 {
+	struct net *net = nf_ct_net(ct);
+	struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
 	struct nf_conn_help *help = nfct_help(ct);
 	struct nf_ct_gre_keymap **kmp, *km;
 
 	kmp = &help->help.ct_pptp_info.keymap[dir];
 	if (*kmp) {
 		/* check whether it's a retransmission */
-		read_lock_bh(&nf_ct_gre_lock);
-		list_for_each_entry(km, &gre_keymap_list, list) {
+		read_lock_bh(&net_gre->keymap_lock);
+		list_for_each_entry(km, &net_gre->keymap_list, list) {
 			if (gre_key_cmpfn(km, t) && km == *kmp) {
-				read_unlock_bh(&nf_ct_gre_lock);
+				read_unlock_bh(&net_gre->keymap_lock);
 				return 0;
 			}
 		}
-		read_unlock_bh(&nf_ct_gre_lock);
+		read_unlock_bh(&net_gre->keymap_lock);
 		pr_debug("trying to override keymap_%s for ct %p\n",
 			 dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct);
 		return -EEXIST;
@@ -119,9 +129,9 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 	pr_debug("adding new entry %p: ", km);
 	nf_ct_dump_tuple(&km->tuple);
 
-	write_lock_bh(&nf_ct_gre_lock);
-	list_add_tail(&km->list, &gre_keymap_list);
-	write_unlock_bh(&nf_ct_gre_lock);
+	write_lock_bh(&net_gre->keymap_lock);
+	list_add_tail(&km->list, &net_gre->keymap_list);
+	write_unlock_bh(&net_gre->keymap_lock);
 
 	return 0;
 }
@@ -130,12 +140,14 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add);
 /* destroy the keymap entries associated with specified master ct */
 void nf_ct_gre_keymap_destroy(struct nf_conn *ct)
 {
+	struct net *net = nf_ct_net(ct);
+	struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
 	struct nf_conn_help *help = nfct_help(ct);
 	enum ip_conntrack_dir dir;
 
 	pr_debug("entering for ct %p\n", ct);
 
-	write_lock_bh(&nf_ct_gre_lock);
+	write_lock_bh(&net_gre->keymap_lock);
 	for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) {
 		if (help->help.ct_pptp_info.keymap[dir]) {
 			pr_debug("removing %p from list\n",
@@ -145,7 +157,7 @@ void nf_ct_gre_keymap_destroy(struct nf_conn *ct)
 			help->help.ct_pptp_info.keymap[dir] = NULL;
 		}
 	}
-	write_unlock_bh(&nf_ct_gre_lock);
+	write_unlock_bh(&net_gre->keymap_lock);
 }
 EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
 
@@ -164,6 +176,7 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple,
 static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 			     struct nf_conntrack_tuple *tuple)
 {
+	struct net *net = dev_net(skb->dev ? skb->dev : skb->dst->dev);
 	const struct gre_hdr_pptp *pgrehdr;
 	struct gre_hdr_pptp _pgrehdr;
 	__be16 srckey;
@@ -190,7 +203,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 	}
 
 	tuple->dst.u.gre.key = pgrehdr->call_id;
-	srckey = gre_keymap_lookup(tuple);
+	srckey = gre_keymap_lookup(net, tuple);
 	tuple->src.u.gre.key = srckey;
 
 	return true;
@@ -219,7 +232,7 @@ static int gre_packet(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
 		      enum ip_conntrack_info ctinfo,
-		      int pf,
+		      u_int8_t pf,
 		      unsigned int hooknum)
 {
 	/* If we've seen traffic both ways, this is a GRE connection.
@@ -229,7 +242,7 @@ static int gre_packet(struct nf_conn *ct,
 				   ct->proto.gre.stream_timeout);
 		/* Also, more likely to be important, and not a probe. */
 		set_bit(IPS_ASSURED_BIT, &ct->status);
-		nf_conntrack_event_cache(IPCT_STATUS, skb);
+		nf_conntrack_event_cache(IPCT_STATUS, ct);
 	} else
 		nf_ct_refresh_acct(ct, ctinfo, skb,
 				   ct->proto.gre.timeout);
@@ -285,15 +298,53 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
 #endif
 };
 
+static int proto_gre_net_init(struct net *net)
+{
+	struct netns_proto_gre *net_gre;
+	int rv;
+
+	net_gre = kmalloc(sizeof(struct netns_proto_gre), GFP_KERNEL);
+	if (!net_gre)
+		return -ENOMEM;
+	rwlock_init(&net_gre->keymap_lock);
+	INIT_LIST_HEAD(&net_gre->keymap_list);
+
+	rv = net_assign_generic(net, proto_gre_net_id, net_gre);
+	if (rv < 0)
+		kfree(net_gre);
+	return rv;
+}
+
+static void proto_gre_net_exit(struct net *net)
+{
+	struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
+
+	nf_ct_gre_keymap_flush(net);
+	kfree(net_gre);
+}
+
+static struct pernet_operations proto_gre_net_ops = {
+	.init = proto_gre_net_init,
+	.exit = proto_gre_net_exit,
+};
+
 static int __init nf_ct_proto_gre_init(void)
 {
-	return nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4);
+	int rv;
+
+	rv = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4);
+	if (rv < 0)
+		return rv;
+	rv = register_pernet_gen_device(&proto_gre_net_id, &proto_gre_net_ops);
+	if (rv < 0)
+		nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4);
+	return rv;
 }
 
 static void nf_ct_proto_gre_fini(void)
 {
 	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4);
-	nf_ct_gre_keymap_flush();
+	unregister_pernet_gen_device(proto_gre_net_id, &proto_gre_net_ops);
 }
 
 module_init(nf_ct_proto_gre_init);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 30aa5b94a77..ae8c2609e23 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -287,7 +287,7 @@ static int sctp_packet(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       unsigned int dataoff,
 		       enum ip_conntrack_info ctinfo,
-		       int pf,
+		       u_int8_t pf,
 		       unsigned int hooknum)
 {
 	enum sctp_conntrack new_state, old_state;
@@ -369,7 +369,7 @@ static int sctp_packet(struct nf_conn *ct,
 
 		ct->proto.sctp.state = new_state;
 		if (old_state != new_state)
-			nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
+			nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
 	}
 	write_unlock_bh(&sctp_lock);
 
@@ -380,7 +380,7 @@ static int sctp_packet(struct nf_conn *ct,
 	    new_state == SCTP_CONNTRACK_ESTABLISHED) {
 		pr_debug("Setting assured bit\n");
 		set_bit(IPS_ASSURED_BIT, &ct->status);
-		nf_conntrack_event_cache(IPCT_STATUS, skb);
+		nf_conntrack_event_cache(IPCT_STATUS, ct);
 	}
 
 	return NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 6f61261888e..f947ec41e39 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -486,8 +486,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
 			  const struct sk_buff *skb,
 			  unsigned int dataoff,
 			  const struct tcphdr *tcph,
-			  int pf)
+			  u_int8_t pf)
 {
+	struct net *net = nf_ct_net(ct);
 	struct ip_ct_tcp_state *sender = &state->seen[dir];
 	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
 	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
@@ -668,7 +669,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
 		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
 		    nf_ct_tcp_be_liberal)
 			res = true;
-		if (!res && LOG_INVALID(IPPROTO_TCP))
+		if (!res && LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 			"nf_ct_tcp: %s ",
 			before(seq, sender->td_maxend + 1) ?
@@ -746,10 +747,11 @@ static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
 };
 
 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
-static int tcp_error(struct sk_buff *skb,
+static int tcp_error(struct net *net,
+		     struct sk_buff *skb,
 		     unsigned int dataoff,
 		     enum ip_conntrack_info *ctinfo,
-		     int pf,
+		     u_int8_t pf,
 		     unsigned int hooknum)
 {
 	const struct tcphdr *th;
@@ -760,7 +762,7 @@ static int tcp_error(struct sk_buff *skb,
 	/* Smaller that minimal TCP header? */
 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
 	if (th == NULL) {
-		if (LOG_INVALID(IPPROTO_TCP))
+		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				"nf_ct_tcp: short packet ");
 		return -NF_ACCEPT;
@@ -768,7 +770,7 @@ static int tcp_error(struct sk_buff *skb,
 
 	/* Not whole TCP header or malformed packet */
 	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
-		if (LOG_INVALID(IPPROTO_TCP))
+		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				"nf_ct_tcp: truncated/malformed packet ");
 		return -NF_ACCEPT;
@@ -779,9 +781,9 @@ static int tcp_error(struct sk_buff *skb,
 	 * because the checksum is assumed to be correct.
 	 */
 	/* FIXME: Source route IP option packets --RR */
-	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
-		if (LOG_INVALID(IPPROTO_TCP))
+		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				  "nf_ct_tcp: bad TCP checksum ");
 		return -NF_ACCEPT;
@@ -790,7 +792,7 @@ static int tcp_error(struct sk_buff *skb,
 	/* Check TCP flags. */
 	tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
 	if (!tcp_valid_flags[tcpflags]) {
-		if (LOG_INVALID(IPPROTO_TCP))
+		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				  "nf_ct_tcp: invalid TCP flag combination ");
 		return -NF_ACCEPT;
@@ -804,9 +806,10 @@ static int tcp_packet(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
 		      enum ip_conntrack_info ctinfo,
-		      int pf,
+		      u_int8_t pf,
 		      unsigned int hooknum)
 {
+	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_tuple *tuple;
 	enum tcp_conntrack new_state, old_state;
 	enum ip_conntrack_dir dir;
@@ -885,7 +888,7 @@ static int tcp_packet(struct nf_conn *ct,
 			 * thus initiate a clean new session.
 			 */
 			write_unlock_bh(&tcp_lock);
-			if (LOG_INVALID(IPPROTO_TCP))
+			if (LOG_INVALID(net, IPPROTO_TCP))
 				nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 					  "nf_ct_tcp: killing out of sync session ");
 			nf_ct_kill(ct);
@@ -898,7 +901,7 @@ static int tcp_packet(struct nf_conn *ct,
 		    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
 
 		write_unlock_bh(&tcp_lock);
-		if (LOG_INVALID(IPPROTO_TCP))
+		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				  "nf_ct_tcp: invalid packet ignored ");
 		return NF_ACCEPT;
@@ -907,7 +910,7 @@ static int tcp_packet(struct nf_conn *ct,
 		pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
 			 dir, get_conntrack_index(th), old_state);
 		write_unlock_bh(&tcp_lock);
-		if (LOG_INVALID(IPPROTO_TCP))
+		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				  "nf_ct_tcp: invalid state ");
 		return -NF_ACCEPT;
@@ -968,9 +971,9 @@ static int tcp_packet(struct nf_conn *ct,
 		timeout = tcp_timeouts[new_state];
 	write_unlock_bh(&tcp_lock);
 
-	nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+	nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
 	if (new_state != old_state)
-		nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
+		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
 
 	if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
 		/* If only reply is a RST, we can consider ourselves not to
@@ -989,7 +992,7 @@ static int tcp_packet(struct nf_conn *ct,
 		   after SYN_RECV or a valid answer for a picked up
 		   connection. */
 		set_bit(IPS_ASSURED_BIT, &ct->status);
-		nf_conntrack_event_cache(IPCT_STATUS, skb);
+		nf_conntrack_event_cache(IPCT_STATUS, ct);
 	}
 	nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
 
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 8b21762e65d..7c2ca48698b 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -66,7 +66,7 @@ static int udp_packet(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
 		      enum ip_conntrack_info ctinfo,
-		      int pf,
+		      u_int8_t pf,
 		      unsigned int hooknum)
 {
 	/* If we've seen traffic both ways, this is some kind of UDP
@@ -75,7 +75,7 @@ static int udp_packet(struct nf_conn *ct,
 		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream);
 		/* Also, more likely to be important, and not a probe */
 		if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
-			nf_conntrack_event_cache(IPCT_STATUS, skb);
+			nf_conntrack_event_cache(IPCT_STATUS, ct);
 	} else
 		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout);
 
@@ -89,9 +89,9 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	return true;
 }
 
-static int udp_error(struct sk_buff *skb, unsigned int dataoff,
+static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 		     enum ip_conntrack_info *ctinfo,
-		     int pf,
+		     u_int8_t pf,
 		     unsigned int hooknum)
 {
 	unsigned int udplen = skb->len - dataoff;
@@ -101,7 +101,7 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff,
 	/* Header is too small? */
 	hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
 	if (hdr == NULL) {
-		if (LOG_INVALID(IPPROTO_UDP))
+		if (LOG_INVALID(net, IPPROTO_UDP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_udp: short packet ");
 		return -NF_ACCEPT;
@@ -109,7 +109,7 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff,
 
 	/* Truncated/malformed packets */
 	if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
-		if (LOG_INVALID(IPPROTO_UDP))
+		if (LOG_INVALID(net, IPPROTO_UDP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				"nf_ct_udp: truncated/malformed packet ");
 		return -NF_ACCEPT;
@@ -123,9 +123,9 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff,
 	 * We skip checking packets on the outgoing path
 	 * because the checksum is assumed to be correct.
 	 * FIXME: Source route IP option packets --RR */
-	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
-		if (LOG_INVALID(IPPROTO_UDP))
+		if (LOG_INVALID(net, IPPROTO_UDP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				"nf_ct_udp: bad UDP checksum ");
 		return -NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 1fa62f3c24f..d22d839e4f9 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -65,7 +65,7 @@ static int udplite_packet(struct nf_conn *ct,
 			  const struct sk_buff *skb,
 			  unsigned int dataoff,
 			  enum ip_conntrack_info ctinfo,
-			  int pf,
+			  u_int8_t pf,
 			  unsigned int hooknum)
 {
 	/* If we've seen traffic both ways, this is some kind of UDP
@@ -75,7 +75,7 @@ static int udplite_packet(struct nf_conn *ct,
 				   nf_ct_udplite_timeout_stream);
 		/* Also, more likely to be important, and not a probe */
 		if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
-			nf_conntrack_event_cache(IPCT_STATUS, skb);
+			nf_conntrack_event_cache(IPCT_STATUS, ct);
 	} else
 		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout);
 
@@ -89,9 +89,11 @@ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb,
 	return true;
 }
 
-static int udplite_error(struct sk_buff *skb, unsigned int dataoff,
+static int udplite_error(struct net *net,
+			 struct sk_buff *skb,
+			 unsigned int dataoff,
 			 enum ip_conntrack_info *ctinfo,
-			 int pf,
+			 u_int8_t pf,
 			 unsigned int hooknum)
 {
 	unsigned int udplen = skb->len - dataoff;
@@ -102,7 +104,7 @@ static int udplite_error(struct sk_buff *skb, unsigned int dataoff,
 	/* Header is too small? */
 	hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
 	if (hdr == NULL) {
-		if (LOG_INVALID(IPPROTO_UDPLITE))
+		if (LOG_INVALID(net, IPPROTO_UDPLITE))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_udplite: short packet ");
 		return -NF_ACCEPT;
@@ -112,7 +114,7 @@ static int udplite_error(struct sk_buff *skb, unsigned int dataoff,
 	if (cscov == 0)
 		cscov = udplen;
 	else if (cscov < sizeof(*hdr) || cscov > udplen) {
-		if (LOG_INVALID(IPPROTO_UDPLITE))
+		if (LOG_INVALID(net, IPPROTO_UDPLITE))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				"nf_ct_udplite: invalid checksum coverage ");
 		return -NF_ACCEPT;
@@ -120,17 +122,17 @@ static int udplite_error(struct sk_buff *skb, unsigned int dataoff,
 
 	/* UDPLITE mandates checksums */
 	if (!hdr->check) {
-		if (LOG_INVALID(IPPROTO_UDPLITE))
+		if (LOG_INVALID(net, IPPROTO_UDPLITE))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_udplite: checksum missing ");
 		return -NF_ACCEPT;
 	}
 
 	/* Checksum invalid? Ignore. */
-	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
 	    			pf)) {
-		if (LOG_INVALID(IPPROTO_UDPLITE))
+		if (LOG_INVALID(net, IPPROTO_UDPLITE))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_udplite: bad UDPLite checksum ");
 		return -NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 1fa306be60f..6813f1c8863 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -736,6 +736,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
 	struct nf_conntrack_expect *exp, *rtp_exp, *rtcp_exp;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct net *net = nf_ct_net(ct);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	union nf_inet_addr *saddr;
 	struct nf_conntrack_tuple tuple;
@@ -775,7 +776,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
 
 	rcu_read_lock();
 	do {
-		exp = __nf_ct_expect_find(&tuple);
+		exp = __nf_ct_expect_find(net, &tuple);
 
 		if (!exp || exp->master == ct ||
 		    nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 8509db14670..98106d4e89f 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -40,18 +40,20 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
 EXPORT_SYMBOL_GPL(print_tuple);
 
 struct ct_iter_state {
+	struct seq_net_private p;
 	unsigned int bucket;
 };
 
 static struct hlist_node *ct_get_first(struct seq_file *seq)
 {
+	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 	struct hlist_node *n;
 
 	for (st->bucket = 0;
 	     st->bucket < nf_conntrack_htable_size;
 	     st->bucket++) {
-		n = rcu_dereference(nf_conntrack_hash[st->bucket].first);
+		n = rcu_dereference(net->ct.hash[st->bucket].first);
 		if (n)
 			return n;
 	}
@@ -61,13 +63,14 @@ static struct hlist_node *ct_get_first(struct seq_file *seq)
 static struct hlist_node *ct_get_next(struct seq_file *seq,
 				      struct hlist_node *head)
 {
+	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 
 	head = rcu_dereference(head->next);
 	while (head == NULL) {
 		if (++st->bucket >= nf_conntrack_htable_size)
 			return NULL;
-		head = rcu_dereference(nf_conntrack_hash[st->bucket].first);
+		head = rcu_dereference(net->ct.hash[st->bucket].first);
 	}
 	return head;
 }
@@ -177,7 +180,7 @@ static const struct seq_operations ct_seq_ops = {
 
 static int ct_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &ct_seq_ops,
+	return seq_open_net(inode, file, &ct_seq_ops,
 			sizeof(struct ct_iter_state));
 }
 
@@ -186,11 +189,12 @@ static const struct file_operations ct_file_ops = {
 	.open    = ct_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = seq_release_private,
+	.release = seq_release_net,
 };
 
 static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 {
+	struct net *net = seq_file_net(seq);
 	int cpu;
 
 	if (*pos == 0)
@@ -200,7 +204,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 		if (!cpu_possible(cpu))
 			continue;
 		*pos = cpu + 1;
-		return &per_cpu(nf_conntrack_stat, cpu);
+		return per_cpu_ptr(net->ct.stat, cpu);
 	}
 
 	return NULL;
@@ -208,13 +212,14 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 
 static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
+	struct net *net = seq_file_net(seq);
 	int cpu;
 
 	for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
 		if (!cpu_possible(cpu))
 			continue;
 		*pos = cpu + 1;
-		return &per_cpu(nf_conntrack_stat, cpu);
+		return per_cpu_ptr(net->ct.stat, cpu);
 	}
 
 	return NULL;
@@ -226,7 +231,8 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
 
 static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 {
-	unsigned int nr_conntracks = atomic_read(&nf_conntrack_count);
+	struct net *net = seq_file_net(seq);
+	unsigned int nr_conntracks = atomic_read(&net->ct.count);
 	const struct ip_conntrack_stat *st = v;
 
 	if (v == SEQ_START_TOKEN) {
@@ -266,7 +272,8 @@ static const struct seq_operations ct_cpu_seq_ops = {
 
 static int ct_cpu_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &ct_cpu_seq_ops);
+	return seq_open_net(inode, file, &ct_cpu_seq_ops,
+			    sizeof(struct seq_net_private));
 }
 
 static const struct file_operations ct_cpu_seq_fops = {
@@ -274,56 +281,52 @@ static const struct file_operations ct_cpu_seq_fops = {
 	.open	 = ct_cpu_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_net,
 };
 
-static int nf_conntrack_standalone_init_proc(void)
+static int nf_conntrack_standalone_init_proc(struct net *net)
 {
 	struct proc_dir_entry *pde;
 
-	pde = proc_net_fops_create(&init_net, "nf_conntrack", 0440, &ct_file_ops);
+	pde = proc_net_fops_create(net, "nf_conntrack", 0440, &ct_file_ops);
 	if (!pde)
 		goto out_nf_conntrack;
 
-	pde = proc_create("nf_conntrack", S_IRUGO, init_net.proc_net_stat,
+	pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat,
 			  &ct_cpu_seq_fops);
 	if (!pde)
 		goto out_stat_nf_conntrack;
 	return 0;
 
 out_stat_nf_conntrack:
-	proc_net_remove(&init_net, "nf_conntrack");
+	proc_net_remove(net, "nf_conntrack");
 out_nf_conntrack:
 	return -ENOMEM;
 }
 
-static void nf_conntrack_standalone_fini_proc(void)
+static void nf_conntrack_standalone_fini_proc(struct net *net)
 {
-	remove_proc_entry("nf_conntrack", init_net.proc_net_stat);
-	proc_net_remove(&init_net, "nf_conntrack");
+	remove_proc_entry("nf_conntrack", net->proc_net_stat);
+	proc_net_remove(net, "nf_conntrack");
 }
 #else
-static int nf_conntrack_standalone_init_proc(void)
+static int nf_conntrack_standalone_init_proc(struct net *net)
 {
 	return 0;
 }
 
-static void nf_conntrack_standalone_fini_proc(void)
+static void nf_conntrack_standalone_fini_proc(struct net *net)
 {
 }
 #endif /* CONFIG_PROC_FS */
 
 /* Sysctl support */
 
-int nf_conntrack_checksum __read_mostly = 1;
-EXPORT_SYMBOL_GPL(nf_conntrack_checksum);
-
 #ifdef CONFIG_SYSCTL
 /* Log invalid packets of a given protocol */
 static int log_invalid_proto_min = 0;
 static int log_invalid_proto_max = 255;
 
-static struct ctl_table_header *nf_ct_sysctl_header;
 static struct ctl_table_header *nf_ct_netfilter_header;
 
 static ctl_table nf_ct_sysctl_table[] = {
@@ -338,7 +341,7 @@ static ctl_table nf_ct_sysctl_table[] = {
 	{
 		.ctl_name	= NET_NF_CONNTRACK_COUNT,
 		.procname	= "nf_conntrack_count",
-		.data		= &nf_conntrack_count,
+		.data		= &init_net.ct.count,
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
 		.proc_handler	= &proc_dointvec,
@@ -354,7 +357,7 @@ static ctl_table nf_ct_sysctl_table[] = {
 	{
 		.ctl_name	= NET_NF_CONNTRACK_CHECKSUM,
 		.procname	= "nf_conntrack_checksum",
-		.data		= &nf_conntrack_checksum,
+		.data		= &init_net.ct.sysctl_checksum,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
@@ -362,7 +365,7 @@ static ctl_table nf_ct_sysctl_table[] = {
 	{
 		.ctl_name	= NET_NF_CONNTRACK_LOG_INVALID,
 		.procname	= "nf_conntrack_log_invalid",
-		.data		= &nf_ct_log_invalid,
+		.data		= &init_net.ct.sysctl_log_invalid,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
@@ -400,74 +403,109 @@ static struct ctl_path nf_ct_path[] = {
 	{ }
 };
 
-EXPORT_SYMBOL_GPL(nf_ct_log_invalid);
-
-static int nf_conntrack_standalone_init_sysctl(void)
+static int nf_conntrack_standalone_init_sysctl(struct net *net)
 {
-	nf_ct_netfilter_header =
-		register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table);
-	if (!nf_ct_netfilter_header)
-		goto out;
-
-	nf_ct_sysctl_header =
-		 register_sysctl_paths(nf_net_netfilter_sysctl_path,
-					nf_ct_sysctl_table);
-	if (!nf_ct_sysctl_header)
+	struct ctl_table *table;
+
+	if (net_eq(net, &init_net)) {
+		nf_ct_netfilter_header =
+		       register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table);
+		if (!nf_ct_netfilter_header)
+			goto out;
+	}
+
+	table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table),
+			GFP_KERNEL);
+	if (!table)
+		goto out_kmemdup;
+
+	table[1].data = &net->ct.count;
+	table[3].data = &net->ct.sysctl_checksum;
+	table[4].data = &net->ct.sysctl_log_invalid;
+
+	net->ct.sysctl_header = register_net_sysctl_table(net,
+					nf_net_netfilter_sysctl_path, table);
+	if (!net->ct.sysctl_header)
 		goto out_unregister_netfilter;
 
 	return 0;
 
 out_unregister_netfilter:
-	unregister_sysctl_table(nf_ct_netfilter_header);
+	kfree(table);
+out_kmemdup:
+	if (net_eq(net, &init_net))
+		unregister_sysctl_table(nf_ct_netfilter_header);
 out:
 	printk("nf_conntrack: can't register to sysctl.\n");
 	return -ENOMEM;
 }
 
-static void nf_conntrack_standalone_fini_sysctl(void)
+static void nf_conntrack_standalone_fini_sysctl(struct net *net)
 {
-	unregister_sysctl_table(nf_ct_netfilter_header);
-	unregister_sysctl_table(nf_ct_sysctl_header);
+	struct ctl_table *table;
+
+	if (net_eq(net, &init_net))
+		unregister_sysctl_table(nf_ct_netfilter_header);
+	table = net->ct.sysctl_header->ctl_table_arg;
+	unregister_net_sysctl_table(net->ct.sysctl_header);
+	kfree(table);
 }
 #else
-static int nf_conntrack_standalone_init_sysctl(void)
+static int nf_conntrack_standalone_init_sysctl(struct net *net)
 {
 	return 0;
 }
 
-static void nf_conntrack_standalone_fini_sysctl(void)
+static void nf_conntrack_standalone_fini_sysctl(struct net *net)
 {
 }
 #endif /* CONFIG_SYSCTL */
 
-static int __init nf_conntrack_standalone_init(void)
+static int nf_conntrack_net_init(struct net *net)
 {
 	int ret;
 
-	ret = nf_conntrack_init();
+	ret = nf_conntrack_init(net);
 	if (ret < 0)
-		goto out;
-	ret = nf_conntrack_standalone_init_proc();
+		goto out_init;
+	ret = nf_conntrack_standalone_init_proc(net);
 	if (ret < 0)
 		goto out_proc;
-	ret = nf_conntrack_standalone_init_sysctl();
+	net->ct.sysctl_checksum = 1;
+	net->ct.sysctl_log_invalid = 0;
+	ret = nf_conntrack_standalone_init_sysctl(net);
 	if (ret < 0)
 		goto out_sysctl;
 	return 0;
 
 out_sysctl:
-	nf_conntrack_standalone_fini_proc();
+	nf_conntrack_standalone_fini_proc(net);
 out_proc:
-	nf_conntrack_cleanup();
-out:
+	nf_conntrack_cleanup(net);
+out_init:
 	return ret;
 }
 
+static void nf_conntrack_net_exit(struct net *net)
+{
+	nf_conntrack_standalone_fini_sysctl(net);
+	nf_conntrack_standalone_fini_proc(net);
+	nf_conntrack_cleanup(net);
+}
+
+static struct pernet_operations nf_conntrack_net_ops = {
+	.init = nf_conntrack_net_init,
+	.exit = nf_conntrack_net_exit,
+};
+
+static int __init nf_conntrack_standalone_init(void)
+{
+	return register_pernet_subsys(&nf_conntrack_net_ops);
+}
+
 static void __exit nf_conntrack_standalone_fini(void)
 {
-	nf_conntrack_standalone_fini_sysctl();
-	nf_conntrack_standalone_fini_proc();
-	nf_conntrack_cleanup();
+	unregister_pernet_subsys(&nf_conntrack_net_ops);
 }
 
 module_init(nf_conntrack_standalone_init);
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 196269c1e58..bf6609978af 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -15,7 +15,7 @@
 /* core.c */
 extern unsigned int nf_iterate(struct list_head *head,
 				struct sk_buff *skb,
-				int hook,
+				unsigned int hook,
 				const struct net_device *indev,
 				const struct net_device *outdev,
 				struct list_head **i,
@@ -25,7 +25,7 @@ extern unsigned int nf_iterate(struct list_head *head,
 /* nf_queue.c */
 extern int nf_queue(struct sk_buff *skb,
 		    struct list_head *elem,
-		    int pf, unsigned int hook,
+		    u_int8_t pf, unsigned int hook,
 		    struct net_device *indev,
 		    struct net_device *outdev,
 		    int (*okfn)(struct sk_buff *),
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 9fda6ee95a3..fa8ae5d2659 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -15,16 +15,16 @@
 
 #define NF_LOG_PREFIXLEN		128
 
-static const struct nf_logger *nf_loggers[NPROTO] __read_mostly;
+static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly;
 static DEFINE_MUTEX(nf_log_mutex);
 
 /* return EBUSY if somebody else is registered, EEXIST if the same logger
  * is registred, 0 on success. */
-int nf_log_register(int pf, const struct nf_logger *logger)
+int nf_log_register(u_int8_t pf, const struct nf_logger *logger)
 {
 	int ret;
 
-	if (pf >= NPROTO)
+	if (pf >= ARRAY_SIZE(nf_loggers))
 		return -EINVAL;
 
 	/* Any setup of logging members must be done before
@@ -45,9 +45,9 @@ int nf_log_register(int pf, const struct nf_logger *logger)
 }
 EXPORT_SYMBOL(nf_log_register);
 
-void nf_log_unregister_pf(int pf)
+void nf_log_unregister_pf(u_int8_t pf)
 {
-	if (pf >= NPROTO)
+	if (pf >= ARRAY_SIZE(nf_loggers))
 		return;
 	mutex_lock(&nf_log_mutex);
 	rcu_assign_pointer(nf_loggers[pf], NULL);
@@ -63,7 +63,7 @@ void nf_log_unregister(const struct nf_logger *logger)
 	int i;
 
 	mutex_lock(&nf_log_mutex);
-	for (i = 0; i < NPROTO; i++) {
+	for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) {
 		if (nf_loggers[i] == logger)
 			rcu_assign_pointer(nf_loggers[i], NULL);
 	}
@@ -73,7 +73,7 @@ void nf_log_unregister(const struct nf_logger *logger)
 }
 EXPORT_SYMBOL(nf_log_unregister);
 
-void nf_log_packet(int pf,
+void nf_log_packet(u_int8_t pf,
 		   unsigned int hooknum,
 		   const struct sk_buff *skb,
 		   const struct net_device *in,
@@ -103,7 +103,7 @@ static void *seq_start(struct seq_file *seq, loff_t *pos)
 {
 	rcu_read_lock();
 
-	if (*pos >= NPROTO)
+	if (*pos >= ARRAY_SIZE(nf_loggers))
 		return NULL;
 
 	return pos;
@@ -113,7 +113,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
 	(*pos)++;
 
-	if (*pos >= NPROTO)
+	if (*pos >= ARRAY_SIZE(nf_loggers))
 		return NULL;
 
 	return pos;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 582ec3efc8a..4f2310c93e0 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -16,17 +16,17 @@
  * long term mutex.  The handler must provide an an outfn() to accept packets
  * for queueing and must reinject all packets it receives, no matter what.
  */
-static const struct nf_queue_handler *queue_handler[NPROTO];
+static const struct nf_queue_handler *queue_handler[NFPROTO_NUMPROTO] __read_mostly;
 
 static DEFINE_MUTEX(queue_handler_mutex);
 
 /* return EBUSY when somebody else is registered, return EEXIST if the
  * same handler is registered, return 0 in case of success. */
-int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh)
+int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
 	int ret;
 
-	if (pf >= NPROTO)
+	if (pf >= ARRAY_SIZE(queue_handler))
 		return -EINVAL;
 
 	mutex_lock(&queue_handler_mutex);
@@ -45,9 +45,9 @@ int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh)
 EXPORT_SYMBOL(nf_register_queue_handler);
 
 /* The caller must flush their queue before this */
-int nf_unregister_queue_handler(int pf, const struct nf_queue_handler *qh)
+int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
-	if (pf >= NPROTO)
+	if (pf >= ARRAY_SIZE(queue_handler))
 		return -EINVAL;
 
 	mutex_lock(&queue_handler_mutex);
@@ -67,10 +67,10 @@ EXPORT_SYMBOL(nf_unregister_queue_handler);
 
 void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
 {
-	int pf;
+	u_int8_t pf;
 
 	mutex_lock(&queue_handler_mutex);
-	for (pf = 0; pf < NPROTO; pf++)  {
+	for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++)  {
 		if (queue_handler[pf] == qh)
 			rcu_assign_pointer(queue_handler[pf], NULL);
 	}
@@ -107,7 +107,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
  */
 static int __nf_queue(struct sk_buff *skb,
 		      struct list_head *elem,
-		      int pf, unsigned int hook,
+		      u_int8_t pf, unsigned int hook,
 		      struct net_device *indev,
 		      struct net_device *outdev,
 		      int (*okfn)(struct sk_buff *),
@@ -191,7 +191,7 @@ err:
 
 int nf_queue(struct sk_buff *skb,
 	     struct list_head *elem,
-	     int pf, unsigned int hook,
+	     u_int8_t pf, unsigned int hook,
 	     struct net_device *indev,
 	     struct net_device *outdev,
 	     int (*okfn)(struct sk_buff *),
@@ -285,7 +285,7 @@ EXPORT_SYMBOL(nf_reinject);
 #ifdef CONFIG_PROC_FS
 static void *seq_start(struct seq_file *seq, loff_t *pos)
 {
-	if (*pos >= NPROTO)
+	if (*pos >= ARRAY_SIZE(queue_handler))
 		return NULL;
 
 	return pos;
@@ -295,7 +295,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
 	(*pos)++;
 
-	if (*pos >= NPROTO)
+	if (*pos >= ARRAY_SIZE(queue_handler))
 		return NULL;
 
 	return pos;
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index 01489681fa9..8ab829f8657 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -60,14 +60,11 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
 }
 EXPORT_SYMBOL(nf_unregister_sockopt);
 
-static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, int pf,
+static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf,
 		int val, int get)
 {
 	struct nf_sockopt_ops *ops;
 
-	if (!net_eq(sock_net(sk), &init_net))
-		return ERR_PTR(-ENOPROTOOPT);
-
 	if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0)
 		return ERR_PTR(-EINTR);
 
@@ -96,7 +93,7 @@ out:
 }
 
 /* Call get/setsockopt() */
-static int nf_sockopt(struct sock *sk, int pf, int val,
+static int nf_sockopt(struct sock *sk, u_int8_t pf, int val,
 		      char __user *opt, int *len, int get)
 {
 	struct nf_sockopt_ops *ops;
@@ -115,21 +112,22 @@ static int nf_sockopt(struct sock *sk, int pf, int val,
 	return ret;
 }
 
-int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
+int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt,
 		  int len)
 {
 	return nf_sockopt(sk, pf, val, opt, &len, 0);
 }
 EXPORT_SYMBOL(nf_setsockopt);
 
-int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
+int nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt,
+		  int *len)
 {
 	return nf_sockopt(sk, pf, val, opt, len, 1);
 }
 EXPORT_SYMBOL(nf_getsockopt);
 
 #ifdef CONFIG_COMPAT
-static int compat_nf_sockopt(struct sock *sk, int pf, int val,
+static int compat_nf_sockopt(struct sock *sk, u_int8_t pf, int val,
 			     char __user *opt, int *len, int get)
 {
 	struct nf_sockopt_ops *ops;
@@ -155,14 +153,14 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val,
 	return ret;
 }
 
-int compat_nf_setsockopt(struct sock *sk, int pf,
+int compat_nf_setsockopt(struct sock *sk, u_int8_t pf,
 		int val, char __user *opt, int len)
 {
 	return compat_nf_sockopt(sk, pf, val, opt, &len, 0);
 }
 EXPORT_SYMBOL(compat_nf_setsockopt);
 
-int compat_nf_getsockopt(struct sock *sk, int pf,
+int compat_nf_getsockopt(struct sock *sk, u_int8_t pf,
 		int val, char __user *opt, int *len)
 {
 	return compat_nf_sockopt(sk, pf, val, opt, len, 1);
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
new file mode 100644
index 00000000000..fe34f4bf74c
--- /dev/null
+++ b/net/netfilter/nf_tproxy_core.c
@@ -0,0 +1,96 @@
+/*
+ * Transparent proxy support for Linux/iptables
+ *
+ * Copyright (c) 2006-2007 BalaBit IT Ltd.
+ * Author: Balazs Scheidler, Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/net.h>
+#include <linux/if.h>
+#include <linux/netdevice.h>
+#include <net/udp.h>
+#include <net/netfilter/nf_tproxy_core.h>
+
+struct sock *
+nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
+		      const __be32 saddr, const __be32 daddr,
+		      const __be16 sport, const __be16 dport,
+		      const struct net_device *in, bool listening_only)
+{
+	struct sock *sk;
+
+	/* look up socket */
+	switch (protocol) {
+	case IPPROTO_TCP:
+		if (listening_only)
+			sk = __inet_lookup_listener(net, &tcp_hashinfo,
+						    daddr, ntohs(dport),
+						    in->ifindex);
+		else
+			sk = __inet_lookup(net, &tcp_hashinfo,
+					   saddr, sport, daddr, dport,
+					   in->ifindex);
+		break;
+	case IPPROTO_UDP:
+		sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
+				     in->ifindex);
+		break;
+	default:
+		WARN_ON(1);
+		sk = NULL;
+	}
+
+	pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, listener only: %d, sock %p\n",
+		 protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), listening_only, sk);
+
+	return sk;
+}
+EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4);
+
+static void
+nf_tproxy_destructor(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+
+	skb->sk = NULL;
+	skb->destructor = NULL;
+
+	if (sk)
+		nf_tproxy_put_sock(sk);
+}
+
+/* consumes sk */
+int
+nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
+{
+	if (inet_sk(sk)->transparent) {
+		skb->sk = sk;
+		skb->destructor = nf_tproxy_destructor;
+		return 1;
+	} else
+		nf_tproxy_put_sock(sk);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock);
+
+static int __init nf_tproxy_init(void)
+{
+	pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n");
+	pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n");
+	return 0;
+}
+
+module_init(nf_tproxy_init);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs");
+MODULE_DESCRIPTION("Transparent proxy support core routines");
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 9a35b57ab76..41e0105d382 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -359,7 +359,7 @@ static inline int
 __build_packet_message(struct nfulnl_instance *inst,
 			const struct sk_buff *skb,
 			unsigned int data_len,
-			unsigned int pf,
+			u_int8_t pf,
 			unsigned int hooknum,
 			const struct net_device *indev,
 			const struct net_device *outdev,
@@ -534,7 +534,7 @@ static struct nf_loginfo default_loginfo = {
 
 /* log handler for internal netfilter logging api */
 static void
-nfulnl_log_packet(unsigned int pf,
+nfulnl_log_packet(u_int8_t pf,
 		  unsigned int hooknum,
 		  const struct sk_buff *skb,
 		  const struct net_device *in,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 5d75cd86ebb..89837a4eef7 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -30,7 +30,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("[ip,ip6,arp]_tables backend module");
+MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
 
 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
 
@@ -58,17 +58,20 @@ static struct xt_af *xt;
 #define duprintf(format, args...)
 #endif
 
-static const char *const xt_prefix[NPROTO] = {
-	[AF_INET]	= "ip",
-	[AF_INET6]	= "ip6",
-	[NF_ARP]	= "arp",
+static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
+	[NFPROTO_UNSPEC] = "x",
+	[NFPROTO_IPV4]   = "ip",
+	[NFPROTO_ARP]    = "arp",
+	[NFPROTO_BRIDGE] = "eb",
+	[NFPROTO_IPV6]   = "ip6",
 };
 
 /* Registration hooks for targets. */
 int
 xt_register_target(struct xt_target *target)
 {
-	int ret, af = target->family;
+	u_int8_t af = target->family;
+	int ret;
 
 	ret = mutex_lock_interruptible(&xt[af].mutex);
 	if (ret != 0)
@@ -82,7 +85,7 @@ EXPORT_SYMBOL(xt_register_target);
 void
 xt_unregister_target(struct xt_target *target)
 {
-	int af = target->family;
+	u_int8_t af = target->family;
 
 	mutex_lock(&xt[af].mutex);
 	list_del(&target->list);
@@ -123,7 +126,8 @@ EXPORT_SYMBOL(xt_unregister_targets);
 int
 xt_register_match(struct xt_match *match)
 {
-	int ret, af = match->family;
+	u_int8_t af = match->family;
+	int ret;
 
 	ret = mutex_lock_interruptible(&xt[af].mutex);
 	if (ret != 0)
@@ -139,7 +143,7 @@ EXPORT_SYMBOL(xt_register_match);
 void
 xt_unregister_match(struct xt_match *match)
 {
-	int af =  match->family;
+	u_int8_t af = match->family;
 
 	mutex_lock(&xt[af].mutex);
 	list_del(&match->list);
@@ -185,7 +189,7 @@ EXPORT_SYMBOL(xt_unregister_matches);
  */
 
 /* Find match, grabs ref.  Returns ERR_PTR() on error. */
-struct xt_match *xt_find_match(int af, const char *name, u8 revision)
+struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
 {
 	struct xt_match *m;
 	int err = 0;
@@ -205,12 +209,17 @@ struct xt_match *xt_find_match(int af, const char *name, u8 revision)
 		}
 	}
 	mutex_unlock(&xt[af].mutex);
+
+	if (af != NFPROTO_UNSPEC)
+		/* Try searching again in the family-independent list */
+		return xt_find_match(NFPROTO_UNSPEC, name, revision);
+
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(xt_find_match);
 
 /* Find target, grabs ref.  Returns ERR_PTR() on error. */
-struct xt_target *xt_find_target(int af, const char *name, u8 revision)
+struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
 {
 	struct xt_target *t;
 	int err = 0;
@@ -230,11 +239,16 @@ struct xt_target *xt_find_target(int af, const char *name, u8 revision)
 		}
 	}
 	mutex_unlock(&xt[af].mutex);
+
+	if (af != NFPROTO_UNSPEC)
+		/* Try searching again in the family-independent list */
+		return xt_find_target(NFPROTO_UNSPEC, name, revision);
+
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(xt_find_target);
 
-struct xt_target *xt_request_find_target(int af, const char *name, u8 revision)
+struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
 {
 	struct xt_target *target;
 
@@ -246,7 +260,7 @@ struct xt_target *xt_request_find_target(int af, const char *name, u8 revision)
 }
 EXPORT_SYMBOL_GPL(xt_request_find_target);
 
-static int match_revfn(int af, const char *name, u8 revision, int *bestp)
+static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
 {
 	const struct xt_match *m;
 	int have_rev = 0;
@@ -262,7 +276,7 @@ static int match_revfn(int af, const char *name, u8 revision, int *bestp)
 	return have_rev;
 }
 
-static int target_revfn(int af, const char *name, u8 revision, int *bestp)
+static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
 {
 	const struct xt_target *t;
 	int have_rev = 0;
@@ -279,7 +293,7 @@ static int target_revfn(int af, const char *name, u8 revision, int *bestp)
 }
 
 /* Returns true or false (if no such extension at all) */
-int xt_find_revision(int af, const char *name, u8 revision, int target,
+int xt_find_revision(u8 af, const char *name, u8 revision, int target,
 		     int *err)
 {
 	int have_rev, best = -1;
@@ -307,37 +321,47 @@ int xt_find_revision(int af, const char *name, u8 revision, int target,
 }
 EXPORT_SYMBOL_GPL(xt_find_revision);
 
-int xt_check_match(const struct xt_match *match, unsigned short family,
-		   unsigned int size, const char *table, unsigned int hook_mask,
-		   unsigned short proto, int inv_proto)
+int xt_check_match(struct xt_mtchk_param *par,
+		   unsigned int size, u_int8_t proto, bool inv_proto)
 {
-	if (XT_ALIGN(match->matchsize) != size) {
+	if (XT_ALIGN(par->match->matchsize) != size &&
+	    par->match->matchsize != -1) {
+		/*
+		 * ebt_among is exempt from centralized matchsize checking
+		 * because it uses a dynamic-size data set.
+		 */
 		printk("%s_tables: %s match: invalid size %Zu != %u\n",
-		       xt_prefix[family], match->name,
-		       XT_ALIGN(match->matchsize), size);
+		       xt_prefix[par->family], par->match->name,
+		       XT_ALIGN(par->match->matchsize), size);
 		return -EINVAL;
 	}
-	if (match->table && strcmp(match->table, table)) {
+	if (par->match->table != NULL &&
+	    strcmp(par->match->table, par->table) != 0) {
 		printk("%s_tables: %s match: only valid in %s table, not %s\n",
-		       xt_prefix[family], match->name, match->table, table);
+		       xt_prefix[par->family], par->match->name,
+		       par->match->table, par->table);
 		return -EINVAL;
 	}
-	if (match->hooks && (hook_mask & ~match->hooks) != 0) {
-		printk("%s_tables: %s match: bad hook_mask %u/%u\n",
-		       xt_prefix[family], match->name, hook_mask, match->hooks);
+	if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
+		printk("%s_tables: %s match: bad hook_mask %#x/%#x\n",
+		       xt_prefix[par->family], par->match->name,
+		       par->hook_mask, par->match->hooks);
 		return -EINVAL;
 	}
-	if (match->proto && (match->proto != proto || inv_proto)) {
+	if (par->match->proto && (par->match->proto != proto || inv_proto)) {
 		printk("%s_tables: %s match: only valid for protocol %u\n",
-		       xt_prefix[family], match->name, match->proto);
+		       xt_prefix[par->family], par->match->name,
+		       par->match->proto);
 		return -EINVAL;
 	}
+	if (par->match->checkentry != NULL && !par->match->checkentry(par))
+		return -EINVAL;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xt_check_match);
 
 #ifdef CONFIG_COMPAT
-int xt_compat_add_offset(int af, unsigned int offset, short delta)
+int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta)
 {
 	struct compat_delta *tmp;
 
@@ -359,7 +383,7 @@ int xt_compat_add_offset(int af, unsigned int offset, short delta)
 }
 EXPORT_SYMBOL_GPL(xt_compat_add_offset);
 
-void xt_compat_flush_offsets(int af)
+void xt_compat_flush_offsets(u_int8_t af)
 {
 	struct compat_delta *tmp, *next;
 
@@ -373,7 +397,7 @@ void xt_compat_flush_offsets(int af)
 }
 EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
 
-short xt_compat_calc_jump(int af, unsigned int offset)
+short xt_compat_calc_jump(u_int8_t af, unsigned int offset)
 {
 	struct compat_delta *tmp;
 	short delta;
@@ -448,32 +472,36 @@ int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr,
 EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
 #endif /* CONFIG_COMPAT */
 
-int xt_check_target(const struct xt_target *target, unsigned short family,
-		    unsigned int size, const char *table, unsigned int hook_mask,
-		    unsigned short proto, int inv_proto)
+int xt_check_target(struct xt_tgchk_param *par,
+		    unsigned int size, u_int8_t proto, bool inv_proto)
 {
-	if (XT_ALIGN(target->targetsize) != size) {
+	if (XT_ALIGN(par->target->targetsize) != size) {
 		printk("%s_tables: %s target: invalid size %Zu != %u\n",
-		       xt_prefix[family], target->name,
-		       XT_ALIGN(target->targetsize), size);
+		       xt_prefix[par->family], par->target->name,
+		       XT_ALIGN(par->target->targetsize), size);
 		return -EINVAL;
 	}
-	if (target->table && strcmp(target->table, table)) {
+	if (par->target->table != NULL &&
+	    strcmp(par->target->table, par->table) != 0) {
 		printk("%s_tables: %s target: only valid in %s table, not %s\n",
-		       xt_prefix[family], target->name, target->table, table);
+		       xt_prefix[par->family], par->target->name,
+		       par->target->table, par->table);
 		return -EINVAL;
 	}
-	if (target->hooks && (hook_mask & ~target->hooks) != 0) {
-		printk("%s_tables: %s target: bad hook_mask %u/%u\n",
-		       xt_prefix[family], target->name, hook_mask,
-		       target->hooks);
+	if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
+		printk("%s_tables: %s target: bad hook_mask %#x/%#x\n",
+		       xt_prefix[par->family], par->target->name,
+		       par->hook_mask, par->target->hooks);
 		return -EINVAL;
 	}
-	if (target->proto && (target->proto != proto || inv_proto)) {
+	if (par->target->proto && (par->target->proto != proto || inv_proto)) {
 		printk("%s_tables: %s target: only valid for protocol %u\n",
-		       xt_prefix[family], target->name, target->proto);
+		       xt_prefix[par->family], par->target->name,
+		       par->target->proto);
 		return -EINVAL;
 	}
+	if (par->target->checkentry != NULL && !par->target->checkentry(par))
+		return -EINVAL;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xt_check_target);
@@ -590,7 +618,8 @@ void xt_free_table_info(struct xt_table_info *info)
 EXPORT_SYMBOL(xt_free_table_info);
 
 /* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
-struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name)
+struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
+				    const char *name)
 {
 	struct xt_table *t;
 
@@ -612,13 +641,13 @@ void xt_table_unlock(struct xt_table *table)
 EXPORT_SYMBOL_GPL(xt_table_unlock);
 
 #ifdef CONFIG_COMPAT
-void xt_compat_lock(int af)
+void xt_compat_lock(u_int8_t af)
 {
 	mutex_lock(&xt[af].compat_mutex);
 }
 EXPORT_SYMBOL_GPL(xt_compat_lock);
 
-void xt_compat_unlock(int af)
+void xt_compat_unlock(u_int8_t af)
 {
 	mutex_unlock(&xt[af].compat_mutex);
 }
@@ -722,13 +751,13 @@ EXPORT_SYMBOL_GPL(xt_unregister_table);
 #ifdef CONFIG_PROC_FS
 struct xt_names_priv {
 	struct seq_net_private p;
-	int af;
+	u_int8_t af;
 };
 static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct xt_names_priv *priv = seq->private;
 	struct net *net = seq_file_net(seq);
-	int af = priv->af;
+	u_int8_t af = priv->af;
 
 	mutex_lock(&xt[af].mutex);
 	return seq_list_start(&net->xt.tables[af], *pos);
@@ -738,7 +767,7 @@ static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct xt_names_priv *priv = seq->private;
 	struct net *net = seq_file_net(seq);
-	int af = priv->af;
+	u_int8_t af = priv->af;
 
 	return seq_list_next(v, &net->xt.tables[af], pos);
 }
@@ -746,7 +775,7 @@ static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 static void xt_table_seq_stop(struct seq_file *seq, void *v)
 {
 	struct xt_names_priv *priv = seq->private;
-	int af = priv->af;
+	u_int8_t af = priv->af;
 
 	mutex_unlock(&xt[af].mutex);
 }
@@ -922,14 +951,14 @@ static const struct file_operations xt_target_ops = {
 
 #endif /* CONFIG_PROC_FS */
 
-int xt_proto_init(struct net *net, int af)
+int xt_proto_init(struct net *net, u_int8_t af)
 {
 #ifdef CONFIG_PROC_FS
 	char buf[XT_FUNCTION_MAXNAMELEN];
 	struct proc_dir_entry *proc;
 #endif
 
-	if (af >= NPROTO)
+	if (af >= ARRAY_SIZE(xt_prefix))
 		return -EINVAL;
 
 
@@ -974,7 +1003,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(xt_proto_init);
 
-void xt_proto_fini(struct net *net, int af)
+void xt_proto_fini(struct net *net, u_int8_t af)
 {
 #ifdef CONFIG_PROC_FS
 	char buf[XT_FUNCTION_MAXNAMELEN];
@@ -998,7 +1027,7 @@ static int __net_init xt_net_init(struct net *net)
 {
 	int i;
 
-	for (i = 0; i < NPROTO; i++)
+	for (i = 0; i < NFPROTO_NUMPROTO; i++)
 		INIT_LIST_HEAD(&net->xt.tables[i]);
 	return 0;
 }
@@ -1011,11 +1040,11 @@ static int __init xt_init(void)
 {
 	int i, rv;
 
-	xt = kmalloc(sizeof(struct xt_af) * NPROTO, GFP_KERNEL);
+	xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL);
 	if (!xt)
 		return -ENOMEM;
 
-	for (i = 0; i < NPROTO; i++) {
+	for (i = 0; i < NFPROTO_NUMPROTO; i++) {
 		mutex_init(&xt[i].mutex);
 #ifdef CONFIG_COMPAT
 		mutex_init(&xt[i].compat_mutex);
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 77a52bf8322..011bc80dd2a 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -27,50 +27,34 @@ MODULE_ALIAS("ipt_CLASSIFY");
 MODULE_ALIAS("ip6t_CLASSIFY");
 
 static unsigned int
-classify_tg(struct sk_buff *skb, const struct net_device *in,
-            const struct net_device *out, unsigned int hooknum,
-            const struct xt_target *target, const void *targinfo)
+classify_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_classify_target_info *clinfo = targinfo;
+	const struct xt_classify_target_info *clinfo = par->targinfo;
 
 	skb->priority = clinfo->priority;
 	return XT_CONTINUE;
 }
 
-static struct xt_target classify_tg_reg[] __read_mostly = {
-	{
-		.family		= AF_INET,
-		.name 		= "CLASSIFY",
-		.target 	= classify_tg,
-		.targetsize	= sizeof(struct xt_classify_target_info),
-		.table		= "mangle",
-		.hooks		= (1 << NF_INET_LOCAL_OUT) |
-				  (1 << NF_INET_FORWARD) |
-				  (1 << NF_INET_POST_ROUTING),
-		.me 		= THIS_MODULE,
-	},
-	{
-		.name 		= "CLASSIFY",
-		.family		= AF_INET6,
-		.target 	= classify_tg,
-		.targetsize	= sizeof(struct xt_classify_target_info),
-		.table		= "mangle",
-		.hooks		= (1 << NF_INET_LOCAL_OUT) |
-				  (1 << NF_INET_FORWARD) |
-				  (1 << NF_INET_POST_ROUTING),
-		.me 		= THIS_MODULE,
-	},
+static struct xt_target classify_tg_reg __read_mostly = {
+	.name       = "CLASSIFY",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.table      = "mangle",
+	.hooks      = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
+		      (1 << NF_INET_POST_ROUTING),
+	.target     = classify_tg,
+	.targetsize = sizeof(struct xt_classify_target_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init classify_tg_init(void)
 {
-	return xt_register_targets(classify_tg_reg,
-	       ARRAY_SIZE(classify_tg_reg));
+	return xt_register_target(&classify_tg_reg);
 }
 
 static void __exit classify_tg_exit(void)
 {
-	xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
+	xt_unregister_target(&classify_tg_reg);
 }
 
 module_init(classify_tg_init);
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 5fecfb4794b..d6e5ab46327 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -36,11 +36,9 @@ MODULE_ALIAS("ip6t_CONNMARK");
 #include <net/netfilter/nf_conntrack_ecache.h>
 
 static unsigned int
-connmark_tg_v0(struct sk_buff *skb, const struct net_device *in,
-               const struct net_device *out, unsigned int hooknum,
-               const struct xt_target *target, const void *targinfo)
+connmark_tg_v0(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_connmark_target_info *markinfo = targinfo;
+	const struct xt_connmark_target_info *markinfo = par->targinfo;
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	u_int32_t diff;
@@ -54,7 +52,7 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in,
 			newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
 			if (newmark != ct->mark) {
 				ct->mark = newmark;
-				nf_conntrack_event_cache(IPCT_MARK, skb);
+				nf_conntrack_event_cache(IPCT_MARK, ct);
 			}
 			break;
 		case XT_CONNMARK_SAVE:
@@ -62,7 +60,7 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in,
 				  (skb->mark & markinfo->mask);
 			if (ct->mark != newmark) {
 				ct->mark = newmark;
-				nf_conntrack_event_cache(IPCT_MARK, skb);
+				nf_conntrack_event_cache(IPCT_MARK, ct);
 			}
 			break;
 		case XT_CONNMARK_RESTORE:
@@ -77,11 +75,9 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in,
 }
 
 static unsigned int
-connmark_tg(struct sk_buff *skb, const struct net_device *in,
-            const struct net_device *out, unsigned int hooknum,
-            const struct xt_target *target, const void *targinfo)
+connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_connmark_tginfo1 *info = targinfo;
+	const struct xt_connmark_tginfo1 *info = par->targinfo;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
 	u_int32_t newmark;
@@ -95,7 +91,7 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in,
 		newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
 		if (ct->mark != newmark) {
 			ct->mark = newmark;
-			nf_conntrack_event_cache(IPCT_MARK, skb);
+			nf_conntrack_event_cache(IPCT_MARK, ct);
 		}
 		break;
 	case XT_CONNMARK_SAVE:
@@ -103,7 +99,7 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in,
 		          (skb->mark & info->nfmask);
 		if (ct->mark != newmark) {
 			ct->mark = newmark;
-			nf_conntrack_event_cache(IPCT_MARK, skb);
+			nf_conntrack_event_cache(IPCT_MARK, ct);
 		}
 		break;
 	case XT_CONNMARK_RESTORE:
@@ -116,18 +112,15 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in,
 	return XT_CONTINUE;
 }
 
-static bool
-connmark_tg_check_v0(const char *tablename, const void *entry,
-                     const struct xt_target *target, void *targinfo,
-                     unsigned int hook_mask)
+static bool connmark_tg_check_v0(const struct xt_tgchk_param *par)
 {
-	const struct xt_connmark_target_info *matchinfo = targinfo;
+	const struct xt_connmark_target_info *matchinfo = par->targinfo;
 
 	if (matchinfo->mode == XT_CONNMARK_RESTORE) {
-		if (strcmp(tablename, "mangle") != 0) {
+		if (strcmp(par->table, "mangle") != 0) {
 			printk(KERN_WARNING "CONNMARK: restore can only be "
 			       "called from \"mangle\" table, not \"%s\"\n",
-			       tablename);
+			       par->table);
 			return false;
 		}
 	}
@@ -135,31 +128,27 @@ connmark_tg_check_v0(const char *tablename, const void *entry,
 		printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
 		return false;
 	}
-	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", target->family);
+				    "proto=%u\n", par->family);
 		return false;
 	}
 	return true;
 }
 
-static bool
-connmark_tg_check(const char *tablename, const void *entry,
-                  const struct xt_target *target, void *targinfo,
-                  unsigned int hook_mask)
+static bool connmark_tg_check(const struct xt_tgchk_param *par)
 {
-	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "cannot load conntrack support for "
-		       "proto=%u\n", target->family);
+		       "proto=%u\n", par->family);
 		return false;
 	}
 	return true;
 }
 
-static void
-connmark_tg_destroy(const struct xt_target *target, void *targinfo)
+static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
 {
-	nf_ct_l3proto_module_put(target->family);
+	nf_ct_l3proto_module_put(par->family);
 }
 
 #ifdef CONFIG_COMPAT
@@ -197,7 +186,7 @@ static struct xt_target connmark_tg_reg[] __read_mostly = {
 	{
 		.name		= "CONNMARK",
 		.revision	= 0,
-		.family		= AF_INET,
+		.family		= NFPROTO_UNSPEC,
 		.checkentry	= connmark_tg_check_v0,
 		.destroy	= connmark_tg_destroy,
 		.target		= connmark_tg_v0,
@@ -210,34 +199,9 @@ static struct xt_target connmark_tg_reg[] __read_mostly = {
 		.me		= THIS_MODULE
 	},
 	{
-		.name		= "CONNMARK",
-		.revision	= 0,
-		.family		= AF_INET6,
-		.checkentry	= connmark_tg_check_v0,
-		.destroy	= connmark_tg_destroy,
-		.target		= connmark_tg_v0,
-		.targetsize	= sizeof(struct xt_connmark_target_info),
-#ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_connmark_target_info),
-		.compat_from_user = connmark_tg_compat_from_user_v0,
-		.compat_to_user	= connmark_tg_compat_to_user_v0,
-#endif
-		.me		= THIS_MODULE
-	},
-	{
-		.name           = "CONNMARK",
-		.revision       = 1,
-		.family         = AF_INET,
-		.checkentry     = connmark_tg_check,
-		.target         = connmark_tg,
-		.targetsize     = sizeof(struct xt_connmark_tginfo1),
-		.destroy        = connmark_tg_destroy,
-		.me             = THIS_MODULE,
-	},
-	{
 		.name           = "CONNMARK",
 		.revision       = 1,
-		.family         = AF_INET6,
+		.family         = NFPROTO_UNSPEC,
 		.checkentry     = connmark_tg_check,
 		.target         = connmark_tg,
 		.targetsize     = sizeof(struct xt_connmark_tginfo1),
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 76ca1f2421e..b54c3756fdc 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -43,7 +43,7 @@ static void secmark_save(const struct sk_buff *skb)
 		ct = nf_ct_get(skb, &ctinfo);
 		if (ct && !ct->secmark) {
 			ct->secmark = skb->secmark;
-			nf_conntrack_event_cache(IPCT_SECMARK, skb);
+			nf_conntrack_event_cache(IPCT_SECMARK, ct);
 		}
 	}
 }
@@ -65,11 +65,9 @@ static void secmark_restore(struct sk_buff *skb)
 }
 
 static unsigned int
-connsecmark_tg(struct sk_buff *skb, const struct net_device *in,
-               const struct net_device *out, unsigned int hooknum,
-               const struct xt_target *target, const void *targinfo)
+connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_connsecmark_target_info *info = targinfo;
+	const struct xt_connsecmark_target_info *info = par->targinfo;
 
 	switch (info->mode) {
 	case CONNSECMARK_SAVE:
@@ -87,16 +85,14 @@ connsecmark_tg(struct sk_buff *skb, const struct net_device *in,
 	return XT_CONTINUE;
 }
 
-static bool
-connsecmark_tg_check(const char *tablename, const void *entry,
-                     const struct xt_target *target, void *targinfo,
-                     unsigned int hook_mask)
+static bool connsecmark_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct xt_connsecmark_target_info *info = targinfo;
+	const struct xt_connsecmark_target_info *info = par->targinfo;
 
-	if (strcmp(tablename, "mangle") && strcmp(tablename, "security")) {
+	if (strcmp(par->table, "mangle") != 0 &&
+	    strcmp(par->table, "security") != 0) {
 		printk(KERN_INFO PFX "target only valid in the \'mangle\' "
-		       "or \'security\' tables, not \'%s\'.\n", tablename);
+		       "or \'security\' tables, not \'%s\'.\n", par->table);
 		return false;
 	}
 
@@ -110,51 +106,38 @@ connsecmark_tg_check(const char *tablename, const void *entry,
 		return false;
 	}
 
-	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", target->family);
+				    "proto=%u\n", par->family);
 		return false;
 	}
 	return true;
 }
 
-static void
-connsecmark_tg_destroy(const struct xt_target *target, void *targinfo)
+static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par)
 {
-	nf_ct_l3proto_module_put(target->family);
+	nf_ct_l3proto_module_put(par->family);
 }
 
-static struct xt_target connsecmark_tg_reg[] __read_mostly = {
-	{
-		.name		= "CONNSECMARK",
-		.family		= AF_INET,
-		.checkentry	= connsecmark_tg_check,
-		.destroy	= connsecmark_tg_destroy,
-		.target		= connsecmark_tg,
-		.targetsize	= sizeof(struct xt_connsecmark_target_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "CONNSECMARK",
-		.family		= AF_INET6,
-		.checkentry	= connsecmark_tg_check,
-		.destroy	= connsecmark_tg_destroy,
-		.target		= connsecmark_tg,
-		.targetsize	= sizeof(struct xt_connsecmark_target_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_target connsecmark_tg_reg __read_mostly = {
+	.name       = "CONNSECMARK",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = connsecmark_tg_check,
+	.destroy    = connsecmark_tg_destroy,
+	.target     = connsecmark_tg,
+	.targetsize = sizeof(struct xt_connsecmark_target_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init connsecmark_tg_init(void)
 {
-	return xt_register_targets(connsecmark_tg_reg,
-	       ARRAY_SIZE(connsecmark_tg_reg));
+	return xt_register_target(&connsecmark_tg_reg);
 }
 
 static void __exit connsecmark_tg_exit(void)
 {
-	xt_unregister_targets(connsecmark_tg_reg,
-	                      ARRAY_SIZE(connsecmark_tg_reg));
+	xt_unregister_target(&connsecmark_tg_reg);
 }
 
 module_init(connsecmark_tg_init);
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 97efd74c04f..6a347e768f8 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -29,11 +29,9 @@ MODULE_ALIAS("ipt_TOS");
 MODULE_ALIAS("ip6t_TOS");
 
 static unsigned int
-dscp_tg(struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, unsigned int hooknum,
-        const struct xt_target *target, const void *targinfo)
+dscp_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_DSCP_info *dinfo = targinfo;
+	const struct xt_DSCP_info *dinfo = par->targinfo;
 	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	if (dscp != dinfo->dscp) {
@@ -48,11 +46,9 @@ dscp_tg(struct sk_buff *skb, const struct net_device *in,
 }
 
 static unsigned int
-dscp_tg6(struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, unsigned int hooknum,
-         const struct xt_target *target, const void *targinfo)
+dscp_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_DSCP_info *dinfo = targinfo;
+	const struct xt_DSCP_info *dinfo = par->targinfo;
 	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	if (dscp != dinfo->dscp) {
@@ -65,26 +61,21 @@ dscp_tg6(struct sk_buff *skb, const struct net_device *in,
 	return XT_CONTINUE;
 }
 
-static bool
-dscp_tg_check(const char *tablename, const void *e_void,
-              const struct xt_target *target, void *targinfo,
-              unsigned int hook_mask)
+static bool dscp_tg_check(const struct xt_tgchk_param *par)
 {
-	const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp;
+	const struct xt_DSCP_info *info = par->targinfo;
 
-	if (dscp > XT_DSCP_MAX) {
-		printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
+	if (info->dscp > XT_DSCP_MAX) {
+		printk(KERN_WARNING "DSCP: dscp %x out of range\n", info->dscp);
 		return false;
 	}
 	return true;
 }
 
 static unsigned int
-tos_tg_v0(struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, unsigned int hooknum,
-          const struct xt_target *target, const void *targinfo)
+tos_tg_v0(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct ipt_tos_target_info *info = targinfo;
+	const struct ipt_tos_target_info *info = par->targinfo;
 	struct iphdr *iph = ip_hdr(skb);
 	u_int8_t oldtos;
 
@@ -101,12 +92,10 @@ tos_tg_v0(struct sk_buff *skb, const struct net_device *in,
 	return XT_CONTINUE;
 }
 
-static bool
-tos_tg_check_v0(const char *tablename, const void *e_void,
-                const struct xt_target *target, void *targinfo,
-                unsigned int hook_mask)
+static bool tos_tg_check_v0(const struct xt_tgchk_param *par)
 {
-	const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
+	const struct ipt_tos_target_info *info = par->targinfo;
+	const uint8_t tos = info->tos;
 
 	if (tos != IPTOS_LOWDELAY && tos != IPTOS_THROUGHPUT &&
 	    tos != IPTOS_RELIABILITY && tos != IPTOS_MINCOST &&
@@ -119,11 +108,9 @@ tos_tg_check_v0(const char *tablename, const void *e_void,
 }
 
 static unsigned int
-tos_tg(struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, unsigned int hooknum,
-       const struct xt_target *target, const void *targinfo)
+tos_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_tos_target_info *info = targinfo;
+	const struct xt_tos_target_info *info = par->targinfo;
 	struct iphdr *iph = ip_hdr(skb);
 	u_int8_t orig, nv;
 
@@ -141,11 +128,9 @@ tos_tg(struct sk_buff *skb, const struct net_device *in,
 }
 
 static unsigned int
-tos_tg6(struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, unsigned int hooknum,
-        const struct xt_target *target, const void *targinfo)
+tos_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_tos_target_info *info = targinfo;
+	const struct xt_tos_target_info *info = par->targinfo;
 	struct ipv6hdr *iph = ipv6_hdr(skb);
 	u_int8_t orig, nv;
 
@@ -165,7 +150,7 @@ tos_tg6(struct sk_buff *skb, const struct net_device *in,
 static struct xt_target dscp_tg_reg[] __read_mostly = {
 	{
 		.name		= "DSCP",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= dscp_tg_check,
 		.target		= dscp_tg,
 		.targetsize	= sizeof(struct xt_DSCP_info),
@@ -174,7 +159,7 @@ static struct xt_target dscp_tg_reg[] __read_mostly = {
 	},
 	{
 		.name		= "DSCP",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= dscp_tg_check,
 		.target		= dscp_tg6,
 		.targetsize	= sizeof(struct xt_DSCP_info),
@@ -184,7 +169,7 @@ static struct xt_target dscp_tg_reg[] __read_mostly = {
 	{
 		.name		= "TOS",
 		.revision	= 0,
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.table		= "mangle",
 		.target		= tos_tg_v0,
 		.targetsize	= sizeof(struct ipt_tos_target_info),
@@ -194,7 +179,7 @@ static struct xt_target dscp_tg_reg[] __read_mostly = {
 	{
 		.name		= "TOS",
 		.revision	= 1,
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.table		= "mangle",
 		.target		= tos_tg,
 		.targetsize	= sizeof(struct xt_tos_target_info),
@@ -203,7 +188,7 @@ static struct xt_target dscp_tg_reg[] __read_mostly = {
 	{
 		.name		= "TOS",
 		.revision	= 1,
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.table		= "mangle",
 		.target		= tos_tg6,
 		.targetsize	= sizeof(struct xt_tos_target_info),
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index f9ce20b5898..67574bcfb8a 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -25,22 +25,18 @@ MODULE_ALIAS("ipt_MARK");
 MODULE_ALIAS("ip6t_MARK");
 
 static unsigned int
-mark_tg_v0(struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, unsigned int hooknum,
-           const struct xt_target *target, const void *targinfo)
+mark_tg_v0(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_mark_target_info *markinfo = targinfo;
+	const struct xt_mark_target_info *markinfo = par->targinfo;
 
 	skb->mark = markinfo->mark;
 	return XT_CONTINUE;
 }
 
 static unsigned int
-mark_tg_v1(struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, unsigned int hooknum,
-           const struct xt_target *target, const void *targinfo)
+mark_tg_v1(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_mark_target_info_v1 *markinfo = targinfo;
+	const struct xt_mark_target_info_v1 *markinfo = par->targinfo;
 	int mark = 0;
 
 	switch (markinfo->mode) {
@@ -62,22 +58,17 @@ mark_tg_v1(struct sk_buff *skb, const struct net_device *in,
 }
 
 static unsigned int
-mark_tg(struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, unsigned int hooknum,
-        const struct xt_target *target, const void *targinfo)
+mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_mark_tginfo2 *info = targinfo;
+	const struct xt_mark_tginfo2 *info = par->targinfo;
 
 	skb->mark = (skb->mark & ~info->mask) ^ info->mark;
 	return XT_CONTINUE;
 }
 
-static bool
-mark_tg_check_v0(const char *tablename, const void *entry,
-                 const struct xt_target *target, void *targinfo,
-                 unsigned int hook_mask)
+static bool mark_tg_check_v0(const struct xt_tgchk_param *par)
 {
-	const struct xt_mark_target_info *markinfo = targinfo;
+	const struct xt_mark_target_info *markinfo = par->targinfo;
 
 	if (markinfo->mark > 0xffffffff) {
 		printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
@@ -86,12 +77,9 @@ mark_tg_check_v0(const char *tablename, const void *entry,
 	return true;
 }
 
-static bool
-mark_tg_check_v1(const char *tablename, const void *entry,
-                 const struct xt_target *target, void *targinfo,
-                 unsigned int hook_mask)
+static bool mark_tg_check_v1(const struct xt_tgchk_param *par)
 {
-	const struct xt_mark_target_info_v1 *markinfo = targinfo;
+	const struct xt_mark_target_info_v1 *markinfo = par->targinfo;
 
 	if (markinfo->mode != XT_MARK_SET
 	    && markinfo->mode != XT_MARK_AND
@@ -161,7 +149,7 @@ static int mark_tg_compat_to_user_v1(void __user *dst, void *src)
 static struct xt_target mark_tg_reg[] __read_mostly = {
 	{
 		.name		= "MARK",
-		.family		= AF_INET,
+		.family		= NFPROTO_UNSPEC,
 		.revision	= 0,
 		.checkentry	= mark_tg_check_v0,
 		.target		= mark_tg_v0,
@@ -176,7 +164,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 	},
 	{
 		.name		= "MARK",
-		.family		= AF_INET,
+		.family		= NFPROTO_UNSPEC,
 		.revision	= 1,
 		.checkentry	= mark_tg_check_v1,
 		.target		= mark_tg_v1,
@@ -190,47 +178,9 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 		.me		= THIS_MODULE,
 	},
 	{
-		.name		= "MARK",
-		.family		= AF_INET6,
-		.revision	= 0,
-		.checkentry	= mark_tg_check_v0,
-		.target		= mark_tg_v0,
-		.targetsize	= sizeof(struct xt_mark_target_info),
-#ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_mark_target_info),
-		.compat_from_user = mark_tg_compat_from_user_v0,
-		.compat_to_user	= mark_tg_compat_to_user_v0,
-#endif
-		.table		= "mangle",
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "MARK",
-		.family		= AF_INET6,
-		.revision	= 1,
-		.checkentry	= mark_tg_check_v1,
-		.target		= mark_tg_v1,
-		.targetsize	= sizeof(struct xt_mark_target_info_v1),
-#ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_mark_target_info_v1),
-		.compat_from_user = mark_tg_compat_from_user_v1,
-		.compat_to_user	= mark_tg_compat_to_user_v1,
-#endif
-		.table		= "mangle",
-		.me		= THIS_MODULE,
-	},
-	{
-		.name           = "MARK",
-		.revision       = 2,
-		.family         = AF_INET,
-		.target         = mark_tg,
-		.targetsize     = sizeof(struct xt_mark_tginfo2),
-		.me             = THIS_MODULE,
-	},
-	{
 		.name           = "MARK",
 		.revision       = 2,
-		.family         = AF_INET6,
+		.family         = NFPROTO_UNSPEC,
 		.target         = mark_tg,
 		.targetsize     = sizeof(struct xt_mark_tginfo2),
 		.me             = THIS_MODULE,
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 19ae8efae65..50e3a52d3b3 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -21,11 +21,9 @@ MODULE_ALIAS("ipt_NFLOG");
 MODULE_ALIAS("ip6t_NFLOG");
 
 static unsigned int
-nflog_tg(struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, unsigned int hooknum,
-         const struct xt_target *target, const void *targinfo)
+nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_nflog_info *info = targinfo;
+	const struct xt_nflog_info *info = par->targinfo;
 	struct nf_loginfo li;
 
 	li.type		     = NF_LOG_TYPE_ULOG;
@@ -33,17 +31,14 @@ nflog_tg(struct sk_buff *skb, const struct net_device *in,
 	li.u.ulog.group	     = info->group;
 	li.u.ulog.qthreshold = info->threshold;
 
-	nf_log_packet(target->family, hooknum, skb, in, out, &li,
-		      "%s", info->prefix);
+	nf_log_packet(par->family, par->hooknum, skb, par->in,
+	              par->out, &li, "%s", info->prefix);
 	return XT_CONTINUE;
 }
 
-static bool
-nflog_tg_check(const char *tablename, const void *entry,
-               const struct xt_target *target, void *targetinfo,
-               unsigned int hookmask)
+static bool nflog_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct xt_nflog_info *info = targetinfo;
+	const struct xt_nflog_info *info = par->targinfo;
 
 	if (info->flags & ~XT_NFLOG_MASK)
 		return false;
@@ -52,33 +47,24 @@ nflog_tg_check(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct xt_target nflog_tg_reg[] __read_mostly = {
-	{
-		.name		= "NFLOG",
-		.family		= AF_INET,
-		.checkentry	= nflog_tg_check,
-		.target		= nflog_tg,
-		.targetsize	= sizeof(struct xt_nflog_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "NFLOG",
-		.family		= AF_INET6,
-		.checkentry	= nflog_tg_check,
-		.target		= nflog_tg,
-		.targetsize	= sizeof(struct xt_nflog_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_target nflog_tg_reg __read_mostly = {
+	.name       = "NFLOG",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = nflog_tg_check,
+	.target     = nflog_tg,
+	.targetsize = sizeof(struct xt_nflog_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init nflog_tg_init(void)
 {
-	return xt_register_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg));
+	return xt_register_target(&nflog_tg_reg);
 }
 
 static void __exit nflog_tg_exit(void)
 {
-	xt_unregister_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg));
+	xt_unregister_target(&nflog_tg_reg);
 }
 
 module_init(nflog_tg_init);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index beb24d19a56..2cc1fff4930 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -24,11 +24,9 @@ MODULE_ALIAS("ip6t_NFQUEUE");
 MODULE_ALIAS("arpt_NFQUEUE");
 
 static unsigned int
-nfqueue_tg(struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, unsigned int hooknum,
-           const struct xt_target *target, const void *targinfo)
+nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_NFQ_info *tinfo = targinfo;
+	const struct xt_NFQ_info *tinfo = par->targinfo;
 
 	return NF_QUEUE_NR(tinfo->queuenum);
 }
@@ -36,14 +34,14 @@ nfqueue_tg(struct sk_buff *skb, const struct net_device *in,
 static struct xt_target nfqueue_tg_reg[] __read_mostly = {
 	{
 		.name		= "NFQUEUE",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.target		= nfqueue_tg,
 		.targetsize	= sizeof(struct xt_NFQ_info),
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "NFQUEUE",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.target		= nfqueue_tg,
 		.targetsize	= sizeof(struct xt_NFQ_info),
 		.me		= THIS_MODULE,
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index 6c9de611eb8..e7a0a54fd4e 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -13,9 +13,7 @@ MODULE_ALIAS("ipt_NOTRACK");
 MODULE_ALIAS("ip6t_NOTRACK");
 
 static unsigned int
-notrack_tg(struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, unsigned int hooknum,
-           const struct xt_target *target, const void *targinfo)
+notrack_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	/* Previously seen (loopback)? Ignore. */
 	if (skb->nfct != NULL)
@@ -32,31 +30,23 @@ notrack_tg(struct sk_buff *skb, const struct net_device *in,
 	return XT_CONTINUE;
 }
 
-static struct xt_target notrack_tg_reg[] __read_mostly = {
-	{
-		.name		= "NOTRACK",
-		.family		= AF_INET,
-		.target		= notrack_tg,
-		.table		= "raw",
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "NOTRACK",
-		.family		= AF_INET6,
-		.target		= notrack_tg,
-		.table		= "raw",
-		.me		= THIS_MODULE,
-	},
+static struct xt_target notrack_tg_reg __read_mostly = {
+	.name     = "NOTRACK",
+	.revision = 0,
+	.family   = NFPROTO_UNSPEC,
+	.target   = notrack_tg,
+	.table    = "raw",
+	.me       = THIS_MODULE,
 };
 
 static int __init notrack_tg_init(void)
 {
-	return xt_register_targets(notrack_tg_reg, ARRAY_SIZE(notrack_tg_reg));
+	return xt_register_target(&notrack_tg_reg);
 }
 
 static void __exit notrack_tg_exit(void)
 {
-	xt_unregister_targets(notrack_tg_reg, ARRAY_SIZE(notrack_tg_reg));
+	xt_unregister_target(&notrack_tg_reg);
 }
 
 module_init(notrack_tg_init);
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 64d6ad38029..43f5676b1af 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -71,14 +71,9 @@ void xt_rateest_put(struct xt_rateest *est)
 EXPORT_SYMBOL_GPL(xt_rateest_put);
 
 static unsigned int
-xt_rateest_tg(struct sk_buff *skb,
-	      const struct net_device *in,
-	      const struct net_device *out,
-	      unsigned int hooknum,
-	      const struct xt_target *target,
-	      const void *targinfo)
+xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_rateest_target_info *info = targinfo;
+	const struct xt_rateest_target_info *info = par->targinfo;
 	struct gnet_stats_basic *stats = &info->est->bstats;
 
 	spin_lock_bh(&info->est->lock);
@@ -89,14 +84,9 @@ xt_rateest_tg(struct sk_buff *skb,
 	return XT_CONTINUE;
 }
 
-static bool
-xt_rateest_tg_checkentry(const char *tablename,
-			 const void *entry,
-			 const struct xt_target *target,
-			 void *targinfo,
-			 unsigned int hook_mask)
+static bool xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 {
-	struct xt_rateest_target_info *info = targinfo;
+	struct xt_rateest_target_info *info = par->targinfo;
 	struct xt_rateest *est;
 	struct {
 		struct nlattr		opt;
@@ -149,33 +139,22 @@ err1:
 	return false;
 }
 
-static void xt_rateest_tg_destroy(const struct xt_target *target,
-				  void *targinfo)
+static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par)
 {
-	struct xt_rateest_target_info *info = targinfo;
+	struct xt_rateest_target_info *info = par->targinfo;
 
 	xt_rateest_put(info->est);
 }
 
-static struct xt_target xt_rateest_target[] __read_mostly = {
-	{
-		.family		= AF_INET,
-		.name		= "RATEEST",
-		.target		= xt_rateest_tg,
-		.checkentry	= xt_rateest_tg_checkentry,
-		.destroy	= xt_rateest_tg_destroy,
-		.targetsize	= sizeof(struct xt_rateest_target_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.family		= AF_INET6,
-		.name		= "RATEEST",
-		.target		= xt_rateest_tg,
-		.checkentry	= xt_rateest_tg_checkentry,
-		.destroy	= xt_rateest_tg_destroy,
-		.targetsize	= sizeof(struct xt_rateest_target_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_target xt_rateest_tg_reg __read_mostly = {
+	.name       = "RATEEST",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.target     = xt_rateest_tg,
+	.checkentry = xt_rateest_tg_checkentry,
+	.destroy    = xt_rateest_tg_destroy,
+	.targetsize = sizeof(struct xt_rateest_target_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init xt_rateest_tg_init(void)
@@ -186,13 +165,12 @@ static int __init xt_rateest_tg_init(void)
 		INIT_HLIST_HEAD(&rateest_hash[i]);
 
 	get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
-	return xt_register_targets(xt_rateest_target,
-				   ARRAY_SIZE(xt_rateest_target));
+	return xt_register_target(&xt_rateest_tg_reg);
 }
 
 static void __exit xt_rateest_tg_fini(void)
 {
-	xt_unregister_targets(xt_rateest_target, ARRAY_SIZE(xt_rateest_target));
+	xt_unregister_target(&xt_rateest_tg_reg);
 }
 
 
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 94f87ee7552..7a6f9e6f5df 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -29,12 +29,10 @@ MODULE_ALIAS("ip6t_SECMARK");
 static u8 mode;
 
 static unsigned int
-secmark_tg(struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, unsigned int hooknum,
-           const struct xt_target *target, const void *targinfo)
+secmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	u32 secmark = 0;
-	const struct xt_secmark_target_info *info = targinfo;
+	const struct xt_secmark_target_info *info = par->targinfo;
 
 	BUG_ON(info->mode != mode);
 
@@ -82,16 +80,14 @@ static bool checkentry_selinux(struct xt_secmark_target_info *info)
 	return true;
 }
 
-static bool
-secmark_tg_check(const char *tablename, const void *entry,
-                 const struct xt_target *target, void *targinfo,
-                 unsigned int hook_mask)
+static bool secmark_tg_check(const struct xt_tgchk_param *par)
 {
-	struct xt_secmark_target_info *info = targinfo;
+	struct xt_secmark_target_info *info = par->targinfo;
 
-	if (strcmp(tablename, "mangle") && strcmp(tablename, "security")) {
+	if (strcmp(par->table, "mangle") != 0 &&
+	    strcmp(par->table, "security") != 0) {
 		printk(KERN_INFO PFX "target only valid in the \'mangle\' "
-		       "or \'security\' tables, not \'%s\'.\n", tablename);
+		       "or \'security\' tables, not \'%s\'.\n", par->table);
 		return false;
 	}
 
@@ -117,7 +113,7 @@ secmark_tg_check(const char *tablename, const void *entry,
 	return true;
 }
 
-static void secmark_tg_destroy(const struct xt_target *target, void *targinfo)
+static void secmark_tg_destroy(const struct xt_tgdtor_param *par)
 {
 	switch (mode) {
 	case SECMARK_MODE_SEL:
@@ -125,35 +121,25 @@ static void secmark_tg_destroy(const struct xt_target *target, void *targinfo)
 	}
 }
 
-static struct xt_target secmark_tg_reg[] __read_mostly = {
-	{
-		.name		= "SECMARK",
-		.family		= AF_INET,
-		.checkentry	= secmark_tg_check,
-		.destroy	= secmark_tg_destroy,
-		.target		= secmark_tg,
-		.targetsize	= sizeof(struct xt_secmark_target_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "SECMARK",
-		.family		= AF_INET6,
-		.checkentry	= secmark_tg_check,
-		.destroy	= secmark_tg_destroy,
-		.target		= secmark_tg,
-		.targetsize	= sizeof(struct xt_secmark_target_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_target secmark_tg_reg __read_mostly = {
+	.name       = "SECMARK",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = secmark_tg_check,
+	.destroy    = secmark_tg_destroy,
+	.target     = secmark_tg,
+	.targetsize = sizeof(struct xt_secmark_target_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init secmark_tg_init(void)
 {
-	return xt_register_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
+	return xt_register_target(&secmark_tg_reg);
 }
 
 static void __exit secmark_tg_exit(void)
 {
-	xt_unregister_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
+	xt_unregister_target(&secmark_tg_reg);
 }
 
 module_init(secmark_tg_init);
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index beb5094703c..4f3b1f80879 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -174,15 +174,13 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
 }
 
 static unsigned int
-tcpmss_tg4(struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, unsigned int hooknum,
-           const struct xt_target *target, const void *targinfo)
+tcpmss_tg4(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	struct iphdr *iph = ip_hdr(skb);
 	__be16 newlen;
 	int ret;
 
-	ret = tcpmss_mangle_packet(skb, targinfo,
+	ret = tcpmss_mangle_packet(skb, par->targinfo,
 				   tcpmss_reverse_mtu(skb, PF_INET),
 				   iph->ihl * 4,
 				   sizeof(*iph) + sizeof(struct tcphdr));
@@ -199,9 +197,7 @@ tcpmss_tg4(struct sk_buff *skb, const struct net_device *in,
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 static unsigned int
-tcpmss_tg6(struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, unsigned int hooknum,
-           const struct xt_target *target, const void *targinfo)
+tcpmss_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	u8 nexthdr;
@@ -212,7 +208,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct net_device *in,
 	tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr);
 	if (tcphoff < 0)
 		return NF_DROP;
-	ret = tcpmss_mangle_packet(skb, targinfo,
+	ret = tcpmss_mangle_packet(skb, par->targinfo,
 				   tcpmss_reverse_mtu(skb, PF_INET6),
 				   tcphoff,
 				   sizeof(*ipv6h) + sizeof(struct tcphdr));
@@ -241,16 +237,13 @@ static inline bool find_syn_match(const struct xt_entry_match *m)
 	return false;
 }
 
-static bool
-tcpmss_tg4_check(const char *tablename, const void *entry,
-                 const struct xt_target *target, void *targinfo,
-                 unsigned int hook_mask)
+static bool tcpmss_tg4_check(const struct xt_tgchk_param *par)
 {
-	const struct xt_tcpmss_info *info = targinfo;
-	const struct ipt_entry *e = entry;
+	const struct xt_tcpmss_info *info = par->targinfo;
+	const struct ipt_entry *e = par->entryinfo;
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
-	    (hook_mask & ~((1 << NF_INET_FORWARD) |
+	    (par->hook_mask & ~((1 << NF_INET_FORWARD) |
 			   (1 << NF_INET_LOCAL_OUT) |
 			   (1 << NF_INET_POST_ROUTING))) != 0) {
 		printk("xt_TCPMSS: path-MTU clamping only supported in "
@@ -264,16 +257,13 @@ tcpmss_tg4_check(const char *tablename, const void *entry,
 }
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-static bool
-tcpmss_tg6_check(const char *tablename, const void *entry,
-                 const struct xt_target *target, void *targinfo,
-                 unsigned int hook_mask)
+static bool tcpmss_tg6_check(const struct xt_tgchk_param *par)
 {
-	const struct xt_tcpmss_info *info = targinfo;
-	const struct ip6t_entry *e = entry;
+	const struct xt_tcpmss_info *info = par->targinfo;
+	const struct ip6t_entry *e = par->entryinfo;
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
-	    (hook_mask & ~((1 << NF_INET_FORWARD) |
+	    (par->hook_mask & ~((1 << NF_INET_FORWARD) |
 			   (1 << NF_INET_LOCAL_OUT) |
 			   (1 << NF_INET_POST_ROUTING))) != 0) {
 		printk("xt_TCPMSS: path-MTU clamping only supported in "
@@ -289,7 +279,7 @@ tcpmss_tg6_check(const char *tablename, const void *entry,
 
 static struct xt_target tcpmss_tg_reg[] __read_mostly = {
 	{
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.name		= "TCPMSS",
 		.checkentry	= tcpmss_tg4_check,
 		.target		= tcpmss_tg4,
@@ -299,7 +289,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = {
 	},
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 	{
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.name		= "TCPMSS",
 		.checkentry	= tcpmss_tg6_check,
 		.target		= tcpmss_tg6,
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index 9685b6fcbc8..9dd8c8ef63e 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -75,19 +75,15 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
 }
 
 static unsigned int
-tcpoptstrip_tg4(struct sk_buff *skb, const struct net_device *in,
-		const struct net_device *out, unsigned int hooknum,
-		const struct xt_target *target, const void *targinfo)
+tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	return tcpoptstrip_mangle_packet(skb, targinfo, ip_hdrlen(skb),
+	return tcpoptstrip_mangle_packet(skb, par->targinfo, ip_hdrlen(skb),
 	       sizeof(struct iphdr) + sizeof(struct tcphdr));
 }
 
 #if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
 static unsigned int
-tcpoptstrip_tg6(struct sk_buff *skb, const struct net_device *in,
-		const struct net_device *out, unsigned int hooknum,
-		const struct xt_target *target, const void *targinfo)
+tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	int tcphoff;
@@ -98,7 +94,7 @@ tcpoptstrip_tg6(struct sk_buff *skb, const struct net_device *in,
 	if (tcphoff < 0)
 		return NF_DROP;
 
-	return tcpoptstrip_mangle_packet(skb, targinfo, tcphoff,
+	return tcpoptstrip_mangle_packet(skb, par->targinfo, tcphoff,
 	       sizeof(*ipv6h) + sizeof(struct tcphdr));
 }
 #endif
@@ -106,7 +102,7 @@ tcpoptstrip_tg6(struct sk_buff *skb, const struct net_device *in,
 static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = {
 	{
 		.name       = "TCPOPTSTRIP",
-		.family     = AF_INET,
+		.family     = NFPROTO_IPV4,
 		.table      = "mangle",
 		.proto      = IPPROTO_TCP,
 		.target     = tcpoptstrip_tg4,
@@ -116,7 +112,7 @@ static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = {
 #if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
 	{
 		.name       = "TCPOPTSTRIP",
-		.family     = AF_INET6,
+		.family     = NFPROTO_IPV6,
 		.table      = "mangle",
 		.proto      = IPPROTO_TCP,
 		.target     = tcpoptstrip_tg6,
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
new file mode 100644
index 00000000000..1340c2fa362
--- /dev/null
+++ b/net/netfilter/xt_TPROXY.c
@@ -0,0 +1,102 @@
+/*
+ * Transparent proxy support for Linux/iptables
+ *
+ * Copyright (c) 2006-2007 BalaBit IT Ltd.
+ * Author: Balazs Scheidler, Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+#include <net/udp.h>
+#include <net/inet_sock.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/xt_TPROXY.h>
+
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <net/netfilter/nf_tproxy_core.h>
+
+static unsigned int
+tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	const struct xt_tproxy_target_info *tgi = par->targinfo;
+	struct udphdr _hdr, *hp;
+	struct sock *sk;
+
+	hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
+	if (hp == NULL)
+		return NF_DROP;
+
+	sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
+				   iph->saddr, tgi->laddr ? tgi->laddr : iph->daddr,
+				   hp->source, tgi->lport ? tgi->lport : hp->dest,
+				   par->in, true);
+
+	/* NOTE: assign_sock consumes our sk reference */
+	if (sk && nf_tproxy_assign_sock(skb, sk)) {
+		/* This should be in a separate target, but we don't do multiple
+		   targets on the same rule yet */
+		skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;
+
+		pr_debug("redirecting: proto %u %08x:%u -> %08x:%u, mark: %x\n",
+			 iph->protocol, ntohl(iph->daddr), ntohs(hp->dest),
+			 ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark);
+		return NF_ACCEPT;
+	}
+
+	pr_debug("no socket, dropping: proto %u %08x:%u -> %08x:%u, mark: %x\n",
+		 iph->protocol, ntohl(iph->daddr), ntohs(hp->dest),
+		 ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark);
+	return NF_DROP;
+}
+
+static bool tproxy_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct ipt_ip *i = par->entryinfo;
+
+	if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP)
+	    && !(i->invflags & IPT_INV_PROTO))
+		return true;
+
+	pr_info("xt_TPROXY: Can be used only in combination with "
+		"either -p tcp or -p udp\n");
+	return false;
+}
+
+static struct xt_target tproxy_tg_reg __read_mostly = {
+	.name		= "TPROXY",
+	.family		= AF_INET,
+	.table		= "mangle",
+	.target		= tproxy_tg,
+	.targetsize	= sizeof(struct xt_tproxy_target_info),
+	.checkentry	= tproxy_tg_check,
+	.hooks		= 1 << NF_INET_PRE_ROUTING,
+	.me		= THIS_MODULE,
+};
+
+static int __init tproxy_tg_init(void)
+{
+	nf_defrag_ipv4_enable();
+	return xt_register_target(&tproxy_tg_reg);
+}
+
+static void __exit tproxy_tg_exit(void)
+{
+	xt_unregister_target(&tproxy_tg_reg);
+}
+
+module_init(tproxy_tg_init);
+module_exit(tproxy_tg_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs");
+MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module.");
+MODULE_ALIAS("ipt_TPROXY");
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index 30dab79a343..fbb04b86c46 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -11,39 +11,29 @@ MODULE_ALIAS("ipt_TRACE");
 MODULE_ALIAS("ip6t_TRACE");
 
 static unsigned int
-trace_tg(struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, unsigned int hooknum,
-         const struct xt_target *target, const void *targinfo)
+trace_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	skb->nf_trace = 1;
 	return XT_CONTINUE;
 }
 
-static struct xt_target trace_tg_reg[] __read_mostly = {
-	{
-		.name		= "TRACE",
-		.family		= AF_INET,
-		.target		= trace_tg,
-		.table		= "raw",
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "TRACE",
-		.family		= AF_INET6,
-		.target		= trace_tg,
-		.table		= "raw",
-		.me		= THIS_MODULE,
-	},
+static struct xt_target trace_tg_reg __read_mostly = {
+	.name       = "TRACE",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.table      = "raw",
+	.target     = trace_tg,
+	.me         = THIS_MODULE,
 };
 
 static int __init trace_tg_init(void)
 {
-	return xt_register_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg));
+	return xt_register_target(&trace_tg_reg);
 }
 
 static void __exit trace_tg_exit(void)
 {
-	xt_unregister_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg));
+	xt_unregister_target(&trace_tg_reg);
 }
 
 module_init(trace_tg_init);
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index 89f47364e84..e82179832ac 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -16,40 +16,29 @@ MODULE_ALIAS("ipt_comment");
 MODULE_ALIAS("ip6t_comment");
 
 static bool
-comment_mt(const struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, const struct xt_match *match,
-           const void *matchinfo, int offset, unsigned int protooff,
-           bool *hotdrop)
+comment_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	/* We always match */
 	return true;
 }
 
-static struct xt_match comment_mt_reg[] __read_mostly = {
-	{
-		.name		= "comment",
-		.family		= AF_INET,
-		.match		= comment_mt,
-		.matchsize	= sizeof(struct xt_comment_info),
-		.me		= THIS_MODULE
-	},
-	{
-		.name		= "comment",
-		.family		= AF_INET6,
-		.match		= comment_mt,
-		.matchsize	= sizeof(struct xt_comment_info),
-		.me		= THIS_MODULE
-	},
+static struct xt_match comment_mt_reg __read_mostly = {
+	.name      = "comment",
+	.revision  = 0,
+	.family    = NFPROTO_UNSPEC,
+	.match     = comment_mt,
+	.matchsize = sizeof(struct xt_comment_info),
+	.me        = THIS_MODULE,
 };
 
 static int __init comment_mt_init(void)
 {
-	return xt_register_matches(comment_mt_reg, ARRAY_SIZE(comment_mt_reg));
+	return xt_register_match(&comment_mt_reg);
 }
 
 static void __exit comment_mt_exit(void)
 {
-	xt_unregister_matches(comment_mt_reg, ARRAY_SIZE(comment_mt_reg));
+	xt_unregister_match(&comment_mt_reg);
 }
 
 module_init(comment_mt_init);
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 3e39c4fe193..955e6598a7f 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -17,12 +17,9 @@ MODULE_ALIAS("ipt_connbytes");
 MODULE_ALIAS("ip6t_connbytes");
 
 static bool
-connbytes_mt(const struct sk_buff *skb, const struct net_device *in,
-             const struct net_device *out, const struct xt_match *match,
-             const void *matchinfo, int offset, unsigned int protoff,
-             bool *hotdrop)
+connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_connbytes_info *sinfo = matchinfo;
+	const struct xt_connbytes_info *sinfo = par->matchinfo;
 	const struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	u_int64_t what = 0;	/* initialize to make gcc happy */
@@ -95,12 +92,9 @@ connbytes_mt(const struct sk_buff *skb, const struct net_device *in,
 		return what >= sinfo->count.from;
 }
 
-static bool
-connbytes_mt_check(const char *tablename, const void *ip,
-                   const struct xt_match *match, void *matchinfo,
-                   unsigned int hook_mask)
+static bool connbytes_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_connbytes_info *sinfo = matchinfo;
+	const struct xt_connbytes_info *sinfo = par->matchinfo;
 
 	if (sinfo->what != XT_CONNBYTES_PKTS &&
 	    sinfo->what != XT_CONNBYTES_BYTES &&
@@ -112,51 +106,39 @@ connbytes_mt_check(const char *tablename, const void *ip,
 	    sinfo->direction != XT_CONNBYTES_DIR_BOTH)
 		return false;
 
-	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", match->family);
+				    "proto=%u\n", par->family);
 		return false;
 	}
 
 	return true;
 }
 
-static void
-connbytes_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void connbytes_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	nf_ct_l3proto_module_put(match->family);
+	nf_ct_l3proto_module_put(par->family);
 }
 
-static struct xt_match connbytes_mt_reg[] __read_mostly = {
-	{
-		.name		= "connbytes",
-		.family		= AF_INET,
-		.checkentry	= connbytes_mt_check,
-		.match		= connbytes_mt,
-		.destroy	= connbytes_mt_destroy,
-		.matchsize	= sizeof(struct xt_connbytes_info),
-		.me		= THIS_MODULE
-	},
-	{
-		.name		= "connbytes",
-		.family		= AF_INET6,
-		.checkentry	= connbytes_mt_check,
-		.match		= connbytes_mt,
-		.destroy	= connbytes_mt_destroy,
-		.matchsize	= sizeof(struct xt_connbytes_info),
-		.me		= THIS_MODULE
-	},
+static struct xt_match connbytes_mt_reg __read_mostly = {
+	.name       = "connbytes",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = connbytes_mt_check,
+	.match      = connbytes_mt,
+	.destroy    = connbytes_mt_destroy,
+	.matchsize  = sizeof(struct xt_connbytes_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init connbytes_mt_init(void)
 {
-	return xt_register_matches(connbytes_mt_reg,
-	       ARRAY_SIZE(connbytes_mt_reg));
+	return xt_register_match(&connbytes_mt_reg);
 }
 
 static void __exit connbytes_mt_exit(void)
 {
-	xt_unregister_matches(connbytes_mt_reg, ARRAY_SIZE(connbytes_mt_reg));
+	xt_unregister_match(&connbytes_mt_reg);
 }
 
 module_init(connbytes_mt_init);
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 70907f6baac..7f404cc64c8 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -82,9 +82,9 @@ static inline bool already_closed(const struct nf_conn *conn)
 static inline unsigned int
 same_source_net(const union nf_inet_addr *addr,
 		const union nf_inet_addr *mask,
-		const union nf_inet_addr *u3, unsigned int family)
+		const union nf_inet_addr *u3, u_int8_t family)
 {
-	if (family == AF_INET) {
+	if (family == NFPROTO_IPV4) {
 		return (addr->ip & mask->ip) == (u3->ip & mask->ip);
 	} else {
 		union nf_inet_addr lh, rh;
@@ -114,7 +114,7 @@ static int count_them(struct xt_connlimit_data *data,
 	int matches = 0;
 
 
-	if (match->family == AF_INET6)
+	if (match->family == NFPROTO_IPV6)
 		hash = &data->iphash[connlimit_iphash6(addr, mask)];
 	else
 		hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)];
@@ -123,7 +123,7 @@ static int count_them(struct xt_connlimit_data *data,
 
 	/* check the saved connections */
 	list_for_each_entry_safe(conn, tmp, hash, list) {
-		found    = __nf_conntrack_find(&conn->tuple);
+		found    = __nf_conntrack_find(&init_net, &conn->tuple);
 		found_ct = NULL;
 
 		if (found != NULL)
@@ -178,12 +178,9 @@ static int count_them(struct xt_connlimit_data *data,
 }
 
 static bool
-connlimit_mt(const struct sk_buff *skb, const struct net_device *in,
-             const struct net_device *out, const struct xt_match *match,
-             const void *matchinfo, int offset, unsigned int protoff,
-             bool *hotdrop)
+connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_connlimit_info *info = matchinfo;
+	const struct xt_connlimit_info *info = par->matchinfo;
 	union nf_inet_addr addr;
 	struct nf_conntrack_tuple tuple;
 	const struct nf_conntrack_tuple *tuple_ptr = &tuple;
@@ -195,10 +192,10 @@ connlimit_mt(const struct sk_buff *skb, const struct net_device *in,
 	if (ct != NULL)
 		tuple_ptr = &ct->tuplehash[0].tuple;
 	else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
-				    match->family, &tuple))
+				    par->family, &tuple))
 		goto hotdrop;
 
-	if (match->family == AF_INET6) {
+	if (par->family == NFPROTO_IPV6) {
 		const struct ipv6hdr *iph = ipv6_hdr(skb);
 		memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr));
 	} else {
@@ -208,40 +205,37 @@ connlimit_mt(const struct sk_buff *skb, const struct net_device *in,
 
 	spin_lock_bh(&info->data->lock);
 	connections = count_them(info->data, tuple_ptr, &addr,
-	                         &info->mask, match);
+	                         &info->mask, par->match);
 	spin_unlock_bh(&info->data->lock);
 
 	if (connections < 0) {
 		/* kmalloc failed, drop it entirely */
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
 	return (connections > info->limit) ^ info->inverse;
 
  hotdrop:
-	*hotdrop = true;
+	*par->hotdrop = true;
 	return false;
 }
 
-static bool
-connlimit_mt_check(const char *tablename, const void *ip,
-                   const struct xt_match *match, void *matchinfo,
-                   unsigned int hook_mask)
+static bool connlimit_mt_check(const struct xt_mtchk_param *par)
 {
-	struct xt_connlimit_info *info = matchinfo;
+	struct xt_connlimit_info *info = par->matchinfo;
 	unsigned int i;
 
-	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "cannot load conntrack support for "
-		       "address family %u\n", match->family);
+		       "address family %u\n", par->family);
 		return false;
 	}
 
 	/* init private data */
 	info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL);
 	if (info->data == NULL) {
-		nf_ct_l3proto_module_put(match->family);
+		nf_ct_l3proto_module_put(par->family);
 		return false;
 	}
 
@@ -252,16 +246,15 @@ connlimit_mt_check(const char *tablename, const void *ip,
 	return true;
 }
 
-static void
-connlimit_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	const struct xt_connlimit_info *info = matchinfo;
+	const struct xt_connlimit_info *info = par->matchinfo;
 	struct xt_connlimit_conn *conn;
 	struct xt_connlimit_conn *tmp;
 	struct list_head *hash = info->data->iphash;
 	unsigned int i;
 
-	nf_ct_l3proto_module_put(match->family);
+	nf_ct_l3proto_module_put(par->family);
 
 	for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) {
 		list_for_each_entry_safe(conn, tmp, &hash[i], list) {
@@ -273,41 +266,30 @@ connlimit_mt_destroy(const struct xt_match *match, void *matchinfo)
 	kfree(info->data);
 }
 
-static struct xt_match connlimit_mt_reg[] __read_mostly = {
-	{
-		.name       = "connlimit",
-		.family     = AF_INET,
-		.checkentry = connlimit_mt_check,
-		.match      = connlimit_mt,
-		.matchsize  = sizeof(struct xt_connlimit_info),
-		.destroy    = connlimit_mt_destroy,
-		.me         = THIS_MODULE,
-	},
-	{
-		.name       = "connlimit",
-		.family     = AF_INET6,
-		.checkentry = connlimit_mt_check,
-		.match      = connlimit_mt,
-		.matchsize  = sizeof(struct xt_connlimit_info),
-		.destroy    = connlimit_mt_destroy,
-		.me         = THIS_MODULE,
-	},
+static struct xt_match connlimit_mt_reg __read_mostly = {
+	.name       = "connlimit",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = connlimit_mt_check,
+	.match      = connlimit_mt,
+	.matchsize  = sizeof(struct xt_connlimit_info),
+	.destroy    = connlimit_mt_destroy,
+	.me         = THIS_MODULE,
 };
 
 static int __init connlimit_mt_init(void)
 {
-	return xt_register_matches(connlimit_mt_reg,
-	       ARRAY_SIZE(connlimit_mt_reg));
+	return xt_register_match(&connlimit_mt_reg);
 }
 
 static void __exit connlimit_mt_exit(void)
 {
-	xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg));
+	xt_unregister_match(&connlimit_mt_reg);
 }
 
 module_init(connlimit_mt_init);
 module_exit(connlimit_mt_exit);
-MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
 MODULE_DESCRIPTION("Xtables: Number of connections matching");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_connlimit");
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index aaa1b96691f..86cacab7a4a 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -34,12 +34,9 @@ MODULE_ALIAS("ipt_connmark");
 MODULE_ALIAS("ip6t_connmark");
 
 static bool
-connmark_mt(const struct sk_buff *skb, const struct net_device *in,
-            const struct net_device *out, const struct xt_match *match,
-            const void *matchinfo, int offset, unsigned int protoff,
-            bool *hotdrop)
+connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_connmark_mtinfo1 *info = matchinfo;
+	const struct xt_connmark_mtinfo1 *info = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
 	const struct nf_conn *ct;
 
@@ -51,12 +48,9 @@ connmark_mt(const struct sk_buff *skb, const struct net_device *in,
 }
 
 static bool
-connmark_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-               const struct net_device *out, const struct xt_match *match,
-               const void *matchinfo, int offset, unsigned int protoff,
-               bool *hotdrop)
+connmark_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_connmark_info *info = matchinfo;
+	const struct xt_connmark_info *info = par->matchinfo;
 	const struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 
@@ -67,42 +61,35 @@ connmark_mt_v0(const struct sk_buff *skb, const struct net_device *in,
 	return ((ct->mark & info->mask) == info->mark) ^ info->invert;
 }
 
-static bool
-connmark_mt_check_v0(const char *tablename, const void *ip,
-                     const struct xt_match *match, void *matchinfo,
-                     unsigned int hook_mask)
+static bool connmark_mt_check_v0(const struct xt_mtchk_param *par)
 {
-	const struct xt_connmark_info *cm = matchinfo;
+	const struct xt_connmark_info *cm = par->matchinfo;
 
 	if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) {
 		printk(KERN_WARNING "connmark: only support 32bit mark\n");
 		return false;
 	}
-	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", match->family);
+				    "proto=%u\n", par->family);
 		return false;
 	}
 	return true;
 }
 
-static bool
-connmark_mt_check(const char *tablename, const void *ip,
-                  const struct xt_match *match, void *matchinfo,
-                  unsigned int hook_mask)
+static bool connmark_mt_check(const struct xt_mtchk_param *par)
 {
-	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "cannot load conntrack support for "
-		       "proto=%u\n", match->family);
+		       "proto=%u\n", par->family);
 		return false;
 	}
 	return true;
 }
 
-static void
-connmark_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void connmark_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	nf_ct_l3proto_module_put(match->family);
+	nf_ct_l3proto_module_put(par->family);
 }
 
 #ifdef CONFIG_COMPAT
@@ -140,7 +127,7 @@ static struct xt_match connmark_mt_reg[] __read_mostly = {
 	{
 		.name		= "connmark",
 		.revision	= 0,
-		.family		= AF_INET,
+		.family		= NFPROTO_UNSPEC,
 		.checkentry	= connmark_mt_check_v0,
 		.match		= connmark_mt_v0,
 		.destroy	= connmark_mt_destroy,
@@ -153,34 +140,9 @@ static struct xt_match connmark_mt_reg[] __read_mostly = {
 		.me		= THIS_MODULE
 	},
 	{
-		.name		= "connmark",
-		.revision	= 0,
-		.family		= AF_INET6,
-		.checkentry	= connmark_mt_check_v0,
-		.match		= connmark_mt_v0,
-		.destroy	= connmark_mt_destroy,
-		.matchsize	= sizeof(struct xt_connmark_info),
-#ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_connmark_info),
-		.compat_from_user = connmark_mt_compat_from_user_v0,
-		.compat_to_user	= connmark_mt_compat_to_user_v0,
-#endif
-		.me		= THIS_MODULE
-	},
-	{
-		.name           = "connmark",
-		.revision       = 1,
-		.family         = AF_INET,
-		.checkentry     = connmark_mt_check,
-		.match          = connmark_mt,
-		.matchsize      = sizeof(struct xt_connmark_mtinfo1),
-		.destroy        = connmark_mt_destroy,
-		.me             = THIS_MODULE,
-	},
-	{
 		.name           = "connmark",
 		.revision       = 1,
-		.family         = AF_INET6,
+		.family         = NFPROTO_UNSPEC,
 		.checkentry     = connmark_mt_check,
 		.match          = connmark_mt,
 		.matchsize      = sizeof(struct xt_connmark_mtinfo1),
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index d61412f58ef..0b7139f3dd7 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -25,12 +25,9 @@ MODULE_ALIAS("ipt_conntrack");
 MODULE_ALIAS("ip6t_conntrack");
 
 static bool
-conntrack_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-                const struct net_device *out, const struct xt_match *match,
-                const void *matchinfo, int offset, unsigned int protoff,
-                bool *hotdrop)
+conntrack_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_conntrack_info *sinfo = matchinfo;
+	const struct xt_conntrack_info *sinfo = par->matchinfo;
 	const struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	unsigned int statebit;
@@ -121,9 +118,9 @@ conntrack_addrcmp(const union nf_inet_addr *kaddr,
                   const union nf_inet_addr *uaddr,
                   const union nf_inet_addr *umask, unsigned int l3proto)
 {
-	if (l3proto == AF_INET)
+	if (l3proto == NFPROTO_IPV4)
 		return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0;
-	else if (l3proto == AF_INET6)
+	else if (l3proto == NFPROTO_IPV6)
 		return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6,
 		       &uaddr->in6) == 0;
 	else
@@ -133,7 +130,7 @@ conntrack_addrcmp(const union nf_inet_addr *kaddr,
 static inline bool
 conntrack_mt_origsrc(const struct nf_conn *ct,
                      const struct xt_conntrack_mtinfo1 *info,
-                     unsigned int family)
+		     u_int8_t family)
 {
 	return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
 	       &info->origsrc_addr, &info->origsrc_mask, family);
@@ -142,7 +139,7 @@ conntrack_mt_origsrc(const struct nf_conn *ct,
 static inline bool
 conntrack_mt_origdst(const struct nf_conn *ct,
                      const struct xt_conntrack_mtinfo1 *info,
-                     unsigned int family)
+		     u_int8_t family)
 {
 	return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3,
 	       &info->origdst_addr, &info->origdst_mask, family);
@@ -151,7 +148,7 @@ conntrack_mt_origdst(const struct nf_conn *ct,
 static inline bool
 conntrack_mt_replsrc(const struct nf_conn *ct,
                      const struct xt_conntrack_mtinfo1 *info,
-                     unsigned int family)
+		     u_int8_t family)
 {
 	return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3,
 	       &info->replsrc_addr, &info->replsrc_mask, family);
@@ -160,7 +157,7 @@ conntrack_mt_replsrc(const struct nf_conn *ct,
 static inline bool
 conntrack_mt_repldst(const struct nf_conn *ct,
                      const struct xt_conntrack_mtinfo1 *info,
-                     unsigned int family)
+		     u_int8_t family)
 {
 	return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3,
 	       &info->repldst_addr, &info->repldst_mask, family);
@@ -205,12 +202,9 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info,
 }
 
 static bool
-conntrack_mt(const struct sk_buff *skb, const struct net_device *in,
-             const struct net_device *out, const struct xt_match *match,
-             const void *matchinfo, int offset, unsigned int protoff,
-             bool *hotdrop)
+conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_conntrack_mtinfo1 *info = matchinfo;
+	const struct xt_conntrack_mtinfo1 *info = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
 	const struct nf_conn *ct;
 	unsigned int statebit;
@@ -244,22 +238,22 @@ conntrack_mt(const struct sk_buff *skb, const struct net_device *in,
 		return false;
 
 	if (info->match_flags & XT_CONNTRACK_ORIGSRC)
-		if (conntrack_mt_origsrc(ct, info, match->family) ^
+		if (conntrack_mt_origsrc(ct, info, par->family) ^
 		    !(info->invert_flags & XT_CONNTRACK_ORIGSRC))
 			return false;
 
 	if (info->match_flags & XT_CONNTRACK_ORIGDST)
-		if (conntrack_mt_origdst(ct, info, match->family) ^
+		if (conntrack_mt_origdst(ct, info, par->family) ^
 		    !(info->invert_flags & XT_CONNTRACK_ORIGDST))
 			return false;
 
 	if (info->match_flags & XT_CONNTRACK_REPLSRC)
-		if (conntrack_mt_replsrc(ct, info, match->family) ^
+		if (conntrack_mt_replsrc(ct, info, par->family) ^
 		    !(info->invert_flags & XT_CONNTRACK_REPLSRC))
 			return false;
 
 	if (info->match_flags & XT_CONNTRACK_REPLDST)
-		if (conntrack_mt_repldst(ct, info, match->family) ^
+		if (conntrack_mt_repldst(ct, info, par->family) ^
 		    !(info->invert_flags & XT_CONNTRACK_REPLDST))
 			return false;
 
@@ -284,23 +278,19 @@ conntrack_mt(const struct sk_buff *skb, const struct net_device *in,
 	return true;
 }
 
-static bool
-conntrack_mt_check(const char *tablename, const void *ip,
-                   const struct xt_match *match, void *matchinfo,
-                   unsigned int hook_mask)
+static bool conntrack_mt_check(const struct xt_mtchk_param *par)
 {
-	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", match->family);
+				    "proto=%u\n", par->family);
 		return false;
 	}
 	return true;
 }
 
-static void
-conntrack_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void conntrack_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	nf_ct_l3proto_module_put(match->family);
+	nf_ct_l3proto_module_put(par->family);
 }
 
 #ifdef CONFIG_COMPAT
@@ -356,7 +346,7 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = {
 	{
 		.name       = "conntrack",
 		.revision   = 0,
-		.family     = AF_INET,
+		.family     = NFPROTO_IPV4,
 		.match      = conntrack_mt_v0,
 		.checkentry = conntrack_mt_check,
 		.destroy    = conntrack_mt_destroy,
@@ -371,17 +361,7 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = {
 	{
 		.name       = "conntrack",
 		.revision   = 1,
-		.family     = AF_INET,
-		.matchsize  = sizeof(struct xt_conntrack_mtinfo1),
-		.match      = conntrack_mt,
-		.checkentry = conntrack_mt_check,
-		.destroy    = conntrack_mt_destroy,
-		.me         = THIS_MODULE,
-	},
-	{
-		.name       = "conntrack",
-		.revision   = 1,
-		.family     = AF_INET6,
+		.family     = NFPROTO_UNSPEC,
 		.matchsize  = sizeof(struct xt_conntrack_mtinfo1),
 		.match      = conntrack_mt,
 		.checkentry = conntrack_mt_check,
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 8b6522186d9..e5d3e867328 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -93,20 +93,18 @@ match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff,
 }
 
 static bool
-dccp_mt(const struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, const struct xt_match *match,
-        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_dccp_info *info = matchinfo;
+	const struct xt_dccp_info *info = par->matchinfo;
 	const struct dccp_hdr *dh;
 	struct dccp_hdr _dh;
 
-	if (offset)
+	if (par->fragoff != 0)
 		return false;
 
-	dh = skb_header_pointer(skb, protoff, sizeof(_dh), &_dh);
+	dh = skb_header_pointer(skb, par->thoff, sizeof(_dh), &_dh);
 	if (dh == NULL) {
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
@@ -118,17 +116,14 @@ dccp_mt(const struct sk_buff *skb, const struct net_device *in,
 			XT_DCCP_DEST_PORTS, info->flags, info->invflags)
 		&& DCCHECK(match_types(dh, info->typemask),
 			   XT_DCCP_TYPE, info->flags, info->invflags)
-		&& DCCHECK(match_option(info->option, skb, protoff, dh,
-					hotdrop),
+		&& DCCHECK(match_option(info->option, skb, par->thoff, dh,
+					par->hotdrop),
 			   XT_DCCP_OPTION, info->flags, info->invflags);
 }
 
-static bool
-dccp_mt_check(const char *tablename, const void *inf,
-              const struct xt_match *match, void *matchinfo,
-              unsigned int hook_mask)
+static bool dccp_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_dccp_info *info = matchinfo;
+	const struct xt_dccp_info *info = par->matchinfo;
 
 	return !(info->flags & ~XT_DCCP_VALID_FLAGS)
 		&& !(info->invflags & ~XT_DCCP_VALID_FLAGS)
@@ -138,7 +133,7 @@ dccp_mt_check(const char *tablename, const void *inf,
 static struct xt_match dccp_mt_reg[] __read_mostly = {
 	{
 		.name 		= "dccp",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= dccp_mt_check,
 		.match		= dccp_mt,
 		.matchsize	= sizeof(struct xt_dccp_info),
@@ -147,7 +142,7 @@ static struct xt_match dccp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name 		= "dccp",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= dccp_mt_check,
 		.match		= dccp_mt,
 		.matchsize	= sizeof(struct xt_dccp_info),
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 26f4aab9c42..c3f8085460d 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -26,61 +26,48 @@ MODULE_ALIAS("ipt_tos");
 MODULE_ALIAS("ip6t_tos");
 
 static bool
-dscp_mt(const struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, const struct xt_match *match,
-        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+dscp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_dscp_info *info = matchinfo;
+	const struct xt_dscp_info *info = par->matchinfo;
 	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	return (dscp == info->dscp) ^ !!info->invert;
 }
 
 static bool
-dscp_mt6(const struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, const struct xt_match *match,
-         const void *matchinfo, int offset, unsigned int protoff,
-         bool *hotdrop)
+dscp_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_dscp_info *info = matchinfo;
+	const struct xt_dscp_info *info = par->matchinfo;
 	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	return (dscp == info->dscp) ^ !!info->invert;
 }
 
-static bool
-dscp_mt_check(const char *tablename, const void *info,
-              const struct xt_match *match, void *matchinfo,
-              unsigned int hook_mask)
+static bool dscp_mt_check(const struct xt_mtchk_param *par)
 {
-	const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp;
+	const struct xt_dscp_info *info = par->matchinfo;
 
-	if (dscp > XT_DSCP_MAX) {
-		printk(KERN_ERR "xt_dscp: dscp %x out of range\n", dscp);
+	if (info->dscp > XT_DSCP_MAX) {
+		printk(KERN_ERR "xt_dscp: dscp %x out of range\n", info->dscp);
 		return false;
 	}
 
 	return true;
 }
 
-static bool tos_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-                      const struct net_device *out,
-                      const struct xt_match *match, const void *matchinfo,
-                      int offset, unsigned int protoff, bool *hotdrop)
+static bool
+tos_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ipt_tos_info *info = matchinfo;
+	const struct ipt_tos_info *info = par->matchinfo;
 
 	return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
 }
 
-static bool tos_mt(const struct sk_buff *skb, const struct net_device *in,
-                   const struct net_device *out, const struct xt_match *match,
-                   const void *matchinfo, int offset, unsigned int protoff,
-                   bool *hotdrop)
+static bool tos_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_tos_match_info *info = matchinfo;
+	const struct xt_tos_match_info *info = par->matchinfo;
 
-	if (match->family == AF_INET)
+	if (par->match->family == NFPROTO_IPV4)
 		return ((ip_hdr(skb)->tos & info->tos_mask) ==
 		       info->tos_value) ^ !!info->invert;
 	else
@@ -91,7 +78,7 @@ static bool tos_mt(const struct sk_buff *skb, const struct net_device *in,
 static struct xt_match dscp_mt_reg[] __read_mostly = {
 	{
 		.name		= "dscp",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= dscp_mt_check,
 		.match		= dscp_mt,
 		.matchsize	= sizeof(struct xt_dscp_info),
@@ -99,7 +86,7 @@ static struct xt_match dscp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "dscp",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= dscp_mt_check,
 		.match		= dscp_mt6,
 		.matchsize	= sizeof(struct xt_dscp_info),
@@ -108,7 +95,7 @@ static struct xt_match dscp_mt_reg[] __read_mostly = {
 	{
 		.name		= "tos",
 		.revision	= 0,
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.match		= tos_mt_v0,
 		.matchsize	= sizeof(struct ipt_tos_info),
 		.me		= THIS_MODULE,
@@ -116,7 +103,7 @@ static struct xt_match dscp_mt_reg[] __read_mostly = {
 	{
 		.name		= "tos",
 		.revision	= 1,
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.match		= tos_mt,
 		.matchsize	= sizeof(struct xt_tos_match_info),
 		.me		= THIS_MODULE,
@@ -124,7 +111,7 @@ static struct xt_match dscp_mt_reg[] __read_mostly = {
 	{
 		.name		= "tos",
 		.revision	= 1,
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.match		= tos_mt,
 		.matchsize	= sizeof(struct xt_tos_match_info),
 		.me		= THIS_MODULE,
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index a133eb9b23e..609439967c2 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -42,26 +42,23 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	return r;
 }
 
-static bool
-esp_mt(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const struct ip_esp_hdr *eh;
 	struct ip_esp_hdr _esp;
-	const struct xt_esp *espinfo = matchinfo;
+	const struct xt_esp *espinfo = par->matchinfo;
 
 	/* Must not be a fragment. */
-	if (offset)
+	if (par->fragoff != 0)
 		return false;
 
-	eh = skb_header_pointer(skb, protoff, sizeof(_esp), &_esp);
+	eh = skb_header_pointer(skb, par->thoff, sizeof(_esp), &_esp);
 	if (eh == NULL) {
 		/* We've been asked to examine this packet, and we
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("Dropping evil ESP tinygram.\n");
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
@@ -69,13 +66,9 @@ esp_mt(const struct sk_buff *skb, const struct net_device *in,
 			 !!(espinfo->invflags & XT_ESP_INV_SPI));
 }
 
-/* Called when user tries to insert an entry of this type. */
-static bool
-esp_mt_check(const char *tablename, const void *ip_void,
-             const struct xt_match *match, void *matchinfo,
-             unsigned int hook_mask)
+static bool esp_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_esp *espinfo = matchinfo;
+	const struct xt_esp *espinfo = par->matchinfo;
 
 	if (espinfo->invflags & ~XT_ESP_INV_MASK) {
 		duprintf("xt_esp: unknown flags %X\n", espinfo->invflags);
@@ -88,7 +81,7 @@ esp_mt_check(const char *tablename, const void *ip_void,
 static struct xt_match esp_mt_reg[] __read_mostly = {
 	{
 		.name		= "esp",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= esp_mt_check,
 		.match		= esp_mt,
 		.matchsize	= sizeof(struct xt_esp),
@@ -97,7 +90,7 @@ static struct xt_match esp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "esp",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= esp_mt_check,
 		.match		= esp_mt,
 		.matchsize	= sizeof(struct xt_esp),
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index d9418a26781..6fc4292d46e 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -80,7 +80,7 @@ struct dsthash_ent {
 struct xt_hashlimit_htable {
 	struct hlist_node node;		/* global list of all htables */
 	atomic_t use;
-	int family;
+	u_int8_t family;
 
 	struct hashlimit_cfg1 cfg;	/* config */
 
@@ -185,7 +185,7 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
 }
 static void htable_gc(unsigned long htlong);
 
-static int htable_create_v0(struct xt_hashlimit_info *minfo, int family)
+static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family)
 {
 	struct xt_hashlimit_htable *hinfo;
 	unsigned int size;
@@ -218,7 +218,7 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, int family)
 	hinfo->cfg.gc_interval = minfo->cfg.gc_interval;
 	hinfo->cfg.expire      = minfo->cfg.expire;
 
-	if (family == AF_INET)
+	if (family == NFPROTO_IPV4)
 		hinfo->cfg.srcmask = hinfo->cfg.dstmask = 32;
 	else
 		hinfo->cfg.srcmask = hinfo->cfg.dstmask = 128;
@@ -237,11 +237,10 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, int family)
 	hinfo->family = family;
 	hinfo->rnd_initialized = 0;
 	spin_lock_init(&hinfo->lock);
-	hinfo->pde =
-		proc_create_data(minfo->name, 0,
-				 family == AF_INET ? hashlimit_procdir4 :
-						     hashlimit_procdir6,
-				 &dl_file_ops, hinfo);
+	hinfo->pde = proc_create_data(minfo->name, 0,
+		(family == NFPROTO_IPV4) ?
+		hashlimit_procdir4 : hashlimit_procdir6,
+		&dl_file_ops, hinfo);
 	if (!hinfo->pde) {
 		vfree(hinfo);
 		return -1;
@@ -258,8 +257,7 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, int family)
 	return 0;
 }
 
-static int htable_create(struct xt_hashlimit_mtinfo1 *minfo,
-                         unsigned int family)
+static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family)
 {
 	struct xt_hashlimit_htable *hinfo;
 	unsigned int size;
@@ -301,11 +299,10 @@ static int htable_create(struct xt_hashlimit_mtinfo1 *minfo,
 	hinfo->rnd_initialized = 0;
 	spin_lock_init(&hinfo->lock);
 
-	hinfo->pde =
-		proc_create_data(minfo->name, 0,
-				 family == AF_INET ? hashlimit_procdir4 :
-						     hashlimit_procdir6,
-				 &dl_file_ops, hinfo);
+	hinfo->pde = proc_create_data(minfo->name, 0,
+		(family == NFPROTO_IPV4) ?
+		hashlimit_procdir4 : hashlimit_procdir6,
+		&dl_file_ops, hinfo);
 	if (hinfo->pde == NULL) {
 		vfree(hinfo);
 		return -1;
@@ -371,14 +368,14 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo)
 
 	/* remove proc entry */
 	remove_proc_entry(hinfo->pde->name,
-			  hinfo->family == AF_INET ? hashlimit_procdir4 :
+			  hinfo->family == NFPROTO_IPV4 ? hashlimit_procdir4 :
 						     hashlimit_procdir6);
 	htable_selective_cleanup(hinfo, select_all);
 	vfree(hinfo);
 }
 
 static struct xt_hashlimit_htable *htable_find_get(const char *name,
-						   int family)
+						   u_int8_t family)
 {
 	struct xt_hashlimit_htable *hinfo;
 	struct hlist_node *pos;
@@ -502,7 +499,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
 	memset(dst, 0, sizeof(*dst));
 
 	switch (hinfo->family) {
-	case AF_INET:
+	case NFPROTO_IPV4:
 		if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
 			dst->ip.dst = maskl(ip_hdr(skb)->daddr,
 			              hinfo->cfg.dstmask);
@@ -516,7 +513,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
 		nexthdr = ip_hdr(skb)->protocol;
 		break;
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-	case AF_INET6:
+	case NFPROTO_IPV6:
 		if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) {
 			memcpy(&dst->ip6.dst, &ipv6_hdr(skb)->daddr,
 			       sizeof(dst->ip6.dst));
@@ -566,19 +563,16 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
 }
 
 static bool
-hashlimit_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-                const struct net_device *out, const struct xt_match *match,
-                const void *matchinfo, int offset, unsigned int protoff,
-                bool *hotdrop)
+hashlimit_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const struct xt_hashlimit_info *r =
-		((const struct xt_hashlimit_info *)matchinfo)->u.master;
+		((const struct xt_hashlimit_info *)par->matchinfo)->u.master;
 	struct xt_hashlimit_htable *hinfo = r->hinfo;
 	unsigned long now = jiffies;
 	struct dsthash_ent *dh;
 	struct dsthash_dst dst;
 
-	if (hashlimit_init_dst(hinfo, &dst, skb, protoff) < 0)
+	if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
 		goto hotdrop;
 
 	spin_lock_bh(&hinfo->lock);
@@ -616,23 +610,20 @@ hashlimit_mt_v0(const struct sk_buff *skb, const struct net_device *in,
 	return false;
 
 hotdrop:
-	*hotdrop = true;
+	*par->hotdrop = true;
 	return false;
 }
 
 static bool
-hashlimit_mt(const struct sk_buff *skb, const struct net_device *in,
-             const struct net_device *out, const struct xt_match *match,
-             const void *matchinfo, int offset, unsigned int protoff,
-             bool *hotdrop)
+hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_hashlimit_mtinfo1 *info = matchinfo;
+	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 	struct xt_hashlimit_htable *hinfo = info->hinfo;
 	unsigned long now = jiffies;
 	struct dsthash_ent *dh;
 	struct dsthash_dst dst;
 
-	if (hashlimit_init_dst(hinfo, &dst, skb, protoff) < 0)
+	if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
 		goto hotdrop;
 
 	spin_lock_bh(&hinfo->lock);
@@ -669,16 +660,13 @@ hashlimit_mt(const struct sk_buff *skb, const struct net_device *in,
 	return info->cfg.mode & XT_HASHLIMIT_INVERT;
 
  hotdrop:
-	*hotdrop = true;
+	*par->hotdrop = true;
 	return false;
 }
 
-static bool
-hashlimit_mt_check_v0(const char *tablename, const void *inf,
-                      const struct xt_match *match, void *matchinfo,
-                      unsigned int hook_mask)
+static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
 {
-	struct xt_hashlimit_info *r = matchinfo;
+	struct xt_hashlimit_info *r = par->matchinfo;
 
 	/* Check for overflow. */
 	if (r->cfg.burst == 0 ||
@@ -707,8 +695,8 @@ hashlimit_mt_check_v0(const char *tablename, const void *inf,
 	 * the list of htable's in htable_create(), since then we would
 	 * create duplicate proc files. -HW */
 	mutex_lock(&hlimit_mutex);
-	r->hinfo = htable_find_get(r->name, match->family);
-	if (!r->hinfo && htable_create_v0(r, match->family) != 0) {
+	r->hinfo = htable_find_get(r->name, par->match->family);
+	if (!r->hinfo && htable_create_v0(r, par->match->family) != 0) {
 		mutex_unlock(&hlimit_mutex);
 		return false;
 	}
@@ -719,12 +707,9 @@ hashlimit_mt_check_v0(const char *tablename, const void *inf,
 	return true;
 }
 
-static bool
-hashlimit_mt_check(const char *tablename, const void *inf,
-                   const struct xt_match *match, void *matchinfo,
-                   unsigned int hook_mask)
+static bool hashlimit_mt_check(const struct xt_mtchk_param *par)
 {
-	struct xt_hashlimit_mtinfo1 *info = matchinfo;
+	struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 
 	/* Check for overflow. */
 	if (info->cfg.burst == 0 ||
@@ -738,7 +723,7 @@ hashlimit_mt_check(const char *tablename, const void *inf,
 		return false;
 	if (info->name[sizeof(info->name)-1] != '\0')
 		return false;
-	if (match->family == AF_INET) {
+	if (par->match->family == NFPROTO_IPV4) {
 		if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32)
 			return false;
 	} else {
@@ -753,8 +738,8 @@ hashlimit_mt_check(const char *tablename, const void *inf,
 	 * the list of htable's in htable_create(), since then we would
 	 * create duplicate proc files. -HW */
 	mutex_lock(&hlimit_mutex);
-	info->hinfo = htable_find_get(info->name, match->family);
-	if (!info->hinfo && htable_create(info, match->family) != 0) {
+	info->hinfo = htable_find_get(info->name, par->match->family);
+	if (!info->hinfo && htable_create(info, par->match->family) != 0) {
 		mutex_unlock(&hlimit_mutex);
 		return false;
 	}
@@ -763,17 +748,16 @@ hashlimit_mt_check(const char *tablename, const void *inf,
 }
 
 static void
-hashlimit_mt_destroy_v0(const struct xt_match *match, void *matchinfo)
+hashlimit_mt_destroy_v0(const struct xt_mtdtor_param *par)
 {
-	const struct xt_hashlimit_info *r = matchinfo;
+	const struct xt_hashlimit_info *r = par->matchinfo;
 
 	htable_put(r->hinfo);
 }
 
-static void
-hashlimit_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	const struct xt_hashlimit_mtinfo1 *info = matchinfo;
+	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 
 	htable_put(info->hinfo);
 }
@@ -806,7 +790,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 	{
 		.name		= "hashlimit",
 		.revision	= 0,
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.match		= hashlimit_mt_v0,
 		.matchsize	= sizeof(struct xt_hashlimit_info),
 #ifdef CONFIG_COMPAT
@@ -821,7 +805,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 	{
 		.name           = "hashlimit",
 		.revision       = 1,
-		.family         = AF_INET,
+		.family         = NFPROTO_IPV4,
 		.match          = hashlimit_mt,
 		.matchsize      = sizeof(struct xt_hashlimit_mtinfo1),
 		.checkentry     = hashlimit_mt_check,
@@ -831,7 +815,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 	{
 		.name		= "hashlimit",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.match		= hashlimit_mt_v0,
 		.matchsize	= sizeof(struct xt_hashlimit_info),
 #ifdef CONFIG_COMPAT
@@ -846,7 +830,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 	{
 		.name           = "hashlimit",
 		.revision       = 1,
-		.family         = AF_INET6,
+		.family         = NFPROTO_IPV6,
 		.match          = hashlimit_mt,
 		.matchsize      = sizeof(struct xt_hashlimit_mtinfo1),
 		.checkentry     = hashlimit_mt_check,
@@ -901,14 +885,14 @@ static void dl_seq_stop(struct seq_file *s, void *v)
 	spin_unlock_bh(&htable->lock);
 }
 
-static int dl_seq_real_show(struct dsthash_ent *ent, int family,
+static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 				   struct seq_file *s)
 {
 	/* recalculate to show accurate numbers */
 	rateinfo_recalc(ent, jiffies);
 
 	switch (family) {
-	case AF_INET:
+	case NFPROTO_IPV4:
 		return seq_printf(s, "%ld %u.%u.%u.%u:%u->"
 				     "%u.%u.%u.%u:%u %u %u %u\n",
 				 (long)(ent->expires - jiffies)/HZ,
@@ -919,7 +903,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, int family,
 				 ent->rateinfo.credit, ent->rateinfo.credit_cap,
 				 ent->rateinfo.cost);
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-	case AF_INET6:
+	case NFPROTO_IPV6:
 		return seq_printf(s, "%ld " NIP6_FMT ":%u->"
 				     NIP6_FMT ":%u %u %u %u\n",
 				 (long)(ent->expires - jiffies)/HZ,
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index dada2905d66..64fc7f27722 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -24,12 +24,9 @@ MODULE_ALIAS("ip6t_helper");
 
 
 static bool
-helper_mt(const struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, const struct xt_match *match,
-          const void *matchinfo, int offset, unsigned int protoff,
-          bool *hotdrop)
+helper_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_helper_info *info = matchinfo;
+	const struct xt_helper_info *info = par->matchinfo;
 	const struct nf_conn *ct;
 	const struct nf_conn_help *master_help;
 	const struct nf_conntrack_helper *helper;
@@ -57,56 +54,43 @@ helper_mt(const struct sk_buff *skb, const struct net_device *in,
 	return ret;
 }
 
-static bool
-helper_mt_check(const char *tablename, const void *inf,
-                const struct xt_match *match, void *matchinfo,
-                unsigned int hook_mask)
+static bool helper_mt_check(const struct xt_mtchk_param *par)
 {
-	struct xt_helper_info *info = matchinfo;
+	struct xt_helper_info *info = par->matchinfo;
 
-	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", match->family);
+				    "proto=%u\n", par->family);
 		return false;
 	}
 	info->name[29] = '\0';
 	return true;
 }
 
-static void helper_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void helper_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	nf_ct_l3proto_module_put(match->family);
+	nf_ct_l3proto_module_put(par->family);
 }
 
-static struct xt_match helper_mt_reg[] __read_mostly = {
-	{
-		.name		= "helper",
-		.family		= AF_INET,
-		.checkentry	= helper_mt_check,
-		.match		= helper_mt,
-		.destroy	= helper_mt_destroy,
-		.matchsize	= sizeof(struct xt_helper_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "helper",
-		.family		= AF_INET6,
-		.checkentry	= helper_mt_check,
-		.match		= helper_mt,
-		.destroy	= helper_mt_destroy,
-		.matchsize	= sizeof(struct xt_helper_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_match helper_mt_reg __read_mostly = {
+	.name       = "helper",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = helper_mt_check,
+	.match      = helper_mt,
+	.destroy    = helper_mt_destroy,
+	.matchsize  = sizeof(struct xt_helper_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init helper_mt_init(void)
 {
-	return xt_register_matches(helper_mt_reg, ARRAY_SIZE(helper_mt_reg));
+	return xt_register_match(&helper_mt_reg);
 }
 
 static void __exit helper_mt_exit(void)
 {
-	xt_unregister_matches(helper_mt_reg, ARRAY_SIZE(helper_mt_reg));
+	xt_unregister_match(&helper_mt_reg);
 }
 
 module_init(helper_mt_init);
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index c63e9333c75..6f62c36948d 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -17,12 +17,9 @@
 #include <linux/netfilter_ipv4/ipt_iprange.h>
 
 static bool
-iprange_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-              const struct net_device *out, const struct xt_match *match,
-              const void *matchinfo, int offset, unsigned int protoff,
-              bool *hotdrop)
+iprange_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ipt_iprange_info *info = matchinfo;
+	const struct ipt_iprange_info *info = par->matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
 
 	if (info->flags & IPRANGE_SRC) {
@@ -55,12 +52,9 @@ iprange_mt_v0(const struct sk_buff *skb, const struct net_device *in,
 }
 
 static bool
-iprange_mt4(const struct sk_buff *skb, const struct net_device *in,
-            const struct net_device *out, const struct xt_match *match,
-            const void *matchinfo, int offset, unsigned int protoff,
-            bool *hotdrop)
+iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_iprange_mtinfo *info = matchinfo;
+	const struct xt_iprange_mtinfo *info = par->matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
 	bool m;
 
@@ -111,12 +105,9 @@ iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b)
 }
 
 static bool
-iprange_mt6(const struct sk_buff *skb, const struct net_device *in,
-            const struct net_device *out, const struct xt_match *match,
-            const void *matchinfo, int offset, unsigned int protoff,
-            bool *hotdrop)
+iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_iprange_mtinfo *info = matchinfo;
+	const struct xt_iprange_mtinfo *info = par->matchinfo;
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	bool m;
 
@@ -141,7 +132,7 @@ static struct xt_match iprange_mt_reg[] __read_mostly = {
 	{
 		.name      = "iprange",
 		.revision  = 0,
-		.family    = AF_INET,
+		.family    = NFPROTO_IPV4,
 		.match     = iprange_mt_v0,
 		.matchsize = sizeof(struct ipt_iprange_info),
 		.me        = THIS_MODULE,
@@ -149,7 +140,7 @@ static struct xt_match iprange_mt_reg[] __read_mostly = {
 	{
 		.name      = "iprange",
 		.revision  = 1,
-		.family    = AF_INET,
+		.family    = NFPROTO_IPV4,
 		.match     = iprange_mt4,
 		.matchsize = sizeof(struct xt_iprange_mtinfo),
 		.me        = THIS_MODULE,
@@ -157,7 +148,7 @@ static struct xt_match iprange_mt_reg[] __read_mostly = {
 	{
 		.name      = "iprange",
 		.revision  = 1,
-		.family    = AF_INET6,
+		.family    = NFPROTO_IPV6,
 		.match     = iprange_mt6,
 		.matchsize = sizeof(struct xt_iprange_mtinfo),
 		.me        = THIS_MODULE,
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index b8640f97295..c4871ca6c86 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -21,24 +21,18 @@ MODULE_ALIAS("ipt_length");
 MODULE_ALIAS("ip6t_length");
 
 static bool
-length_mt(const struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, const struct xt_match *match,
-          const void *matchinfo, int offset, unsigned int protoff,
-          bool *hotdrop)
+length_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_length_info *info = matchinfo;
+	const struct xt_length_info *info = par->matchinfo;
 	u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
 
 	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
 }
 
 static bool
-length_mt6(const struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, const struct xt_match *match,
-           const void *matchinfo, int offset, unsigned int protoff,
-           bool *hotdrop)
+length_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_length_info *info = matchinfo;
+	const struct xt_length_info *info = par->matchinfo;
 	const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) +
 				 sizeof(struct ipv6hdr);
 
@@ -48,14 +42,14 @@ length_mt6(const struct sk_buff *skb, const struct net_device *in,
 static struct xt_match length_mt_reg[] __read_mostly = {
 	{
 		.name		= "length",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.match		= length_mt,
 		.matchsize	= sizeof(struct xt_length_info),
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "length",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.match		= length_mt6,
 		.matchsize	= sizeof(struct xt_length_info),
 		.me		= THIS_MODULE,
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index aad9ab8d204..c908d69a559 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -58,13 +58,10 @@ static DEFINE_SPINLOCK(limit_lock);
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
 static bool
-limit_mt(const struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, const struct xt_match *match,
-         const void *matchinfo, int offset, unsigned int protoff,
-         bool *hotdrop)
+limit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	struct xt_rateinfo *r =
-		((const struct xt_rateinfo *)matchinfo)->master;
+		((const struct xt_rateinfo *)par->matchinfo)->master;
 	unsigned long now = jiffies;
 
 	spin_lock_bh(&limit_lock);
@@ -95,12 +92,9 @@ user2credits(u_int32_t user)
 	return (user * HZ * CREDITS_PER_JIFFY) / XT_LIMIT_SCALE;
 }
 
-static bool
-limit_mt_check(const char *tablename, const void *inf,
-               const struct xt_match *match, void *matchinfo,
-               unsigned int hook_mask)
+static bool limit_mt_check(const struct xt_mtchk_param *par)
 {
-	struct xt_rateinfo *r = matchinfo;
+	struct xt_rateinfo *r = par->matchinfo;
 
 	/* Check for overflow. */
 	if (r->burst == 0
@@ -167,43 +161,29 @@ static int limit_mt_compat_to_user(void __user *dst, void *src)
 }
 #endif /* CONFIG_COMPAT */
 
-static struct xt_match limit_mt_reg[] __read_mostly = {
-	{
-		.name		= "limit",
-		.family		= AF_INET,
-		.checkentry	= limit_mt_check,
-		.match		= limit_mt,
-		.matchsize	= sizeof(struct xt_rateinfo),
-#ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_rateinfo),
-		.compat_from_user = limit_mt_compat_from_user,
-		.compat_to_user	= limit_mt_compat_to_user,
-#endif
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "limit",
-		.family		= AF_INET6,
-		.checkentry	= limit_mt_check,
-		.match		= limit_mt,
-		.matchsize	= sizeof(struct xt_rateinfo),
+static struct xt_match limit_mt_reg __read_mostly = {
+	.name             = "limit",
+	.revision         = 0,
+	.family           = NFPROTO_UNSPEC,
+	.match            = limit_mt,
+	.checkentry       = limit_mt_check,
+	.matchsize        = sizeof(struct xt_rateinfo),
 #ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_rateinfo),
-		.compat_from_user = limit_mt_compat_from_user,
-		.compat_to_user	= limit_mt_compat_to_user,
+	.compatsize       = sizeof(struct compat_xt_rateinfo),
+	.compat_from_user = limit_mt_compat_from_user,
+	.compat_to_user   = limit_mt_compat_to_user,
 #endif
-		.me		= THIS_MODULE,
-	},
+	.me               = THIS_MODULE,
 };
 
 static int __init limit_mt_init(void)
 {
-	return xt_register_matches(limit_mt_reg, ARRAY_SIZE(limit_mt_reg));
+	return xt_register_match(&limit_mt_reg);
 }
 
 static void __exit limit_mt_exit(void)
 {
-	xt_unregister_matches(limit_mt_reg, ARRAY_SIZE(limit_mt_reg));
+	xt_unregister_match(&limit_mt_reg);
 }
 
 module_init(limit_mt_init);
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index b3e96a0ec17..c2007116ce5 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -24,12 +24,9 @@ MODULE_DESCRIPTION("Xtables: MAC address match");
 MODULE_ALIAS("ipt_mac");
 MODULE_ALIAS("ip6t_mac");
 
-static bool
-mac_mt(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool mac_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-    const struct xt_mac_info *info = matchinfo;
+    const struct xt_mac_info *info = par->matchinfo;
 
     /* Is mac pointer valid? */
     return skb_mac_header(skb) >= skb->head &&
@@ -39,37 +36,25 @@ mac_mt(const struct sk_buff *skb, const struct net_device *in,
 		^ info->invert);
 }
 
-static struct xt_match mac_mt_reg[] __read_mostly = {
-	{
-		.name		= "mac",
-		.family		= AF_INET,
-		.match		= mac_mt,
-		.matchsize	= sizeof(struct xt_mac_info),
-		.hooks		= (1 << NF_INET_PRE_ROUTING) |
-				  (1 << NF_INET_LOCAL_IN) |
-				  (1 << NF_INET_FORWARD),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "mac",
-		.family		= AF_INET6,
-		.match		= mac_mt,
-		.matchsize	= sizeof(struct xt_mac_info),
-		.hooks		= (1 << NF_INET_PRE_ROUTING) |
-				  (1 << NF_INET_LOCAL_IN) |
-				  (1 << NF_INET_FORWARD),
-		.me		= THIS_MODULE,
-	},
+static struct xt_match mac_mt_reg __read_mostly = {
+	.name      = "mac",
+	.revision  = 0,
+	.family    = NFPROTO_UNSPEC,
+	.match     = mac_mt,
+	.matchsize = sizeof(struct xt_mac_info),
+	.hooks     = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) |
+	             (1 << NF_INET_FORWARD),
+	.me        = THIS_MODULE,
 };
 
 static int __init mac_mt_init(void)
 {
-	return xt_register_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg));
+	return xt_register_match(&mac_mt_reg);
 }
 
 static void __exit mac_mt_exit(void)
 {
-	xt_unregister_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg));
+	xt_unregister_match(&mac_mt_reg);
 }
 
 module_init(mac_mt_init);
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 9f78f6120fb..10b9e34bbc5 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -23,32 +23,24 @@ MODULE_ALIAS("ipt_mark");
 MODULE_ALIAS("ip6t_mark");
 
 static bool
-mark_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, const struct xt_match *match,
-           const void *matchinfo, int offset, unsigned int protoff,
-           bool *hotdrop)
+mark_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_mark_info *info = matchinfo;
+	const struct xt_mark_info *info = par->matchinfo;
 
 	return ((skb->mark & info->mask) == info->mark) ^ info->invert;
 }
 
 static bool
-mark_mt(const struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, const struct xt_match *match,
-        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_mark_mtinfo1 *info = matchinfo;
+	const struct xt_mark_mtinfo1 *info = par->matchinfo;
 
 	return ((skb->mark & info->mask) == info->mark) ^ info->invert;
 }
 
-static bool
-mark_mt_check_v0(const char *tablename, const void *entry,
-                 const struct xt_match *match, void *matchinfo,
-                 unsigned int hook_mask)
+static bool mark_mt_check_v0(const struct xt_mtchk_param *par)
 {
-	const struct xt_mark_info *minfo = matchinfo;
+	const struct xt_mark_info *minfo = par->matchinfo;
 
 	if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) {
 		printk(KERN_WARNING "mark: only supports 32bit mark\n");
@@ -92,7 +84,7 @@ static struct xt_match mark_mt_reg[] __read_mostly = {
 	{
 		.name		= "mark",
 		.revision	= 0,
-		.family		= AF_INET,
+		.family		= NFPROTO_UNSPEC,
 		.checkentry	= mark_mt_check_v0,
 		.match		= mark_mt_v0,
 		.matchsize	= sizeof(struct xt_mark_info),
@@ -104,31 +96,9 @@ static struct xt_match mark_mt_reg[] __read_mostly = {
 		.me		= THIS_MODULE,
 	},
 	{
-		.name		= "mark",
-		.revision	= 0,
-		.family		= AF_INET6,
-		.checkentry	= mark_mt_check_v0,
-		.match		= mark_mt_v0,
-		.matchsize	= sizeof(struct xt_mark_info),
-#ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_mark_info),
-		.compat_from_user = mark_mt_compat_from_user_v0,
-		.compat_to_user	= mark_mt_compat_to_user_v0,
-#endif
-		.me		= THIS_MODULE,
-	},
-	{
-		.name           = "mark",
-		.revision       = 1,
-		.family         = AF_INET,
-		.match          = mark_mt,
-		.matchsize      = sizeof(struct xt_mark_mtinfo1),
-		.me             = THIS_MODULE,
-	},
-	{
 		.name           = "mark",
 		.revision       = 1,
-		.family         = AF_INET6,
+		.family         = NFPROTO_UNSPEC,
 		.match          = mark_mt,
 		.matchsize      = sizeof(struct xt_mark_mtinfo1),
 		.me             = THIS_MODULE,
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index fd88c489b70..d06bb2dd390 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -95,25 +95,22 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
 }
 
 static bool
-multiport_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-                const struct net_device *out, const struct xt_match *match,
-                const void *matchinfo, int offset, unsigned int protoff,
-                bool *hotdrop)
+multiport_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const __be16 *pptr;
 	__be16 _ports[2];
-	const struct xt_multiport *multiinfo = matchinfo;
+	const struct xt_multiport *multiinfo = par->matchinfo;
 
-	if (offset)
+	if (par->fragoff != 0)
 		return false;
 
-	pptr = skb_header_pointer(skb, protoff, sizeof(_ports), _ports);
+	pptr = skb_header_pointer(skb, par->thoff, sizeof(_ports), _ports);
 	if (pptr == NULL) {
 		/* We've been asked to examine this packet, and we
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n");
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
@@ -122,25 +119,22 @@ multiport_mt_v0(const struct sk_buff *skb, const struct net_device *in,
 }
 
 static bool
-multiport_mt(const struct sk_buff *skb, const struct net_device *in,
-             const struct net_device *out, const struct xt_match *match,
-             const void *matchinfo, int offset, unsigned int protoff,
-             bool *hotdrop)
+multiport_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const __be16 *pptr;
 	__be16 _ports[2];
-	const struct xt_multiport_v1 *multiinfo = matchinfo;
+	const struct xt_multiport_v1 *multiinfo = par->matchinfo;
 
-	if (offset)
+	if (par->fragoff != 0)
 		return false;
 
-	pptr = skb_header_pointer(skb, protoff, sizeof(_ports), _ports);
+	pptr = skb_header_pointer(skb, par->thoff, sizeof(_ports), _ports);
 	if (pptr == NULL) {
 		/* We've been asked to examine this packet, and we
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n");
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
@@ -164,50 +158,37 @@ check(u_int16_t proto,
 		&& count <= XT_MULTI_PORTS;
 }
 
-/* Called when user tries to insert an entry of this type. */
-static bool
-multiport_mt_check_v0(const char *tablename, const void *info,
-                      const struct xt_match *match, void *matchinfo,
-                      unsigned int hook_mask)
+static bool multiport_mt_check_v0(const struct xt_mtchk_param *par)
 {
-	const struct ipt_ip *ip = info;
-	const struct xt_multiport *multiinfo = matchinfo;
+	const struct ipt_ip *ip = par->entryinfo;
+	const struct xt_multiport *multiinfo = par->matchinfo;
 
 	return check(ip->proto, ip->invflags, multiinfo->flags,
 		     multiinfo->count);
 }
 
-static bool
-multiport_mt_check(const char *tablename, const void *info,
-                   const struct xt_match *match, void *matchinfo,
-                   unsigned int hook_mask)
+static bool multiport_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct ipt_ip *ip = info;
-	const struct xt_multiport_v1 *multiinfo = matchinfo;
+	const struct ipt_ip *ip = par->entryinfo;
+	const struct xt_multiport_v1 *multiinfo = par->matchinfo;
 
 	return check(ip->proto, ip->invflags, multiinfo->flags,
 		     multiinfo->count);
 }
 
-static bool
-multiport_mt6_check_v0(const char *tablename, const void *info,
-                       const struct xt_match *match, void *matchinfo,
-                       unsigned int hook_mask)
+static bool multiport_mt6_check_v0(const struct xt_mtchk_param *par)
 {
-	const struct ip6t_ip6 *ip = info;
-	const struct xt_multiport *multiinfo = matchinfo;
+	const struct ip6t_ip6 *ip = par->entryinfo;
+	const struct xt_multiport *multiinfo = par->matchinfo;
 
 	return check(ip->proto, ip->invflags, multiinfo->flags,
 		     multiinfo->count);
 }
 
-static bool
-multiport_mt6_check(const char *tablename, const void *info,
-                    const struct xt_match *match, void *matchinfo,
-                    unsigned int hook_mask)
+static bool multiport_mt6_check(const struct xt_mtchk_param *par)
 {
-	const struct ip6t_ip6 *ip = info;
-	const struct xt_multiport_v1 *multiinfo = matchinfo;
+	const struct ip6t_ip6 *ip = par->entryinfo;
+	const struct xt_multiport_v1 *multiinfo = par->matchinfo;
 
 	return check(ip->proto, ip->invflags, multiinfo->flags,
 		     multiinfo->count);
@@ -216,7 +197,7 @@ multiport_mt6_check(const char *tablename, const void *info,
 static struct xt_match multiport_mt_reg[] __read_mostly = {
 	{
 		.name		= "multiport",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.revision	= 0,
 		.checkentry	= multiport_mt_check_v0,
 		.match		= multiport_mt_v0,
@@ -225,7 +206,7 @@ static struct xt_match multiport_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "multiport",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.revision	= 1,
 		.checkentry	= multiport_mt_check,
 		.match		= multiport_mt,
@@ -234,7 +215,7 @@ static struct xt_match multiport_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "multiport",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.revision	= 0,
 		.checkentry	= multiport_mt6_check_v0,
 		.match		= multiport_mt_v0,
@@ -243,7 +224,7 @@ static struct xt_match multiport_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "multiport",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.revision	= 1,
 		.checkentry	= multiport_mt6_check,
 		.match		= multiport_mt,
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 9059c16144c..f19ebd9b78f 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -21,12 +21,9 @@
 #include <linux/netfilter_ipv6/ip6t_owner.h>
 
 static bool
-owner_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-            const struct net_device *out, const struct xt_match *match,
-            const void *matchinfo, int offset, unsigned int protoff,
-            bool *hotdrop)
+owner_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ipt_owner_info *info = matchinfo;
+	const struct ipt_owner_info *info = par->matchinfo;
 	const struct file *filp;
 
 	if (skb->sk == NULL || skb->sk->sk_socket == NULL)
@@ -50,12 +47,9 @@ owner_mt_v0(const struct sk_buff *skb, const struct net_device *in,
 }
 
 static bool
-owner_mt6_v0(const struct sk_buff *skb, const struct net_device *in,
-             const struct net_device *out, const struct xt_match *match,
-             const void *matchinfo, int offset, unsigned int protoff,
-             bool *hotdrop)
+owner_mt6_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ip6t_owner_info *info = matchinfo;
+	const struct ip6t_owner_info *info = par->matchinfo;
 	const struct file *filp;
 
 	if (skb->sk == NULL || skb->sk->sk_socket == NULL)
@@ -79,12 +73,9 @@ owner_mt6_v0(const struct sk_buff *skb, const struct net_device *in,
 }
 
 static bool
-owner_mt(const struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, const struct xt_match *match,
-         const void *matchinfo, int offset, unsigned int protoff,
-         bool *hotdrop)
+owner_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_owner_match_info *info = matchinfo;
+	const struct xt_owner_match_info *info = par->matchinfo;
 	const struct file *filp;
 
 	if (skb->sk == NULL || skb->sk->sk_socket == NULL)
@@ -116,12 +107,9 @@ owner_mt(const struct sk_buff *skb, const struct net_device *in,
 	return true;
 }
 
-static bool
-owner_mt_check_v0(const char *tablename, const void *ip,
-                  const struct xt_match *match, void *matchinfo,
-                  unsigned int hook_mask)
+static bool owner_mt_check_v0(const struct xt_mtchk_param *par)
 {
-	const struct ipt_owner_info *info = matchinfo;
+	const struct ipt_owner_info *info = par->matchinfo;
 
 	if (info->match & (IPT_OWNER_PID | IPT_OWNER_SID | IPT_OWNER_COMM)) {
 		printk(KERN_WARNING KBUILD_MODNAME
@@ -133,12 +121,9 @@ owner_mt_check_v0(const char *tablename, const void *ip,
 	return true;
 }
 
-static bool
-owner_mt6_check_v0(const char *tablename, const void *ip,
-                   const struct xt_match *match, void *matchinfo,
-                   unsigned int hook_mask)
+static bool owner_mt6_check_v0(const struct xt_mtchk_param *par)
 {
-	const struct ip6t_owner_info *info = matchinfo;
+	const struct ip6t_owner_info *info = par->matchinfo;
 
 	if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) {
 		printk(KERN_WARNING KBUILD_MODNAME
@@ -153,7 +138,7 @@ static struct xt_match owner_mt_reg[] __read_mostly = {
 	{
 		.name       = "owner",
 		.revision   = 0,
-		.family     = AF_INET,
+		.family     = NFPROTO_IPV4,
 		.match      = owner_mt_v0,
 		.matchsize  = sizeof(struct ipt_owner_info),
 		.checkentry = owner_mt_check_v0,
@@ -164,7 +149,7 @@ static struct xt_match owner_mt_reg[] __read_mostly = {
 	{
 		.name       = "owner",
 		.revision   = 0,
-		.family     = AF_INET6,
+		.family     = NFPROTO_IPV6,
 		.match      = owner_mt6_v0,
 		.matchsize  = sizeof(struct ip6t_owner_info),
 		.checkentry = owner_mt6_check_v0,
@@ -175,17 +160,7 @@ static struct xt_match owner_mt_reg[] __read_mostly = {
 	{
 		.name       = "owner",
 		.revision   = 1,
-		.family     = AF_INET,
-		.match      = owner_mt,
-		.matchsize  = sizeof(struct xt_owner_match_info),
-		.hooks      = (1 << NF_INET_LOCAL_OUT) |
-		              (1 << NF_INET_POST_ROUTING),
-		.me         = THIS_MODULE,
-	},
-	{
-		.name       = "owner",
-		.revision   = 1,
-		.family     = AF_INET6,
+		.family     = NFPROTO_UNSPEC,
 		.match      = owner_mt,
 		.matchsize  = sizeof(struct xt_owner_match_info),
 		.hooks      = (1 << NF_INET_LOCAL_OUT) |
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 4ec1094bda9..1bcdfc12cf5 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -21,14 +21,11 @@ MODULE_ALIAS("ipt_physdev");
 MODULE_ALIAS("ip6t_physdev");
 
 static bool
-physdev_mt(const struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, const struct xt_match *match,
-           const void *matchinfo, int offset, unsigned int protoff,
-           bool *hotdrop)
+physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	int i;
 	static const char nulldevname[IFNAMSIZ];
-	const struct xt_physdev_info *info = matchinfo;
+	const struct xt_physdev_info *info = par->matchinfo;
 	bool ret;
 	const char *indev, *outdev;
 	const struct nf_bridge_info *nf_bridge;
@@ -94,12 +91,9 @@ match_outdev:
 	return ret ^ !(info->invert & XT_PHYSDEV_OP_OUT);
 }
 
-static bool
-physdev_mt_check(const char *tablename, const void *ip,
-                 const struct xt_match *match, void *matchinfo,
-                 unsigned int hook_mask)
+static bool physdev_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_physdev_info *info = matchinfo;
+	const struct xt_physdev_info *info = par->matchinfo;
 
 	if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
 	    info->bitmask & ~XT_PHYSDEV_OP_MASK)
@@ -107,44 +101,35 @@ physdev_mt_check(const char *tablename, const void *ip,
 	if (info->bitmask & XT_PHYSDEV_OP_OUT &&
 	    (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
 	     info->invert & XT_PHYSDEV_OP_BRIDGED) &&
-	    hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
-			 (1 << NF_INET_POST_ROUTING))) {
+	    par->hook_mask & ((1 << NF_INET_LOCAL_OUT) |
+	    (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) {
 		printk(KERN_WARNING "physdev match: using --physdev-out in the "
 		       "OUTPUT, FORWARD and POSTROUTING chains for non-bridged "
 		       "traffic is not supported anymore.\n");
-		if (hook_mask & (1 << NF_INET_LOCAL_OUT))
+		if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
 			return false;
 	}
 	return true;
 }
 
-static struct xt_match physdev_mt_reg[] __read_mostly = {
-	{
-		.name		= "physdev",
-		.family		= AF_INET,
-		.checkentry	= physdev_mt_check,
-		.match		= physdev_mt,
-		.matchsize	= sizeof(struct xt_physdev_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "physdev",
-		.family		= AF_INET6,
-		.checkentry	= physdev_mt_check,
-		.match		= physdev_mt,
-		.matchsize	= sizeof(struct xt_physdev_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_match physdev_mt_reg __read_mostly = {
+	.name       = "physdev",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = physdev_mt_check,
+	.match      = physdev_mt,
+	.matchsize  = sizeof(struct xt_physdev_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init physdev_mt_init(void)
 {
-	return xt_register_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg));
+	return xt_register_match(&physdev_mt_reg);
 }
 
 static void __exit physdev_mt_exit(void)
 {
-	xt_unregister_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg));
+	xt_unregister_match(&physdev_mt_reg);
 }
 
 module_init(physdev_mt_init);
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 7936f7e2325..69da1d3a1d8 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -23,20 +23,17 @@ MODULE_ALIAS("ipt_pkttype");
 MODULE_ALIAS("ip6t_pkttype");
 
 static bool
-pkttype_mt(const struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, const struct xt_match *match,
-           const void *matchinfo, int offset, unsigned int protoff,
-           bool *hotdrop)
+pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_pkttype_info *info = matchinfo;
+	const struct xt_pkttype_info *info = par->matchinfo;
 	u_int8_t type;
 
 	if (skb->pkt_type != PACKET_LOOPBACK)
 		type = skb->pkt_type;
-	else if (match->family == AF_INET &&
+	else if (par->family == NFPROTO_IPV4 &&
 	    ipv4_is_multicast(ip_hdr(skb)->daddr))
 		type = PACKET_MULTICAST;
-	else if (match->family == AF_INET6 &&
+	else if (par->family == NFPROTO_IPV6 &&
 	    ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
 		type = PACKET_MULTICAST;
 	else
@@ -45,31 +42,23 @@ pkttype_mt(const struct sk_buff *skb, const struct net_device *in,
 	return (type == info->pkttype) ^ info->invert;
 }
 
-static struct xt_match pkttype_mt_reg[] __read_mostly = {
-	{
-		.name		= "pkttype",
-		.family		= AF_INET,
-		.match		= pkttype_mt,
-		.matchsize	= sizeof(struct xt_pkttype_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "pkttype",
-		.family		= AF_INET6,
-		.match		= pkttype_mt,
-		.matchsize	= sizeof(struct xt_pkttype_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_match pkttype_mt_reg __read_mostly = {
+	.name      = "pkttype",
+	.revision  = 0,
+	.family    = NFPROTO_UNSPEC,
+	.match     = pkttype_mt,
+	.matchsize = sizeof(struct xt_pkttype_info),
+	.me        = THIS_MODULE,
 };
 
 static int __init pkttype_mt_init(void)
 {
-	return xt_register_matches(pkttype_mt_reg, ARRAY_SIZE(pkttype_mt_reg));
+	return xt_register_match(&pkttype_mt_reg);
 }
 
 static void __exit pkttype_mt_exit(void)
 {
-	xt_unregister_matches(pkttype_mt_reg, ARRAY_SIZE(pkttype_mt_reg));
+	xt_unregister_match(&pkttype_mt_reg);
 }
 
 module_init(pkttype_mt_init);
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index d351582b2a3..328bd20ddd2 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -26,9 +26,9 @@ xt_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *m,
 	    const union nf_inet_addr *a2, unsigned short family)
 {
 	switch (family) {
-	case AF_INET:
+	case NFPROTO_IPV4:
 		return ((a1->ip ^ a2->ip) & m->ip) == 0;
-	case AF_INET6:
+	case NFPROTO_IPV6:
 		return ipv6_masked_addr_cmp(&a1->in6, &m->in6, &a2->in6) == 0;
 	}
 	return false;
@@ -110,18 +110,15 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
 }
 
 static bool
-policy_mt(const struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, const struct xt_match *match,
-          const void *matchinfo, int offset, unsigned int protoff,
-          bool *hotdrop)
+policy_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_policy_info *info = matchinfo;
+	const struct xt_policy_info *info = par->matchinfo;
 	int ret;
 
 	if (info->flags & XT_POLICY_MATCH_IN)
-		ret = match_policy_in(skb, info, match->family);
+		ret = match_policy_in(skb, info, par->match->family);
 	else
-		ret = match_policy_out(skb, info, match->family);
+		ret = match_policy_out(skb, info, par->match->family);
 
 	if (ret < 0)
 		ret = info->flags & XT_POLICY_MATCH_NONE ? true : false;
@@ -131,26 +128,23 @@ policy_mt(const struct sk_buff *skb, const struct net_device *in,
 	return ret;
 }
 
-static bool
-policy_mt_check(const char *tablename, const void *ip_void,
-                const struct xt_match *match, void *matchinfo,
-                unsigned int hook_mask)
+static bool policy_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_policy_info *info = matchinfo;
+	const struct xt_policy_info *info = par->matchinfo;
 
 	if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) {
 		printk(KERN_ERR "xt_policy: neither incoming nor "
 				"outgoing policy selected\n");
 		return false;
 	}
-	if (hook_mask & (1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN)
-	    && info->flags & XT_POLICY_MATCH_OUT) {
+	if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
+	    (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) {
 		printk(KERN_ERR "xt_policy: output policy not valid in "
 				"PRE_ROUTING and INPUT\n");
 		return false;
 	}
-	if (hook_mask & (1 << NF_INET_POST_ROUTING | 1 << NF_INET_LOCAL_OUT)
-	    && info->flags & XT_POLICY_MATCH_IN) {
+	if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
+	    (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) {
 		printk(KERN_ERR "xt_policy: input policy not valid in "
 				"POST_ROUTING and OUTPUT\n");
 		return false;
@@ -165,7 +159,7 @@ policy_mt_check(const char *tablename, const void *ip_void,
 static struct xt_match policy_mt_reg[] __read_mostly = {
 	{
 		.name		= "policy",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry 	= policy_mt_check,
 		.match		= policy_mt,
 		.matchsize	= sizeof(struct xt_policy_info),
@@ -173,7 +167,7 @@ static struct xt_match policy_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "policy",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= policy_mt_check,
 		.match		= policy_mt,
 		.matchsize	= sizeof(struct xt_policy_info),
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 3b021d0c522..c84fce5e0f3 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -18,13 +18,10 @@ MODULE_ALIAS("ip6t_quota");
 static DEFINE_SPINLOCK(quota_lock);
 
 static bool
-quota_mt(const struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, const struct xt_match *match,
-         const void *matchinfo, int offset, unsigned int protoff,
-         bool *hotdrop)
+quota_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	struct xt_quota_info *q =
-		((const struct xt_quota_info *)matchinfo)->master;
+		((const struct xt_quota_info *)par->matchinfo)->master;
 	bool ret = q->flags & XT_QUOTA_INVERT;
 
 	spin_lock_bh(&quota_lock);
@@ -40,12 +37,9 @@ quota_mt(const struct sk_buff *skb, const struct net_device *in,
 	return ret;
 }
 
-static bool
-quota_mt_check(const char *tablename, const void *entry,
-               const struct xt_match *match, void *matchinfo,
-               unsigned int hook_mask)
+static bool quota_mt_check(const struct xt_mtchk_param *par)
 {
-	struct xt_quota_info *q = matchinfo;
+	struct xt_quota_info *q = par->matchinfo;
 
 	if (q->flags & ~XT_QUOTA_MASK)
 		return false;
@@ -54,33 +48,24 @@ quota_mt_check(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct xt_match quota_mt_reg[] __read_mostly = {
-	{
-		.name		= "quota",
-		.family		= AF_INET,
-		.checkentry	= quota_mt_check,
-		.match		= quota_mt,
-		.matchsize	= sizeof(struct xt_quota_info),
-		.me		= THIS_MODULE
-	},
-	{
-		.name		= "quota",
-		.family		= AF_INET6,
-		.checkentry	= quota_mt_check,
-		.match		= quota_mt,
-		.matchsize	= sizeof(struct xt_quota_info),
-		.me		= THIS_MODULE
-	},
+static struct xt_match quota_mt_reg __read_mostly = {
+	.name       = "quota",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.match      = quota_mt,
+	.checkentry = quota_mt_check,
+	.matchsize  = sizeof(struct xt_quota_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init quota_mt_init(void)
 {
-	return xt_register_matches(quota_mt_reg, ARRAY_SIZE(quota_mt_reg));
+	return xt_register_match(&quota_mt_reg);
 }
 
 static void __exit quota_mt_exit(void)
 {
-	xt_unregister_matches(quota_mt_reg, ARRAY_SIZE(quota_mt_reg));
+	xt_unregister_match(&quota_mt_reg);
 }
 
 module_init(quota_mt_init);
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index ebd84f1b4f6..220a1d588ee 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -14,16 +14,10 @@
 #include <net/netfilter/xt_rateest.h>
 
 
-static bool xt_rateest_mt(const struct sk_buff *skb,
-			  const struct net_device *in,
-			  const struct net_device *out,
-			  const struct xt_match *match,
-			  const void *matchinfo,
-			  int offset,
-			  unsigned int protoff,
-			  bool *hotdrop)
+static bool
+xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_rateest_match_info *info = matchinfo;
+	const struct xt_rateest_match_info *info = par->matchinfo;
 	struct gnet_stats_rate_est *r;
 	u_int32_t bps1, bps2, pps1, pps2;
 	bool ret = true;
@@ -80,13 +74,9 @@ static bool xt_rateest_mt(const struct sk_buff *skb,
 	return ret;
 }
 
-static bool xt_rateest_mt_checkentry(const char *tablename,
-				     const void *ip,
-				     const struct xt_match *match,
-				     void *matchinfo,
-				     unsigned int hook_mask)
+static bool xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
 {
-	struct xt_rateest_match_info *info = matchinfo;
+	struct xt_rateest_match_info *info = par->matchinfo;
 	struct xt_rateest *est1, *est2;
 
 	if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS |
@@ -127,46 +117,34 @@ err1:
 	return false;
 }
 
-static void xt_rateest_mt_destroy(const struct xt_match *match,
-				  void *matchinfo)
+static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	struct xt_rateest_match_info *info = matchinfo;
+	struct xt_rateest_match_info *info = par->matchinfo;
 
 	xt_rateest_put(info->est1);
 	if (info->est2)
 		xt_rateest_put(info->est2);
 }
 
-static struct xt_match xt_rateest_match[] __read_mostly = {
-	{
-		.family		= AF_INET,
-		.name		= "rateest",
-		.match		= xt_rateest_mt,
-		.checkentry	= xt_rateest_mt_checkentry,
-		.destroy	= xt_rateest_mt_destroy,
-		.matchsize	= sizeof(struct xt_rateest_match_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.family		= AF_INET6,
-		.name		= "rateest",
-		.match		= xt_rateest_mt,
-		.checkentry	= xt_rateest_mt_checkentry,
-		.destroy	= xt_rateest_mt_destroy,
-		.matchsize	= sizeof(struct xt_rateest_match_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_match xt_rateest_mt_reg __read_mostly = {
+	.name       = "rateest",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.match      = xt_rateest_mt,
+	.checkentry = xt_rateest_mt_checkentry,
+	.destroy    = xt_rateest_mt_destroy,
+	.matchsize  = sizeof(struct xt_rateest_match_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init xt_rateest_mt_init(void)
 {
-	return xt_register_matches(xt_rateest_match,
-				   ARRAY_SIZE(xt_rateest_match));
+	return xt_register_match(&xt_rateest_mt_reg);
 }
 
 static void __exit xt_rateest_mt_fini(void)
 {
-	xt_unregister_matches(xt_rateest_match, ARRAY_SIZE(xt_rateest_match));
+	xt_unregister_match(&xt_rateest_mt_reg);
 }
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 7df1627c536..67419287bc7 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -22,12 +22,9 @@ MODULE_DESCRIPTION("Xtables: Routing realm match");
 MODULE_ALIAS("ipt_realm");
 
 static bool
-realm_mt(const struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, const struct xt_match *match,
-         const void *matchinfo, int offset, unsigned int protoff,
-         bool *hotdrop)
+realm_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_realm_info *info = matchinfo;
+	const struct xt_realm_info *info = par->matchinfo;
 	const struct dst_entry *dst = skb->dst;
 
 	return (info->id == (dst->tclassid & info->mask)) ^ info->invert;
@@ -39,7 +36,7 @@ static struct xt_match realm_mt_reg __read_mostly = {
 	.matchsize	= sizeof(struct xt_realm_info),
 	.hooks		= (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_FORWARD) |
 			  (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN),
-	.family		= AF_INET,
+	.family		= NFPROTO_UNSPEC,
 	.me		= THIS_MODULE
 };
 
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
new file mode 100644
index 00000000000..4ebd4ca9a99
--- /dev/null
+++ b/net/netfilter/xt_recent.c
@@ -0,0 +1,689 @@
+/*
+ * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
+ * Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This is a replacement of the old ipt_recent module, which carried the
+ * following copyright notice:
+ *
+ * Author: Stephen Frost <sfrost@snowman.net>
+ * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org
+ */
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/list.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/bitops.h>
+#include <linux/skbuff.h>
+#include <linux/inet.h>
+#include <net/net_namespace.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_recent.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_recent");
+MODULE_ALIAS("ip6t_recent");
+
+static unsigned int ip_list_tot = 100;
+static unsigned int ip_pkt_list_tot = 20;
+static unsigned int ip_list_hash_size = 0;
+static unsigned int ip_list_perms = 0644;
+static unsigned int ip_list_uid = 0;
+static unsigned int ip_list_gid = 0;
+module_param(ip_list_tot, uint, 0400);
+module_param(ip_pkt_list_tot, uint, 0400);
+module_param(ip_list_hash_size, uint, 0400);
+module_param(ip_list_perms, uint, 0400);
+module_param(ip_list_uid, uint, 0400);
+module_param(ip_list_gid, uint, 0400);
+MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
+MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)");
+MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
+MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files");
+MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/xt_recent/* files");
+MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/xt_recent/* files");
+
+struct recent_entry {
+	struct list_head	list;
+	struct list_head	lru_list;
+	union nf_inet_addr	addr;
+	u_int16_t		family;
+	u_int8_t		ttl;
+	u_int8_t		index;
+	u_int16_t		nstamps;
+	unsigned long		stamps[0];
+};
+
+struct recent_table {
+	struct list_head	list;
+	char			name[XT_RECENT_NAME_LEN];
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry	*proc_old, *proc;
+#endif
+	unsigned int		refcnt;
+	unsigned int		entries;
+	struct list_head	lru_list;
+	struct list_head	iphash[0];
+};
+
+static LIST_HEAD(tables);
+static DEFINE_SPINLOCK(recent_lock);
+static DEFINE_MUTEX(recent_mutex);
+
+#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+static struct proc_dir_entry *proc_old_dir;
+#endif
+static struct proc_dir_entry *recent_proc_dir;
+static const struct file_operations recent_old_fops, recent_mt_fops;
+#endif
+
+static u_int32_t hash_rnd;
+static bool hash_rnd_initted;
+
+static unsigned int recent_entry_hash4(const union nf_inet_addr *addr)
+{
+	if (!hash_rnd_initted) {
+		get_random_bytes(&hash_rnd, sizeof(hash_rnd));
+		hash_rnd_initted = true;
+	}
+	return jhash_1word((__force u32)addr->ip, hash_rnd) &
+	       (ip_list_hash_size - 1);
+}
+
+static unsigned int recent_entry_hash6(const union nf_inet_addr *addr)
+{
+	if (!hash_rnd_initted) {
+		get_random_bytes(&hash_rnd, sizeof(hash_rnd));
+		hash_rnd_initted = true;
+	}
+	return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), hash_rnd) &
+	       (ip_list_hash_size - 1);
+}
+
+static struct recent_entry *
+recent_entry_lookup(const struct recent_table *table,
+		    const union nf_inet_addr *addrp, u_int16_t family,
+		    u_int8_t ttl)
+{
+	struct recent_entry *e;
+	unsigned int h;
+
+	if (family == NFPROTO_IPV4)
+		h = recent_entry_hash4(addrp);
+	else
+		h = recent_entry_hash6(addrp);
+
+	list_for_each_entry(e, &table->iphash[h], list)
+		if (e->family == family &&
+		    memcmp(&e->addr, addrp, sizeof(e->addr)) == 0 &&
+		    (ttl == e->ttl || ttl == 0 || e->ttl == 0))
+			return e;
+	return NULL;
+}
+
+static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
+{
+	list_del(&e->list);
+	list_del(&e->lru_list);
+	kfree(e);
+	t->entries--;
+}
+
+static struct recent_entry *
+recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr,
+		  u_int16_t family, u_int8_t ttl)
+{
+	struct recent_entry *e;
+
+	if (t->entries >= ip_list_tot) {
+		e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
+		recent_entry_remove(t, e);
+	}
+	e = kmalloc(sizeof(*e) + sizeof(e->stamps[0]) * ip_pkt_list_tot,
+		    GFP_ATOMIC);
+	if (e == NULL)
+		return NULL;
+	memcpy(&e->addr, addr, sizeof(e->addr));
+	e->ttl       = ttl;
+	e->stamps[0] = jiffies;
+	e->nstamps   = 1;
+	e->index     = 1;
+	e->family    = family;
+	if (family == NFPROTO_IPV4)
+		list_add_tail(&e->list, &t->iphash[recent_entry_hash4(addr)]);
+	else
+		list_add_tail(&e->list, &t->iphash[recent_entry_hash6(addr)]);
+	list_add_tail(&e->lru_list, &t->lru_list);
+	t->entries++;
+	return e;
+}
+
+static void recent_entry_update(struct recent_table *t, struct recent_entry *e)
+{
+	e->stamps[e->index++] = jiffies;
+	if (e->index > e->nstamps)
+		e->nstamps = e->index;
+	e->index %= ip_pkt_list_tot;
+	list_move_tail(&e->lru_list, &t->lru_list);
+}
+
+static struct recent_table *recent_table_lookup(const char *name)
+{
+	struct recent_table *t;
+
+	list_for_each_entry(t, &tables, list)
+		if (!strcmp(t->name, name))
+			return t;
+	return NULL;
+}
+
+static void recent_table_flush(struct recent_table *t)
+{
+	struct recent_entry *e, *next;
+	unsigned int i;
+
+	for (i = 0; i < ip_list_hash_size; i++)
+		list_for_each_entry_safe(e, next, &t->iphash[i], list)
+			recent_entry_remove(t, e);
+}
+
+static bool
+recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_recent_mtinfo *info = par->matchinfo;
+	struct recent_table *t;
+	struct recent_entry *e;
+	union nf_inet_addr addr = {};
+	u_int8_t ttl;
+	bool ret = info->invert;
+
+	if (par->match->family == NFPROTO_IPV4) {
+		const struct iphdr *iph = ip_hdr(skb);
+
+		if (info->side == XT_RECENT_DEST)
+			addr.ip = iph->daddr;
+		else
+			addr.ip = iph->saddr;
+
+		ttl = iph->ttl;
+	} else {
+		const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+		if (info->side == XT_RECENT_DEST)
+			memcpy(&addr.in6, &iph->daddr, sizeof(addr.in6));
+		else
+			memcpy(&addr.in6, &iph->saddr, sizeof(addr.in6));
+
+		ttl = iph->hop_limit;
+	}
+
+	/* use TTL as seen before forwarding */
+	if (par->out != NULL && skb->sk == NULL)
+		ttl++;
+
+	spin_lock_bh(&recent_lock);
+	t = recent_table_lookup(info->name);
+	e = recent_entry_lookup(t, &addr, par->match->family,
+				(info->check_set & XT_RECENT_TTL) ? ttl : 0);
+	if (e == NULL) {
+		if (!(info->check_set & XT_RECENT_SET))
+			goto out;
+		e = recent_entry_init(t, &addr, par->match->family, ttl);
+		if (e == NULL)
+			*par->hotdrop = true;
+		ret = !ret;
+		goto out;
+	}
+
+	if (info->check_set & XT_RECENT_SET)
+		ret = !ret;
+	else if (info->check_set & XT_RECENT_REMOVE) {
+		recent_entry_remove(t, e);
+		ret = !ret;
+	} else if (info->check_set & (XT_RECENT_CHECK | XT_RECENT_UPDATE)) {
+		unsigned long time = jiffies - info->seconds * HZ;
+		unsigned int i, hits = 0;
+
+		for (i = 0; i < e->nstamps; i++) {
+			if (info->seconds && time_after(time, e->stamps[i]))
+				continue;
+			if (++hits >= info->hit_count) {
+				ret = !ret;
+				break;
+			}
+		}
+	}
+
+	if (info->check_set & XT_RECENT_SET ||
+	    (info->check_set & XT_RECENT_UPDATE && ret)) {
+		recent_entry_update(t, e);
+		e->ttl = ttl;
+	}
+out:
+	spin_unlock_bh(&recent_lock);
+	return ret;
+}
+
+static bool recent_mt_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_recent_mtinfo *info = par->matchinfo;
+	struct recent_table *t;
+	unsigned i;
+	bool ret = false;
+
+	if (hweight8(info->check_set &
+		     (XT_RECENT_SET | XT_RECENT_REMOVE |
+		      XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1)
+		return false;
+	if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) &&
+	    (info->seconds || info->hit_count))
+		return false;
+	if (info->hit_count > ip_pkt_list_tot)
+		return false;
+	if (info->name[0] == '\0' ||
+	    strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN)
+		return false;
+
+	mutex_lock(&recent_mutex);
+	t = recent_table_lookup(info->name);
+	if (t != NULL) {
+		t->refcnt++;
+		ret = true;
+		goto out;
+	}
+
+	t = kzalloc(sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size,
+		    GFP_KERNEL);
+	if (t == NULL)
+		goto out;
+	t->refcnt = 1;
+	strcpy(t->name, info->name);
+	INIT_LIST_HEAD(&t->lru_list);
+	for (i = 0; i < ip_list_hash_size; i++)
+		INIT_LIST_HEAD(&t->iphash[i]);
+#ifdef CONFIG_PROC_FS
+	t->proc = proc_create(t->name, ip_list_perms, recent_proc_dir,
+		  &recent_mt_fops);
+	if (t->proc == NULL) {
+		kfree(t);
+		goto out;
+	}
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+	t->proc_old = proc_create(t->name, ip_list_perms, proc_old_dir,
+		      &recent_old_fops);
+	if (t->proc_old == NULL) {
+		remove_proc_entry(t->name, proc_old_dir);
+		kfree(t);
+		goto out;
+	}
+	t->proc_old->uid   = ip_list_uid;
+	t->proc_old->gid   = ip_list_gid;
+	t->proc_old->data  = t;
+#endif
+	t->proc->uid       = ip_list_uid;
+	t->proc->gid       = ip_list_gid;
+	t->proc->data      = t;
+#endif
+	spin_lock_bh(&recent_lock);
+	list_add_tail(&t->list, &tables);
+	spin_unlock_bh(&recent_lock);
+	ret = true;
+out:
+	mutex_unlock(&recent_mutex);
+	return ret;
+}
+
+static void recent_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	const struct xt_recent_mtinfo *info = par->matchinfo;
+	struct recent_table *t;
+
+	mutex_lock(&recent_mutex);
+	t = recent_table_lookup(info->name);
+	if (--t->refcnt == 0) {
+		spin_lock_bh(&recent_lock);
+		list_del(&t->list);
+		spin_unlock_bh(&recent_lock);
+#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+		remove_proc_entry(t->name, proc_old_dir);
+#endif
+		remove_proc_entry(t->name, recent_proc_dir);
+#endif
+		recent_table_flush(t);
+		kfree(t);
+	}
+	mutex_unlock(&recent_mutex);
+}
+
+#ifdef CONFIG_PROC_FS
+struct recent_iter_state {
+	const struct recent_table *table;
+	unsigned int		bucket;
+};
+
+static void *recent_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(recent_lock)
+{
+	struct recent_iter_state *st = seq->private;
+	const struct recent_table *t = st->table;
+	struct recent_entry *e;
+	loff_t p = *pos;
+
+	spin_lock_bh(&recent_lock);
+
+	for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++)
+		list_for_each_entry(e, &t->iphash[st->bucket], list)
+			if (p-- == 0)
+				return e;
+	return NULL;
+}
+
+static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct recent_iter_state *st = seq->private;
+	const struct recent_table *t = st->table;
+	const struct recent_entry *e = v;
+	const struct list_head *head = e->list.next;
+
+	while (head == &t->iphash[st->bucket]) {
+		if (++st->bucket >= ip_list_hash_size)
+			return NULL;
+		head = t->iphash[st->bucket].next;
+	}
+	(*pos)++;
+	return list_entry(head, struct recent_entry, list);
+}
+
+static void recent_seq_stop(struct seq_file *s, void *v)
+	__releases(recent_lock)
+{
+	spin_unlock_bh(&recent_lock);
+}
+
+static int recent_seq_show(struct seq_file *seq, void *v)
+{
+	const struct recent_entry *e = v;
+	unsigned int i;
+
+	i = (e->index - 1) % ip_pkt_list_tot;
+	if (e->family == NFPROTO_IPV4)
+		seq_printf(seq, "src=" NIPQUAD_FMT " ttl: %u last_seen: %lu "
+			   "oldest_pkt: %u", NIPQUAD(e->addr.ip), e->ttl,
+			   e->stamps[i], e->index);
+	else
+		seq_printf(seq, "src=" NIP6_FMT " ttl: %u last_seen: %lu "
+			   "oldest_pkt: %u", NIP6(e->addr.in6), e->ttl,
+			   e->stamps[i], e->index);
+	for (i = 0; i < e->nstamps; i++)
+		seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]);
+	seq_printf(seq, "\n");
+	return 0;
+}
+
+static const struct seq_operations recent_seq_ops = {
+	.start		= recent_seq_start,
+	.next		= recent_seq_next,
+	.stop		= recent_seq_stop,
+	.show		= recent_seq_show,
+};
+
+static int recent_seq_open(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *pde = PDE(inode);
+	struct recent_iter_state *st;
+
+	st = __seq_open_private(file, &recent_seq_ops, sizeof(*st));
+	if (st == NULL)
+		return -ENOMEM;
+
+	st->table    = pde->data;
+	return 0;
+}
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+static int recent_old_seq_open(struct inode *inode, struct file *filp)
+{
+	static bool warned_of_old;
+
+	if (unlikely(!warned_of_old)) {
+		printk(KERN_INFO KBUILD_MODNAME ": Use of /proc/net/ipt_recent"
+		       " is deprecated; use /proc/net/xt_recent.\n");
+		warned_of_old = true;
+	}
+	return recent_seq_open(inode, filp);
+}
+
+static ssize_t recent_old_proc_write(struct file *file,
+				     const char __user *input,
+				     size_t size, loff_t *loff)
+{
+	const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct recent_table *t = pde->data;
+	struct recent_entry *e;
+	char buf[sizeof("+255.255.255.255")], *c = buf;
+	__be32 addr;
+	int add;
+
+	if (size > sizeof(buf))
+		size = sizeof(buf);
+	if (copy_from_user(buf, input, size))
+		return -EFAULT;
+
+	while (isspace(*c))
+		c++;
+
+	if (size - (c - buf) < 5)
+		return c - buf;
+	if (!strncmp(c, "clear", 5)) {
+		c += 5;
+		spin_lock_bh(&recent_lock);
+		recent_table_flush(t);
+		spin_unlock_bh(&recent_lock);
+		return c - buf;
+	}
+
+	switch (*c) {
+	case '-':
+		add = 0;
+		c++;
+		break;
+	case '+':
+		c++;
+	default:
+		add = 1;
+		break;
+	}
+	addr = in_aton(c);
+
+	spin_lock_bh(&recent_lock);
+	e = recent_entry_lookup(t, (const void *)&addr, NFPROTO_IPV4, 0);
+	if (e == NULL) {
+		if (add)
+			recent_entry_init(t, (const void *)&addr,
+					  NFPROTO_IPV4, 0);
+	} else {
+		if (add)
+			recent_entry_update(t, e);
+		else
+			recent_entry_remove(t, e);
+	}
+	spin_unlock_bh(&recent_lock);
+	return size;
+}
+
+static const struct file_operations recent_old_fops = {
+	.open		= recent_old_seq_open,
+	.read		= seq_read,
+	.write		= recent_old_proc_write,
+	.release	= seq_release_private,
+	.owner		= THIS_MODULE,
+};
+#endif
+
+static ssize_t
+recent_mt_proc_write(struct file *file, const char __user *input,
+		     size_t size, loff_t *loff)
+{
+	const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct recent_table *t = pde->data;
+	struct recent_entry *e;
+	char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")];
+	const char *c = buf;
+	union nf_inet_addr addr;
+	u_int16_t family;
+	bool add, succ;
+
+	if (size == 0)
+		return 0;
+	if (size > sizeof(buf))
+		size = sizeof(buf);
+	if (copy_from_user(buf, input, size) != 0)
+		return -EFAULT;
+
+	/* Strict protocol! */
+	if (*loff != 0)
+		return -ESPIPE;
+	switch (*c) {
+	case '/': /* flush table */
+		spin_lock_bh(&recent_lock);
+		recent_table_flush(t);
+		spin_unlock_bh(&recent_lock);
+		return size;
+	case '-': /* remove address */
+		add = false;
+		break;
+	case '+': /* add address */
+		add = true;
+		break;
+	default:
+		printk(KERN_INFO KBUILD_MODNAME ": Need +ip, -ip or /\n");
+		return -EINVAL;
+	}
+
+	++c;
+	--size;
+	if (strnchr(c, size, ':') != NULL) {
+		family = NFPROTO_IPV6;
+		succ   = in6_pton(c, size, (void *)&addr, '\n', NULL);
+	} else {
+		family = NFPROTO_IPV4;
+		succ   = in4_pton(c, size, (void *)&addr, '\n', NULL);
+	}
+
+	if (!succ) {
+		printk(KERN_INFO KBUILD_MODNAME ": illegal address written "
+		       "to procfs\n");
+		return -EINVAL;
+	}
+
+	spin_lock_bh(&recent_lock);
+	e = recent_entry_lookup(t, &addr, family, 0);
+	if (e == NULL) {
+		if (add)
+			recent_entry_init(t, &addr, family, 0);
+	} else {
+		if (add)
+			recent_entry_update(t, e);
+		else
+			recent_entry_remove(t, e);
+	}
+	spin_unlock_bh(&recent_lock);
+	/* Note we removed one above */
+	*loff += size + 1;
+	return size + 1;
+}
+
+static const struct file_operations recent_mt_fops = {
+	.open    = recent_seq_open,
+	.read    = seq_read,
+	.write   = recent_mt_proc_write,
+	.release = seq_release_private,
+	.owner   = THIS_MODULE,
+};
+#endif /* CONFIG_PROC_FS */
+
+static struct xt_match recent_mt_reg[] __read_mostly = {
+	{
+		.name       = "recent",
+		.revision   = 0,
+		.family     = NFPROTO_IPV4,
+		.match      = recent_mt,
+		.matchsize  = sizeof(struct xt_recent_mtinfo),
+		.checkentry = recent_mt_check,
+		.destroy    = recent_mt_destroy,
+		.me         = THIS_MODULE,
+	},
+	{
+		.name       = "recent",
+		.revision   = 0,
+		.family     = NFPROTO_IPV6,
+		.match      = recent_mt,
+		.matchsize  = sizeof(struct xt_recent_mtinfo),
+		.checkentry = recent_mt_check,
+		.destroy    = recent_mt_destroy,
+		.me         = THIS_MODULE,
+	},
+};
+
+static int __init recent_mt_init(void)
+{
+	int err;
+
+	if (!ip_list_tot || !ip_pkt_list_tot || ip_pkt_list_tot > 255)
+		return -EINVAL;
+	ip_list_hash_size = 1 << fls(ip_list_tot);
+
+	err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
+#ifdef CONFIG_PROC_FS
+	if (err)
+		return err;
+	recent_proc_dir = proc_mkdir("xt_recent", init_net.proc_net);
+	if (recent_proc_dir == NULL) {
+		xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
+		err = -ENOMEM;
+	}
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+	if (err < 0)
+		return err;
+	proc_old_dir = proc_mkdir("ipt_recent", init_net.proc_net);
+	if (proc_old_dir == NULL) {
+		remove_proc_entry("xt_recent", init_net.proc_net);
+		xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
+		err = -ENOMEM;
+	}
+#endif
+#endif
+	return err;
+}
+
+static void __exit recent_mt_exit(void)
+{
+	BUG_ON(!list_empty(&tables));
+	xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
+#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+	remove_proc_entry("ipt_recent", init_net.proc_net);
+#endif
+	remove_proc_entry("xt_recent", init_net.proc_net);
+#endif
+}
+
+module_init(recent_mt_init);
+module_exit(recent_mt_exit);
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index e6e4681fa04..e223cb43ae8 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -117,23 +117,21 @@ match_packet(const struct sk_buff *skb,
 }
 
 static bool
-sctp_mt(const struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, const struct xt_match *match,
-        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_sctp_info *info = matchinfo;
+	const struct xt_sctp_info *info = par->matchinfo;
 	const sctp_sctphdr_t *sh;
 	sctp_sctphdr_t _sh;
 
-	if (offset) {
+	if (par->fragoff != 0) {
 		duprintf("Dropping non-first fragment.. FIXME\n");
 		return false;
 	}
 
-	sh = skb_header_pointer(skb, protoff, sizeof(_sh), &_sh);
+	sh = skb_header_pointer(skb, par->thoff, sizeof(_sh), &_sh);
 	if (sh == NULL) {
 		duprintf("Dropping evil TCP offset=0 tinygram.\n");
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 	duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest));
@@ -144,17 +142,14 @@ sctp_mt(const struct sk_buff *skb, const struct net_device *in,
 		&& SCCHECK(ntohs(sh->dest) >= info->dpts[0]
 			&& ntohs(sh->dest) <= info->dpts[1],
 			XT_SCTP_DEST_PORTS, info->flags, info->invflags)
-		&& SCCHECK(match_packet(skb, protoff + sizeof (sctp_sctphdr_t),
-					info, hotdrop),
+		&& SCCHECK(match_packet(skb, par->thoff + sizeof(sctp_sctphdr_t),
+					info, par->hotdrop),
 			   XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
 }
 
-static bool
-sctp_mt_check(const char *tablename, const void *inf,
-              const struct xt_match *match, void *matchinfo,
-              unsigned int hook_mask)
+static bool sctp_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_sctp_info *info = matchinfo;
+	const struct xt_sctp_info *info = par->matchinfo;
 
 	return !(info->flags & ~XT_SCTP_VALID_FLAGS)
 		&& !(info->invflags & ~XT_SCTP_VALID_FLAGS)
@@ -169,7 +164,7 @@ sctp_mt_check(const char *tablename, const void *inf,
 static struct xt_match sctp_mt_reg[] __read_mostly = {
 	{
 		.name		= "sctp",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= sctp_mt_check,
 		.match		= sctp_mt,
 		.matchsize	= sizeof(struct xt_sctp_info),
@@ -178,7 +173,7 @@ static struct xt_match sctp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "sctp",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= sctp_mt_check,
 		.match		= sctp_mt,
 		.matchsize	= sizeof(struct xt_sctp_info),
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
new file mode 100644
index 00000000000..02a8fed2108
--- /dev/null
+++ b/net/netfilter/xt_socket.c
@@ -0,0 +1,185 @@
+/*
+ * Transparent proxy support for Linux/iptables
+ *
+ * Copyright (C) 2007-2008 BalaBit IT Ltd.
+ * Author: Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+#include <net/sock.h>
+#include <net/inet_sock.h>
+#include <net/netfilter/nf_tproxy_core.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#define XT_SOCKET_HAVE_CONNTRACK 1
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+static int
+extract_icmp_fields(const struct sk_buff *skb,
+		    u8 *protocol,
+		    __be32 *raddr,
+		    __be32 *laddr,
+		    __be16 *rport,
+		    __be16 *lport)
+{
+	unsigned int outside_hdrlen = ip_hdrlen(skb);
+	struct iphdr *inside_iph, _inside_iph;
+	struct icmphdr *icmph, _icmph;
+	__be16 *ports, _ports[2];
+
+	icmph = skb_header_pointer(skb, outside_hdrlen,
+				   sizeof(_icmph), &_icmph);
+	if (icmph == NULL)
+		return 1;
+
+	switch (icmph->type) {
+	case ICMP_DEST_UNREACH:
+	case ICMP_SOURCE_QUENCH:
+	case ICMP_REDIRECT:
+	case ICMP_TIME_EXCEEDED:
+	case ICMP_PARAMETERPROB:
+		break;
+	default:
+		return 1;
+	}
+
+	inside_iph = skb_header_pointer(skb, outside_hdrlen +
+					sizeof(struct icmphdr),
+					sizeof(_inside_iph), &_inside_iph);
+	if (inside_iph == NULL)
+		return 1;
+
+	if (inside_iph->protocol != IPPROTO_TCP &&
+	    inside_iph->protocol != IPPROTO_UDP)
+		return 1;
+
+	ports = skb_header_pointer(skb, outside_hdrlen +
+				   sizeof(struct icmphdr) +
+				   (inside_iph->ihl << 2),
+				   sizeof(_ports), &_ports);
+	if (ports == NULL)
+		return 1;
+
+	/* the inside IP packet is the one quoted from our side, thus
+	 * its saddr is the local address */
+	*protocol = inside_iph->protocol;
+	*laddr = inside_iph->saddr;
+	*lport = ports[0];
+	*raddr = inside_iph->daddr;
+	*rport = ports[1];
+
+	return 0;
+}
+
+
+static bool
+socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	struct udphdr _hdr, *hp = NULL;
+	struct sock *sk;
+	__be32 daddr, saddr;
+	__be16 dport, sport;
+	u8 protocol;
+#ifdef XT_SOCKET_HAVE_CONNTRACK
+	struct nf_conn const *ct;
+	enum ip_conntrack_info ctinfo;
+#endif
+
+	if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
+		hp = skb_header_pointer(skb, ip_hdrlen(skb),
+					sizeof(_hdr), &_hdr);
+		if (hp == NULL)
+			return false;
+
+		protocol = iph->protocol;
+		saddr = iph->saddr;
+		sport = hp->source;
+		daddr = iph->daddr;
+		dport = hp->dest;
+
+	} else if (iph->protocol == IPPROTO_ICMP) {
+		if (extract_icmp_fields(skb, &protocol, &saddr, &daddr,
+					&sport, &dport))
+			return false;
+	} else {
+		return false;
+	}
+
+#ifdef XT_SOCKET_HAVE_CONNTRACK
+	/* Do the lookup with the original socket address in case this is a
+	 * reply packet of an established SNAT-ted connection. */
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct && (ct != &nf_conntrack_untracked) &&
+	    ((iph->protocol != IPPROTO_ICMP &&
+	      ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) ||
+	     (iph->protocol == IPPROTO_ICMP &&
+	      ctinfo == IP_CT_IS_REPLY + IP_CT_RELATED)) &&
+	    (ct->status & IPS_SRC_NAT_DONE)) {
+
+		daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
+		dport = (iph->protocol == IPPROTO_TCP) ?
+			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
+			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
+	}
+#endif
+
+	sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
+				   saddr, daddr, sport, dport, par->in, false);
+	if (sk != NULL) {
+		bool wildcard = (inet_sk(sk)->rcv_saddr == 0);
+
+		nf_tproxy_put_sock(sk);
+		if (wildcard)
+			sk = NULL;
+	}
+
+	pr_debug("socket match: proto %u %08x:%u -> %08x:%u "
+		 "(orig %08x:%u) sock %p\n",
+		 protocol, ntohl(saddr), ntohs(sport),
+		 ntohl(daddr), ntohs(dport),
+		 ntohl(iph->daddr), hp ? ntohs(hp->dest) : 0, sk);
+
+	return (sk != NULL);
+}
+
+static struct xt_match socket_mt_reg __read_mostly = {
+	.name		= "socket",
+	.family		= AF_INET,
+	.match		= socket_mt,
+	.hooks		= 1 << NF_INET_PRE_ROUTING,
+	.me		= THIS_MODULE,
+};
+
+static int __init socket_mt_init(void)
+{
+	nf_defrag_ipv4_enable();
+	return xt_register_match(&socket_mt_reg);
+}
+
+static void __exit socket_mt_exit(void)
+{
+	xt_unregister_match(&socket_mt_reg);
+}
+
+module_init(socket_mt_init);
+module_exit(socket_mt_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
+MODULE_DESCRIPTION("x_tables socket match module");
+MODULE_ALIAS("ipt_socket");
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index a776dc36a19..4c946cbd731 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -21,12 +21,9 @@ MODULE_ALIAS("ipt_state");
 MODULE_ALIAS("ip6t_state");
 
 static bool
-state_mt(const struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, const struct xt_match *match,
-         const void *matchinfo, int offset, unsigned int protoff,
-         bool *hotdrop)
+state_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_state_info *sinfo = matchinfo;
+	const struct xt_state_info *sinfo = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
 	unsigned int statebit;
 
@@ -40,28 +37,25 @@ state_mt(const struct sk_buff *skb, const struct net_device *in,
 	return (sinfo->statemask & statebit);
 }
 
-static bool
-state_mt_check(const char *tablename, const void *inf,
-               const struct xt_match *match, void *matchinfo,
-               unsigned int hook_mask)
+static bool state_mt_check(const struct xt_mtchk_param *par)
 {
-	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", match->family);
+				    "proto=%u\n", par->match->family);
 		return false;
 	}
 	return true;
 }
 
-static void state_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void state_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	nf_ct_l3proto_module_put(match->family);
+	nf_ct_l3proto_module_put(par->match->family);
 }
 
 static struct xt_match state_mt_reg[] __read_mostly = {
 	{
 		.name		= "state",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= state_mt_check,
 		.match		= state_mt,
 		.destroy	= state_mt_destroy,
@@ -70,7 +64,7 @@ static struct xt_match state_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "state",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= state_mt_check,
 		.match		= state_mt,
 		.destroy	= state_mt_destroy,
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 43133080da7..0d75141139d 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -25,12 +25,9 @@ MODULE_ALIAS("ip6t_statistic");
 static DEFINE_SPINLOCK(nth_lock);
 
 static bool
-statistic_mt(const struct sk_buff *skb, const struct net_device *in,
-             const struct net_device *out, const struct xt_match *match,
-             const void *matchinfo, int offset, unsigned int protoff,
-             bool *hotdrop)
+statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo;
+	struct xt_statistic_info *info = (void *)par->matchinfo;
 	bool ret = info->flags & XT_STATISTIC_INVERT;
 
 	switch (info->mode) {
@@ -52,12 +49,9 @@ statistic_mt(const struct sk_buff *skb, const struct net_device *in,
 	return ret;
 }
 
-static bool
-statistic_mt_check(const char *tablename, const void *entry,
-                   const struct xt_match *match, void *matchinfo,
-                   unsigned int hook_mask)
+static bool statistic_mt_check(const struct xt_mtchk_param *par)
 {
-	struct xt_statistic_info *info = matchinfo;
+	struct xt_statistic_info *info = par->matchinfo;
 
 	if (info->mode > XT_STATISTIC_MODE_MAX ||
 	    info->flags & ~XT_STATISTIC_MASK)
@@ -66,35 +60,24 @@ statistic_mt_check(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct xt_match statistic_mt_reg[] __read_mostly = {
-	{
-		.name		= "statistic",
-		.family		= AF_INET,
-		.checkentry	= statistic_mt_check,
-		.match		= statistic_mt,
-		.matchsize	= sizeof(struct xt_statistic_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "statistic",
-		.family		= AF_INET6,
-		.checkentry	= statistic_mt_check,
-		.match		= statistic_mt,
-		.matchsize	= sizeof(struct xt_statistic_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_match xt_statistic_mt_reg __read_mostly = {
+	.name       = "statistic",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.match      = statistic_mt,
+	.checkentry = statistic_mt_check,
+	.matchsize  = sizeof(struct xt_statistic_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init statistic_mt_init(void)
 {
-	return xt_register_matches(statistic_mt_reg,
-	       ARRAY_SIZE(statistic_mt_reg));
+	return xt_register_match(&xt_statistic_mt_reg);
 }
 
 static void __exit statistic_mt_exit(void)
 {
-	xt_unregister_matches(statistic_mt_reg,
-	                      ARRAY_SIZE(statistic_mt_reg));
+	xt_unregister_match(&xt_statistic_mt_reg);
 }
 
 module_init(statistic_mt_init);
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 4903182a062..b4d77411131 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -22,18 +22,15 @@ MODULE_ALIAS("ipt_string");
 MODULE_ALIAS("ip6t_string");
 
 static bool
-string_mt(const struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, const struct xt_match *match,
-          const void *matchinfo, int offset, unsigned int protoff,
-          bool *hotdrop)
+string_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_string_info *conf = matchinfo;
+	const struct xt_string_info *conf = par->matchinfo;
 	struct ts_state state;
 	int invert;
 
 	memset(&state, 0, sizeof(struct ts_state));
 
-	invert = (match->revision == 0 ? conf->u.v0.invert :
+	invert = (par->match->revision == 0 ? conf->u.v0.invert :
 				    conf->u.v1.flags & XT_STRING_FLAG_INVERT);
 
 	return (skb_find_text((struct sk_buff *)skb, conf->from_offset,
@@ -43,12 +40,9 @@ string_mt(const struct sk_buff *skb, const struct net_device *in,
 
 #define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m))
 
-static bool
-string_mt_check(const char *tablename, const void *ip,
-                const struct xt_match *match, void *matchinfo,
-                unsigned int hook_mask)
+static bool string_mt_check(const struct xt_mtchk_param *par)
 {
-	struct xt_string_info *conf = matchinfo;
+	struct xt_string_info *conf = par->matchinfo;
 	struct ts_config *ts_conf;
 	int flags = TS_AUTOLOAD;
 
@@ -59,7 +53,7 @@ string_mt_check(const char *tablename, const void *ip,
 		return false;
 	if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE)
 		return false;
-	if (match->revision == 1) {
+	if (par->match->revision == 1) {
 		if (conf->u.v1.flags &
 		    ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT))
 			return false;
@@ -76,36 +70,16 @@ string_mt_check(const char *tablename, const void *ip,
 	return true;
 }
 
-static void string_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void string_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
+	textsearch_destroy(STRING_TEXT_PRIV(par->matchinfo)->config);
 }
 
-static struct xt_match string_mt_reg[] __read_mostly = {
-	{
-		.name 		= "string",
-		.revision	= 0,
-		.family		= AF_INET,
-		.checkentry	= string_mt_check,
-		.match 		= string_mt,
-		.destroy 	= string_mt_destroy,
-		.matchsize	= sizeof(struct xt_string_info),
-		.me 		= THIS_MODULE
-	},
-	{
-		.name 		= "string",
-		.revision	= 1,
-		.family		= AF_INET,
-		.checkentry	= string_mt_check,
-		.match 		= string_mt,
-		.destroy 	= string_mt_destroy,
-		.matchsize	= sizeof(struct xt_string_info),
-		.me 		= THIS_MODULE
-	},
+static struct xt_match xt_string_mt_reg[] __read_mostly = {
 	{
 		.name 		= "string",
 		.revision	= 0,
-		.family		= AF_INET6,
+		.family		= NFPROTO_UNSPEC,
 		.checkentry	= string_mt_check,
 		.match 		= string_mt,
 		.destroy 	= string_mt_destroy,
@@ -115,7 +89,7 @@ static struct xt_match string_mt_reg[] __read_mostly = {
 	{
 		.name 		= "string",
 		.revision	= 1,
-		.family		= AF_INET6,
+		.family		= NFPROTO_UNSPEC,
 		.checkentry	= string_mt_check,
 		.match 		= string_mt,
 		.destroy 	= string_mt_destroy,
@@ -126,12 +100,13 @@ static struct xt_match string_mt_reg[] __read_mostly = {
 
 static int __init string_mt_init(void)
 {
-	return xt_register_matches(string_mt_reg, ARRAY_SIZE(string_mt_reg));
+	return xt_register_matches(xt_string_mt_reg,
+				   ARRAY_SIZE(xt_string_mt_reg));
 }
 
 static void __exit string_mt_exit(void)
 {
-	xt_unregister_matches(string_mt_reg, ARRAY_SIZE(string_mt_reg));
+	xt_unregister_matches(xt_string_mt_reg, ARRAY_SIZE(xt_string_mt_reg));
 }
 
 module_init(string_mt_init);
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 6771bf01275..4809b34b10f 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -25,12 +25,9 @@ MODULE_ALIAS("ipt_tcpmss");
 MODULE_ALIAS("ip6t_tcpmss");
 
 static bool
-tcpmss_mt(const struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, const struct xt_match *match,
-          const void *matchinfo, int offset, unsigned int protoff,
-          bool *hotdrop)
+tcpmss_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_tcpmss_match_info *info = matchinfo;
+	const struct xt_tcpmss_match_info *info = par->matchinfo;
 	const struct tcphdr *th;
 	struct tcphdr _tcph;
 	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
@@ -39,7 +36,7 @@ tcpmss_mt(const struct sk_buff *skb, const struct net_device *in,
 	unsigned int i, optlen;
 
 	/* If we don't have the whole header, drop packet. */
-	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		goto dropit;
 
@@ -52,7 +49,7 @@ tcpmss_mt(const struct sk_buff *skb, const struct net_device *in,
 		goto out;
 
 	/* Truncated options. */
-	op = skb_header_pointer(skb, protoff + sizeof(*th), optlen, _opt);
+	op = skb_header_pointer(skb, par->thoff + sizeof(*th), optlen, _opt);
 	if (op == NULL)
 		goto dropit;
 
@@ -76,14 +73,14 @@ out:
 	return info->invert;
 
 dropit:
-	*hotdrop = true;
+	*par->hotdrop = true;
 	return false;
 }
 
 static struct xt_match tcpmss_mt_reg[] __read_mostly = {
 	{
 		.name		= "tcpmss",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.match		= tcpmss_mt,
 		.matchsize	= sizeof(struct xt_tcpmss_match_info),
 		.proto		= IPPROTO_TCP,
@@ -91,7 +88,7 @@ static struct xt_match tcpmss_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "tcpmss",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.match		= tcpmss_mt,
 		.matchsize	= sizeof(struct xt_tcpmss_match_info),
 		.proto		= IPPROTO_TCP,
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 951b06b8d70..1ebdc4934ee 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -68,25 +68,22 @@ tcp_find_option(u_int8_t option,
 	return invert;
 }
 
-static bool
-tcp_mt(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const struct tcphdr *th;
 	struct tcphdr _tcph;
-	const struct xt_tcp *tcpinfo = matchinfo;
+	const struct xt_tcp *tcpinfo = par->matchinfo;
 
-	if (offset) {
+	if (par->fragoff != 0) {
 		/* To quote Alan:
 
 		   Don't allow a fragment of TCP 8 bytes in. Nobody normal
 		   causes this. Its a cracker trying to break in by doing a
 		   flag overwrite to pass the direction checks.
 		*/
-		if (offset == 1) {
+		if (par->fragoff == 1) {
 			duprintf("Dropping evil TCP offset=1 frag.\n");
-			*hotdrop = true;
+			*par->hotdrop = true;
 		}
 		/* Must not be a fragment. */
 		return false;
@@ -94,12 +91,12 @@ tcp_mt(const struct sk_buff *skb, const struct net_device *in,
 
 #define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg)))
 
-	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph);
 	if (th == NULL) {
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
 		duprintf("Dropping evil TCP offset=0 tinygram.\n");
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
@@ -117,49 +114,42 @@ tcp_mt(const struct sk_buff *skb, const struct net_device *in,
 		return false;
 	if (tcpinfo->option) {
 		if (th->doff * 4 < sizeof(_tcph)) {
-			*hotdrop = true;
+			*par->hotdrop = true;
 			return false;
 		}
-		if (!tcp_find_option(tcpinfo->option, skb, protoff,
+		if (!tcp_find_option(tcpinfo->option, skb, par->thoff,
 				     th->doff*4 - sizeof(_tcph),
 				     tcpinfo->invflags & XT_TCP_INV_OPTION,
-				     hotdrop))
+				     par->hotdrop))
 			return false;
 	}
 	return true;
 }
 
-/* Called when user tries to insert an entry of this type. */
-static bool
-tcp_mt_check(const char *tablename, const void *info,
-             const struct xt_match *match, void *matchinfo,
-             unsigned int hook_mask)
+static bool tcp_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_tcp *tcpinfo = matchinfo;
+	const struct xt_tcp *tcpinfo = par->matchinfo;
 
 	/* Must specify no unknown invflags */
 	return !(tcpinfo->invflags & ~XT_TCP_INV_MASK);
 }
 
-static bool
-udp_mt(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const struct udphdr *uh;
 	struct udphdr _udph;
-	const struct xt_udp *udpinfo = matchinfo;
+	const struct xt_udp *udpinfo = par->matchinfo;
 
 	/* Must not be a fragment. */
-	if (offset)
+	if (par->fragoff != 0)
 		return false;
 
-	uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph);
+	uh = skb_header_pointer(skb, par->thoff, sizeof(_udph), &_udph);
 	if (uh == NULL) {
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
 		duprintf("Dropping evil UDP tinygram.\n");
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
@@ -171,13 +161,9 @@ udp_mt(const struct sk_buff *skb, const struct net_device *in,
 			      !!(udpinfo->invflags & XT_UDP_INV_DSTPT));
 }
 
-/* Called when user tries to insert an entry of this type. */
-static bool
-udp_mt_check(const char *tablename, const void *info,
-             const struct xt_match *match, void *matchinfo,
-             unsigned int hook_mask)
+static bool udp_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_udp *udpinfo = matchinfo;
+	const struct xt_udp *udpinfo = par->matchinfo;
 
 	/* Must specify no unknown invflags */
 	return !(udpinfo->invflags & ~XT_UDP_INV_MASK);
@@ -186,7 +172,7 @@ udp_mt_check(const char *tablename, const void *info,
 static struct xt_match tcpudp_mt_reg[] __read_mostly = {
 	{
 		.name		= "tcp",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= tcp_mt_check,
 		.match		= tcp_mt,
 		.matchsize	= sizeof(struct xt_tcp),
@@ -195,7 +181,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "tcp",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= tcp_mt_check,
 		.match		= tcp_mt,
 		.matchsize	= sizeof(struct xt_tcp),
@@ -204,7 +190,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "udp",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= udp_mt_check,
 		.match		= udp_mt,
 		.matchsize	= sizeof(struct xt_udp),
@@ -213,7 +199,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "udp",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= udp_mt_check,
 		.match		= udp_mt,
 		.matchsize	= sizeof(struct xt_udp),
@@ -222,7 +208,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "udplite",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= udp_mt_check,
 		.match		= udp_mt,
 		.matchsize	= sizeof(struct xt_udp),
@@ -231,7 +217,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "udplite",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= udp_mt_check,
 		.match		= udp_mt,
 		.matchsize	= sizeof(struct xt_udp),
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 9f328593287..29375ba8db7 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -136,26 +136,26 @@ static void localtime_3(struct xtm *r, time_t time)
 	 * from w repeatedly while counting.)
 	 */
 	if (is_leap(year)) {
+		/* use days_since_leapyear[] in a leap year */
 		for (i = ARRAY_SIZE(days_since_leapyear) - 1;
-		    i > 0 && days_since_year[i] > w; --i)
+		    i > 0 && days_since_leapyear[i] > w; --i)
 			/* just loop */;
+		r->monthday = w - days_since_leapyear[i] + 1;
 	} else {
 		for (i = ARRAY_SIZE(days_since_year) - 1;
 		    i > 0 && days_since_year[i] > w; --i)
 			/* just loop */;
+		r->monthday = w - days_since_year[i] + 1;
 	}
 
 	r->month    = i + 1;
-	r->monthday = w - days_since_year[i] + 1;
 	return;
 }
 
 static bool
-time_mt(const struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, const struct xt_match *match,
-        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+time_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_time_info *info = matchinfo;
+	const struct xt_time_info *info = par->matchinfo;
 	unsigned int packet_time;
 	struct xtm current_time;
 	s64 stamp;
@@ -218,12 +218,9 @@ time_mt(const struct sk_buff *skb, const struct net_device *in,
 	return true;
 }
 
-static bool
-time_mt_check(const char *tablename, const void *ip,
-              const struct xt_match *match, void *matchinfo,
-              unsigned int hook_mask)
+static bool time_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_time_info *info = matchinfo;
+	const struct xt_time_info *info = par->matchinfo;
 
 	if (info->daytime_start > XT_TIME_MAX_DAYTIME ||
 	    info->daytime_stop > XT_TIME_MAX_DAYTIME) {
@@ -235,33 +232,23 @@ time_mt_check(const char *tablename, const void *ip,
 	return true;
 }
 
-static struct xt_match time_mt_reg[] __read_mostly = {
-	{
-		.name       = "time",
-		.family     = AF_INET,
-		.match      = time_mt,
-		.matchsize  = sizeof(struct xt_time_info),
-		.checkentry = time_mt_check,
-		.me         = THIS_MODULE,
-	},
-	{
-		.name       = "time",
-		.family     = AF_INET6,
-		.match      = time_mt,
-		.matchsize  = sizeof(struct xt_time_info),
-		.checkentry = time_mt_check,
-		.me         = THIS_MODULE,
-	},
+static struct xt_match xt_time_mt_reg __read_mostly = {
+	.name       = "time",
+	.family     = NFPROTO_UNSPEC,
+	.match      = time_mt,
+	.checkentry = time_mt_check,
+	.matchsize  = sizeof(struct xt_time_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init time_mt_init(void)
 {
-	return xt_register_matches(time_mt_reg, ARRAY_SIZE(time_mt_reg));
+	return xt_register_match(&xt_time_mt_reg);
 }
 
 static void __exit time_mt_exit(void)
 {
-	xt_unregister_matches(time_mt_reg, ARRAY_SIZE(time_mt_reg));
+	xt_unregister_match(&xt_time_mt_reg);
 }
 
 module_init(time_mt_init);
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index 627e0f336d5..24a52762450 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -87,43 +87,32 @@ static bool u32_match_it(const struct xt_u32 *data,
 	return true;
 }
 
-static bool
-u32_mt(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool u32_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_u32 *data = matchinfo;
+	const struct xt_u32 *data = par->matchinfo;
 	bool ret;
 
 	ret = u32_match_it(data, skb);
 	return ret ^ data->invert;
 }
 
-static struct xt_match u32_mt_reg[] __read_mostly = {
-	{
-		.name       = "u32",
-		.family     = AF_INET,
-		.match      = u32_mt,
-		.matchsize  = sizeof(struct xt_u32),
-		.me         = THIS_MODULE,
-	},
-	{
-		.name       = "u32",
-		.family     = AF_INET6,
-		.match      = u32_mt,
-		.matchsize  = sizeof(struct xt_u32),
-		.me         = THIS_MODULE,
-	},
+static struct xt_match xt_u32_mt_reg __read_mostly = {
+	.name       = "u32",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.match      = u32_mt,
+	.matchsize  = sizeof(struct xt_u32),
+	.me         = THIS_MODULE,
 };
 
 static int __init u32_mt_init(void)
 {
-	return xt_register_matches(u32_mt_reg, ARRAY_SIZE(u32_mt_reg));
+	return xt_register_match(&xt_u32_mt_reg);
 }
 
 static void __exit u32_mt_exit(void)
 {
-	xt_unregister_matches(u32_mt_reg, ARRAY_SIZE(u32_mt_reg));
+	xt_unregister_match(&xt_u32_mt_reg);
 }
 
 module_init(u32_mt_init);
author	Linus Torvalds <torvalds@linux-foundation.org>	2008-10-11 09:33:18 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-10-11 09:33:18 -0700
commit	4dd9ec4946b4651a295d3bc8df9c15ac692a8f4e (patch)
tree	afb300c752de7175bb2df4722d5c857e070c75d9 /net/netfilter
parent	86ed5a93b8b56e4e0877b914af0e10883a196384 (diff)
parent	6861ff35ec5b60fafaf8651754c9a75142bfa9a4 (diff)