diff options
Diffstat (limited to 'net/xfrm')
| -rw-r--r-- | net/xfrm/Kconfig | 29 | ||||
| -rw-r--r-- | net/xfrm/Makefile | 5 | ||||
| -rw-r--r-- | net/xfrm/xfrm_algo.c | 109 | ||||
| -rw-r--r-- | net/xfrm/xfrm_hash.c | 2 | ||||
| -rw-r--r-- | net/xfrm/xfrm_hash.h | 40 | ||||
| -rw-r--r-- | net/xfrm/xfrm_input.c | 128 | ||||
| -rw-r--r-- | net/xfrm/xfrm_ipcomp.c | 39 | ||||
| -rw-r--r-- | net/xfrm/xfrm_output.c | 63 | ||||
| -rw-r--r-- | net/xfrm/xfrm_policy.c | 1050 | ||||
| -rw-r--r-- | net/xfrm/xfrm_proc.c | 14 | ||||
| -rw-r--r-- | net/xfrm/xfrm_replay.c | 603 | ||||
| -rw-r--r-- | net/xfrm/xfrm_state.c | 794 | ||||
| -rw-r--r-- | net/xfrm/xfrm_sysctl.c | 6 | ||||
| -rw-r--r-- | net/xfrm/xfrm_user.c | 917 | 
14 files changed, 2543 insertions, 1256 deletions
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 6d081674515..bda1a13628a 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -3,12 +3,17 @@  #  config XFRM         bool -       select CRYPTO         depends on NET +config XFRM_ALGO +	tristate +	select XFRM +	select CRYPTO +  config XFRM_USER  	tristate "Transformation user configuration interface" -	depends on INET && XFRM +	depends on INET +	select XFRM_ALGO  	---help---  	  Support for Transformation(XFRM) user configuration interface  	  like IPsec used by native Linux tools. @@ -16,8 +21,8 @@ config XFRM_USER  	  If unsure, say Y.  config XFRM_SUB_POLICY -	bool "Transformation sub policy support (EXPERIMENTAL)" -	depends on XFRM && EXPERIMENTAL +	bool "Transformation sub policy support" +	depends on XFRM  	---help---  	  Support sub policy for developers. By using sub policy with main  	  one, two policies can be applied to the same packet at once. @@ -26,8 +31,8 @@ config XFRM_SUB_POLICY  	  If unsure, say N.  config XFRM_MIGRATE -	bool "Transformation migrate database (EXPERIMENTAL)" -	depends on XFRM && EXPERIMENTAL +	bool "Transformation migrate database" +	depends on XFRM  	---help---  	  A feature to update locator(s) of a given IPsec security  	  association dynamically.  This feature is required, for @@ -37,8 +42,8 @@ config XFRM_MIGRATE  	  If unsure, say N.  config XFRM_STATISTICS -	bool "Transformation statistics (EXPERIMENTAL)" -	depends on INET && XFRM && PROC_FS && EXPERIMENTAL +	bool "Transformation statistics" +	depends on INET && XFRM && PROC_FS  	---help---  	  This statistics is not a SNMP/MIB specification but shows  	  statistics about transformation error (or almost error) factor @@ -48,13 +53,13 @@ config XFRM_STATISTICS  config XFRM_IPCOMP  	tristate -	select XFRM +	select XFRM_ALGO  	select CRYPTO  	select CRYPTO_DEFLATE  config NET_KEY  	tristate "PF_KEY sockets" -	select XFRM +	select XFRM_ALGO  	---help---  	  PF_KEYv2 socket family, compatible to KAME ones.  	  They are required if you are going to use IPsec tools ported @@ -63,8 +68,8 @@ config NET_KEY  	  Say Y unless you know what you are doing.  config NET_KEY_MIGRATE -	bool "PF_KEY MIGRATE (EXPERIMENTAL)" -	depends on NET_KEY && EXPERIMENTAL +	bool "PF_KEY MIGRATE" +	depends on NET_KEY  	select XFRM_MIGRATE  	---help---  	  Add a PF_KEY MIGRATE message to PF_KEYv2 socket family. diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index c631047e1b2..c0e961983f1 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -3,8 +3,9 @@  #  obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ -		      xfrm_input.o xfrm_output.o xfrm_algo.o \ -		      xfrm_sysctl.o +		      xfrm_input.o xfrm_output.o \ +		      xfrm_sysctl.o xfrm_replay.o  obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o +obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o  obj-$(CONFIG_XFRM_USER) += xfrm_user.o  obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 8b4d6e3246e..debe733386f 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -15,9 +15,6 @@  #include <linux/crypto.h>  #include <linux/scatterlist.h>  #include <net/xfrm.h> -#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE) -#include <net/ah.h> -#endif  #if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)  #include <net/esp.h>  #endif @@ -38,6 +35,8 @@ static struct xfrm_algo_desc aead_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_EALG_AES_GCM_ICV8,  		.sadb_alg_ivlen = 8, @@ -54,6 +53,8 @@ static struct xfrm_algo_desc aead_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_EALG_AES_GCM_ICV12,  		.sadb_alg_ivlen = 8, @@ -70,6 +71,8 @@ static struct xfrm_algo_desc aead_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_EALG_AES_GCM_ICV16,  		.sadb_alg_ivlen = 8, @@ -86,6 +89,8 @@ static struct xfrm_algo_desc aead_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_EALG_AES_CCM_ICV8,  		.sadb_alg_ivlen = 8, @@ -102,6 +107,8 @@ static struct xfrm_algo_desc aead_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_EALG_AES_CCM_ICV12,  		.sadb_alg_ivlen = 8, @@ -118,6 +125,8 @@ static struct xfrm_algo_desc aead_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_EALG_AES_CCM_ICV16,  		.sadb_alg_ivlen = 8, @@ -134,6 +143,8 @@ static struct xfrm_algo_desc aead_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_EALG_NULL_AES_GMAC,  		.sadb_alg_ivlen = 8, @@ -154,6 +165,8 @@ static struct xfrm_algo_desc aalg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_AALG_NULL,  		.sadb_alg_ivlen = 0, @@ -172,6 +185,8 @@ static struct xfrm_algo_desc aalg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_AALG_MD5HMAC,  		.sadb_alg_ivlen = 0, @@ -190,6 +205,8 @@ static struct xfrm_algo_desc aalg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_AALG_SHA1HMAC,  		.sadb_alg_ivlen = 0, @@ -208,6 +225,8 @@ static struct xfrm_algo_desc aalg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_AALG_SHA2_256HMAC,  		.sadb_alg_ivlen = 0, @@ -225,6 +244,8 @@ static struct xfrm_algo_desc aalg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_AALG_SHA2_384HMAC,  		.sadb_alg_ivlen = 0, @@ -242,6 +263,8 @@ static struct xfrm_algo_desc aalg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_AALG_SHA2_512HMAC,  		.sadb_alg_ivlen = 0, @@ -260,6 +283,8 @@ static struct xfrm_algo_desc aalg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC,  		.sadb_alg_ivlen = 0, @@ -277,6 +302,8 @@ static struct xfrm_algo_desc aalg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_AALG_AES_XCBC_MAC,  		.sadb_alg_ivlen = 0, @@ -284,6 +311,19 @@ static struct xfrm_algo_desc aalg_list[] = {  		.sadb_alg_maxbits = 128  	}  }, +{ +	/* rfc4494 */ +	.name = "cmac(aes)", + +	.uinfo = { +		.auth = { +			.icv_truncbits = 96, +			.icv_fullbits = 128, +		} +	}, + +	.pfkey_supported = 0, +},  };  static struct xfrm_algo_desc ealg_list[] = { @@ -298,6 +338,8 @@ static struct xfrm_algo_desc ealg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id =	SADB_EALG_NULL,  		.sadb_alg_ivlen = 0, @@ -316,6 +358,8 @@ static struct xfrm_algo_desc ealg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_EALG_DESCBC,  		.sadb_alg_ivlen = 8, @@ -334,6 +378,8 @@ static struct xfrm_algo_desc ealg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_EALG_3DESCBC,  		.sadb_alg_ivlen = 8, @@ -352,6 +398,8 @@ static struct xfrm_algo_desc ealg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_EALG_CASTCBC,  		.sadb_alg_ivlen = 8, @@ -370,6 +418,8 @@ static struct xfrm_algo_desc ealg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_EALG_BLOWFISHCBC,  		.sadb_alg_ivlen = 8, @@ -388,6 +438,8 @@ static struct xfrm_algo_desc ealg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_EALG_AESCBC,  		.sadb_alg_ivlen = 8, @@ -406,6 +458,8 @@ static struct xfrm_algo_desc ealg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_EALG_SERPENTCBC,  		.sadb_alg_ivlen = 8, @@ -424,6 +478,8 @@ static struct xfrm_algo_desc ealg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_EALG_CAMELLIACBC,  		.sadb_alg_ivlen = 8, @@ -442,6 +498,8 @@ static struct xfrm_algo_desc ealg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_EALG_TWOFISHCBC,  		.sadb_alg_ivlen = 8, @@ -459,11 +517,13 @@ static struct xfrm_algo_desc ealg_list[] = {  		}  	}, +	.pfkey_supported = 1, +  	.desc = {  		.sadb_alg_id = SADB_X_EALG_AESCTR,  		.sadb_alg_ivlen	= 8, -		.sadb_alg_minbits = 128, -		.sadb_alg_maxbits = 256 +		.sadb_alg_minbits = 160, +		.sadb_alg_maxbits = 288  	}  },  }; @@ -476,6 +536,7 @@ static struct xfrm_algo_desc calg_list[] = {  			.threshold = 90,  		}  	}, +	.pfkey_supported = 1,  	.desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE }  },  { @@ -485,6 +546,7 @@ static struct xfrm_algo_desc calg_list[] = {  			.threshold = 90,  		}  	}, +	.pfkey_supported = 1,  	.desc = { .sadb_alg_id = SADB_X_CALG_LZS }  },  { @@ -494,6 +556,7 @@ static struct xfrm_algo_desc calg_list[] = {  			.threshold = 50,  		}  	}, +	.pfkey_supported = 1,  	.desc = { .sadb_alg_id = SADB_X_CALG_LZJH }  },  }; @@ -618,21 +681,21 @@ static int xfrm_alg_name_match(const struct xfrm_algo_desc *entry,  			(entry->compat && !strcmp(name, entry->compat)));  } -struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe) +struct xfrm_algo_desc *xfrm_aalg_get_byname(const char *name, int probe)  {  	return xfrm_find_algo(&xfrm_aalg_list, xfrm_alg_name_match, name,  			      probe);  }  EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname); -struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe) +struct xfrm_algo_desc *xfrm_ealg_get_byname(const char *name, int probe)  {  	return xfrm_find_algo(&xfrm_ealg_list, xfrm_alg_name_match, name,  			      probe);  }  EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname); -struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe) +struct xfrm_algo_desc *xfrm_calg_get_byname(const char *name, int probe)  {  	return xfrm_find_algo(&xfrm_calg_list, xfrm_alg_name_match, name,  			      probe); @@ -654,7 +717,7 @@ static int xfrm_aead_name_match(const struct xfrm_algo_desc *entry,  	       !strcmp(name, entry->name);  } -struct xfrm_algo_desc *xfrm_aead_get_byname(char *name, int icv_len, int probe) +struct xfrm_algo_desc *xfrm_aead_get_byname(const char *name, int icv_len, int probe)  {  	struct xfrm_aead_name data = {  		.name = name, @@ -703,8 +766,7 @@ void xfrm_probe_algs(void)  	}  	for (i = 0; i < ealg_entries(); i++) { -		status = crypto_has_blkcipher(ealg_list[i].name, 0, -					      CRYPTO_ALG_ASYNC); +		status = crypto_has_ablkcipher(ealg_list[i].name, 0, 0);  		if (ealg_list[i].available != status)  			ealg_list[i].available = status;  	} @@ -718,37 +780,26 @@ void xfrm_probe_algs(void)  }  EXPORT_SYMBOL_GPL(xfrm_probe_algs); -int xfrm_count_auth_supported(void) +int xfrm_count_pfkey_auth_supported(void)  {  	int i, n;  	for (i = 0, n = 0; i < aalg_entries(); i++) -		if (aalg_list[i].available) +		if (aalg_list[i].available && aalg_list[i].pfkey_supported)  			n++;  	return n;  } -EXPORT_SYMBOL_GPL(xfrm_count_auth_supported); +EXPORT_SYMBOL_GPL(xfrm_count_pfkey_auth_supported); -int xfrm_count_enc_supported(void) +int xfrm_count_pfkey_enc_supported(void)  {  	int i, n;  	for (i = 0, n = 0; i < ealg_entries(); i++) -		if (ealg_list[i].available) +		if (ealg_list[i].available && ealg_list[i].pfkey_supported)  			n++;  	return n;  } -EXPORT_SYMBOL_GPL(xfrm_count_enc_supported); +EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported); -#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) - -void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) -{ -	if (tail != skb) { -		skb->data_len += len; -		skb->len += len; -	} -	return skb_put(tail, len); -} -EXPORT_SYMBOL_GPL(pskb_put); -#endif +MODULE_LICENSE("GPL"); diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c index a2023ec5232..1e98bc0fe0a 100644 --- a/net/xfrm/xfrm_hash.c +++ b/net/xfrm/xfrm_hash.c @@ -19,7 +19,7 @@ struct hlist_head *xfrm_hash_alloc(unsigned int sz)  	if (sz <= PAGE_SIZE)  		n = kzalloc(sz, GFP_KERNEL);  	else if (hashdist) -		n = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); +		n = vzalloc(sz);  	else  		n = (struct hlist_head *)  			__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index 8e69533d231..0622d319e1f 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -4,29 +4,32 @@  #include <linux/xfrm.h>  #include <linux/socket.h> -static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm4_addr_hash(const xfrm_address_t *addr)  {  	return ntohl(addr->a4);  } -static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm6_addr_hash(const xfrm_address_t *addr)  {  	return ntohl(addr->a6[2] ^ addr->a6[3]);  } -static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr, +						    const xfrm_address_t *saddr)  {  	u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4;  	return ntohl((__force __be32)sum);  } -static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +static inline unsigned int __xfrm6_daddr_saddr_hash(const xfrm_address_t *daddr, +						    const xfrm_address_t *saddr)  {  	return ntohl(daddr->a6[2] ^ daddr->a6[3] ^  		     saddr->a6[2] ^ saddr->a6[3]);  } -static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, +static inline unsigned int __xfrm_dst_hash(const xfrm_address_t *daddr, +					   const xfrm_address_t *saddr,  					   u32 reqid, unsigned short family,  					   unsigned int hmask)  { @@ -42,10 +45,10 @@ static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t  	return (h ^ (h >> 16)) & hmask;  } -static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr, -				       xfrm_address_t *saddr, -				       unsigned short family, -				       unsigned int hmask) +static inline unsigned int __xfrm_src_hash(const xfrm_address_t *daddr, +					   const xfrm_address_t *saddr, +					   unsigned short family, +					   unsigned int hmask)  {  	unsigned int h = family;  	switch (family) { @@ -60,8 +63,8 @@ static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr,  }  static inline unsigned int -__xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family, -		unsigned int hmask) +__xfrm_spi_hash(const xfrm_address_t *daddr, __be32 spi, u8 proto, +		unsigned short family, unsigned int hmask)  {  	unsigned int h = (__force u32)spi ^ proto;  	switch (family) { @@ -80,10 +83,11 @@ static inline unsigned int __idx_hash(u32 index, unsigned int hmask)  	return (index ^ (index >> 8)) & hmask;  } -static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) +static inline unsigned int __sel_hash(const struct xfrm_selector *sel, +				      unsigned short family, unsigned int hmask)  { -	xfrm_address_t *daddr = &sel->daddr; -	xfrm_address_t *saddr = &sel->saddr; +	const xfrm_address_t *daddr = &sel->daddr; +	const xfrm_address_t *saddr = &sel->saddr;  	unsigned int h = 0;  	switch (family) { @@ -107,7 +111,9 @@ static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short  	return h & hmask;  } -static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) +static inline unsigned int __addr_hash(const xfrm_address_t *daddr, +				       const xfrm_address_t *saddr, +				       unsigned short family, unsigned int hmask)  {  	unsigned int h = 0; @@ -124,7 +130,7 @@ static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *sa  	return h & hmask;  } -extern struct hlist_head *xfrm_hash_alloc(unsigned int sz); -extern void xfrm_hash_free(struct hlist_head *n, unsigned int sz); +struct hlist_head *xfrm_hash_alloc(unsigned int sz); +void xfrm_hash_free(struct hlist_head *n, unsigned int sz);  #endif /* _XFRM_HASH_H */ diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 45f1c98d4fc..85d1d476461 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -16,6 +16,81 @@  static struct kmem_cache *secpath_cachep __read_mostly; +static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); +static struct xfrm_input_afinfo __rcu *xfrm_input_afinfo[NPROTO]; + +int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo) +{ +	int err = 0; + +	if (unlikely(afinfo == NULL)) +		return -EINVAL; +	if (unlikely(afinfo->family >= NPROTO)) +		return -EAFNOSUPPORT; +	spin_lock_bh(&xfrm_input_afinfo_lock); +	if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL)) +		err = -ENOBUFS; +	else +		rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo); +	spin_unlock_bh(&xfrm_input_afinfo_lock); +	return err; +} +EXPORT_SYMBOL(xfrm_input_register_afinfo); + +int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo) +{ +	int err = 0; + +	if (unlikely(afinfo == NULL)) +		return -EINVAL; +	if (unlikely(afinfo->family >= NPROTO)) +		return -EAFNOSUPPORT; +	spin_lock_bh(&xfrm_input_afinfo_lock); +	if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) { +		if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo)) +			err = -EINVAL; +		else +			RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL); +	} +	spin_unlock_bh(&xfrm_input_afinfo_lock); +	synchronize_rcu(); +	return err; +} +EXPORT_SYMBOL(xfrm_input_unregister_afinfo); + +static struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family) +{ +	struct xfrm_input_afinfo *afinfo; + +	if (unlikely(family >= NPROTO)) +		return NULL; +	rcu_read_lock(); +	afinfo = rcu_dereference(xfrm_input_afinfo[family]); +	if (unlikely(!afinfo)) +		rcu_read_unlock(); +	return afinfo; +} + +static void xfrm_input_put_afinfo(struct xfrm_input_afinfo *afinfo) +{ +	rcu_read_unlock(); +} + +static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol, +		       int err) +{ +	int ret; +	struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family); + +	if (!afinfo) +		return -EAFNOSUPPORT; + +	ret = afinfo->callback(skb, protocol, err); +	xfrm_input_put_afinfo(afinfo); + +	return ret; +} +  void __secpath_destroy(struct sec_path *sp)  {  	int i; @@ -67,7 +142,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)  	case IPPROTO_COMP:  		if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))  			return -EINVAL; -		*spi = htonl(ntohs(*(__be16*)(skb_transport_header(skb) + 2))); +		*spi = htonl(ntohs(*(__be16 *)(skb_transport_header(skb) + 2)));  		*seq = 0;  		return 0;  	default: @@ -77,8 +152,8 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)  	if (!pskb_may_pull(skb, hlen))  		return -EINVAL; -	*spi = *(__be32*)(skb_transport_header(skb) + offset); -	*seq = *(__be32*)(skb_transport_header(skb) + offset_seq); +	*spi = *(__be32 *)(skb_transport_header(skb) + offset); +	*seq = *(__be32 *)(skb_transport_header(skb) + offset_seq);  	return 0;  } @@ -107,7 +182,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)  	struct net *net = dev_net(skb->dev);  	int err;  	__be32 seq; -	struct xfrm_state *x; +	__be32 seq_hi; +	struct xfrm_state *x = NULL;  	xfrm_address_t *daddr;  	struct xfrm_mode *inner_mode;  	unsigned int family; @@ -118,10 +194,15 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)  	if (encap_type < 0) {  		async = 1;  		x = xfrm_input_state(skb); -		seq = XFRM_SKB_CB(skb)->seq.input; +		seq = XFRM_SKB_CB(skb)->seq.input.low; +		family = x->outer_mode->afinfo->family;  		goto resume;  	} +	daddr = (xfrm_address_t *)(skb_network_header(skb) + +				   XFRM_SPI_SKB_CB(skb)->daddroff); +	family = XFRM_SPI_SKB_CB(skb)->family; +  	/* Allocate new secpath or COW existing one. */  	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {  		struct sec_path *sp; @@ -136,10 +217,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)  		skb->sp = sp;  	} -	daddr = (xfrm_address_t *)(skb_network_header(skb) + -				   XFRM_SPI_SKB_CB(skb)->daddroff); -	family = XFRM_SPI_SKB_CB(skb)->family; -  	seq = 0;  	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {  		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); @@ -161,7 +238,17 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)  		skb->sp->xvec[skb->sp->len++] = x; +		if (xfrm_tunnel_check(skb, x, family)) { +			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); +			goto drop; +		} +  		spin_lock(&x->lock); +		if (unlikely(x->km.state == XFRM_STATE_ACQ)) { +			XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); +			goto drop_unlock; +		} +  		if (unlikely(x->km.state != XFRM_STATE_VALID)) {  			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID);  			goto drop_unlock; @@ -172,7 +259,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)  			goto drop_unlock;  		} -		if (x->props.replay_window && xfrm_replay_check(x, skb, seq)) { +		if (x->repl->check(x, skb, seq)) {  			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);  			goto drop_unlock;  		} @@ -184,13 +271,17 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)  		spin_unlock(&x->lock); -		XFRM_SKB_CB(skb)->seq.input = seq; +		seq_hi = htonl(xfrm_replay_seqhi(x, seq)); + +		XFRM_SKB_CB(skb)->seq.input.low = seq; +		XFRM_SKB_CB(skb)->seq.input.hi = seq_hi; + +		skb_dst_force(skb);  		nexthdr = x->type->input(x, skb);  		if (nexthdr == -EINPROGRESS)  			return 0; -  resume:  		spin_lock(&x->lock);  		if (nexthdr <= 0) { @@ -206,8 +297,12 @@ resume:  		/* only the first xfrm gets the encap type */  		encap_type = 0; -		if (x->props.replay_window) -			xfrm_replay_advance(x, seq); +		if (async && x->repl->recheck(x, skb, seq)) { +			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); +			goto drop_unlock; +		} + +		x->repl->advance(x, seq);  		x->curlft.bytes += skb->len;  		x->curlft.packets++; @@ -248,6 +343,10 @@ resume:  		}  	} while (!err); +	err = xfrm_rcv_cb(skb, family, x->type->proto, 0); +	if (err) +		goto drop; +  	nf_reset(skb);  	if (decaps) { @@ -261,6 +360,7 @@ resume:  drop_unlock:  	spin_unlock(&x->lock);  drop: +	xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1);  	kfree_skb(skb);  	return 0;  } diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index fc91ad7ee26..ccfdc7115a8 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -70,26 +70,29 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)  	while ((scratch += len, dlen -= len) > 0) {  		skb_frag_t *frag; +		struct page *page;  		err = -EMSGSIZE;  		if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS))  			goto out;  		frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags; -		frag->page = alloc_page(GFP_ATOMIC); +		page = alloc_page(GFP_ATOMIC);  		err = -ENOMEM; -		if (!frag->page) +		if (!page)  			goto out; +		__skb_frag_set_page(frag, page); +  		len = PAGE_SIZE;  		if (dlen < len)  			len = dlen; -		memcpy(page_address(frag->page), scratch, len); -  		frag->page_offset = 0; -		frag->size = len; +		skb_frag_size_set(frag, len); +		memcpy(skb_frag_address(frag), scratch, len); +  		skb->truesize += len;  		skb->data_len += len;  		skb->len += len; @@ -138,14 +141,14 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)  	const int plen = skb->len;  	int dlen = IPCOMP_SCRATCH_SIZE;  	u8 *start = skb->data; -	const int cpu = get_cpu(); -	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); -	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); +	struct crypto_comp *tfm; +	u8 *scratch;  	int err;  	local_bh_disable(); +	scratch = *this_cpu_ptr(ipcomp_scratches); +	tfm = *this_cpu_ptr(ipcd->tfms);  	err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); -	local_bh_enable();  	if (err)  		goto out; @@ -155,13 +158,13 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)  	}  	memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); -	put_cpu(); +	local_bh_enable();  	pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr));  	return 0;  out: -	put_cpu(); +	local_bh_enable();  	return err;  } @@ -217,8 +220,8 @@ static void ipcomp_free_scratches(void)  static void * __percpu *ipcomp_alloc_scratches(void)  { -	int i;  	void * __percpu *scratches; +	int i;  	if (ipcomp_scratch_users++)  		return ipcomp_scratches; @@ -230,7 +233,9 @@ static void * __percpu *ipcomp_alloc_scratches(void)  	ipcomp_scratches = scratches;  	for_each_possible_cpu(i) { -		void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE); +		void *scratch; + +		scratch = vmalloc_node(IPCOMP_SCRATCH_SIZE, cpu_to_node(i));  		if (!scratch)  			return NULL;  		*per_cpu_ptr(scratches, i) = scratch; @@ -273,18 +278,16 @@ static struct crypto_comp * __percpu *ipcomp_alloc_tfms(const char *alg_name)  	struct crypto_comp * __percpu *tfms;  	int cpu; -	/* This can be any valid CPU ID so we don't need locking. */ -	cpu = raw_smp_processor_id();  	list_for_each_entry(pos, &ipcomp_tfms_list, list) {  		struct crypto_comp *tfm; -		tfms = pos->tfms; -		tfm = *per_cpu_ptr(tfms, cpu); +		/* This can be any valid CPU ID so we don't need locking. */ +		tfm = __this_cpu_read(*pos->tfms);  		if (!strcmp(crypto_comp_name(tfm), alg_name)) {  			pos->users++; -			return tfms; +			return pos->tfms;  		}  	} diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 64f2ae1fdc1..c51e8f7b865 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -21,7 +21,7 @@  static int xfrm_output2(struct sk_buff *skb); -static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) +static int xfrm_skb_check_space(struct sk_buff *skb)  {  	struct dst_entry *dst = skb_dst(skb);  	int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev) @@ -48,7 +48,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)  		goto resume;  	do { -		err = xfrm_state_check_space(x, skb); +		err = xfrm_skb_check_space(skb);  		if (err) {  			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);  			goto error_nolock; @@ -61,23 +61,23 @@ static int xfrm_output_one(struct sk_buff *skb, int err)  		}  		spin_lock_bh(&x->lock); + +		if (unlikely(x->km.state != XFRM_STATE_VALID)) { +			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID); +			err = -EINVAL; +			goto error; +		} +  		err = xfrm_state_check_expire(x);  		if (err) {  			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED);  			goto error;  		} -		if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { -			XFRM_SKB_CB(skb)->seq.output = ++x->replay.oseq; -			if (unlikely(x->replay.oseq == 0)) { -				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); -				x->replay.oseq--; -				xfrm_audit_state_replay_overflow(x, skb); -				err = -EOVERFLOW; -				goto error; -			} -			if (xfrm_aevent_is_on(net)) -				xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); +		err = x->repl->overflow(x, skb); +		if (err) { +			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); +			goto error;  		}  		x->curlft.bytes += skb->len; @@ -85,9 +85,11 @@ static int xfrm_output_one(struct sk_buff *skb, int err)  		spin_unlock_bh(&x->lock); +		skb_dst_force(skb); +  		err = x->type->output(x, skb);  		if (err == -EINPROGRESS) -			goto out_exit; +			goto out;  resume:  		if (err) { @@ -101,19 +103,18 @@ resume:  			err = -EHOSTUNREACH;  			goto error_nolock;  		} -		skb_dst_set(skb, dst_clone(dst)); +		skb_dst_set(skb, dst);  		x = dst->xfrm;  	} while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); -	err = 0; +	return 0; -out_exit: -	return err;  error:  	spin_unlock_bh(&x->lock);  error_nolock:  	kfree_skb(skb); -	goto out_exit; +out: +	return err;  }  int xfrm_output_resume(struct sk_buff *skb, int err) @@ -198,6 +199,7 @@ int xfrm_output(struct sk_buff *skb)  	return xfrm_output2(skb);  } +EXPORT_SYMBOL_GPL(xfrm_output);  int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)  { @@ -212,6 +214,25 @@ int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)  		return -EAFNOSUPPORT;  	return inner_mode->afinfo->extract_output(x, skb);  } - -EXPORT_SYMBOL_GPL(xfrm_output);  EXPORT_SYMBOL_GPL(xfrm_inner_extract_output); + +void xfrm_local_error(struct sk_buff *skb, int mtu) +{ +	unsigned int proto; +	struct xfrm_state_afinfo *afinfo; + +	if (skb->protocol == htons(ETH_P_IP)) +		proto = AF_INET; +	else if (skb->protocol == htons(ETH_P_IPV6)) +		proto = AF_INET6; +	else +		return; + +	afinfo = xfrm_state_get_afinfo(proto); +	if (!afinfo) +		return; + +	afinfo->local_error(skb, mtu); +	xfrm_state_put_afinfo(afinfo); +} +EXPORT_SYMBOL_GPL(xfrm_local_error); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 044e7789851..0525d78ba32 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -26,6 +26,7 @@  #include <linux/cache.h>  #include <linux/audit.h>  #include <net/dst.h> +#include <net/flow.h>  #include <net/xfrm.h>  #include <net/ip.h>  #ifdef CONFIG_XFRM_STATISTICS @@ -34,53 +35,52 @@  #include "xfrm_hash.h" -DEFINE_MUTEX(xfrm_cfg_mutex); -EXPORT_SYMBOL(xfrm_cfg_mutex); +#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10)) +#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) +#define XFRM_MAX_QUEUE_LEN	100 -static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock); -static struct dst_entry *xfrm_policy_sk_bundles; -static DEFINE_RWLOCK(xfrm_policy_lock); - -static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); -static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; +static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); +static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] +						__read_mostly;  static struct kmem_cache *xfrm_dst_cache __read_mostly; -static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); -static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);  static void xfrm_init_pmtu(struct dst_entry *dst);  static int stale_bundle(struct dst_entry *dst); -static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, -			  struct flowi *fl, int family, int strict); - +static int xfrm_bundle_ok(struct xfrm_dst *xdst); +static void xfrm_policy_queue_process(unsigned long arg);  static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,  						int dir); -static inline int -__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) +static inline bool +__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)  { -	return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) && -		addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) && -		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && -		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && -		(fl->proto == sel->proto || !sel->proto) && -		(fl->oif == sel->ifindex || !sel->ifindex); +	const struct flowi4 *fl4 = &fl->u.ip4; + +	return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) && +		addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) && +		!((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) && +		!((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) && +		(fl4->flowi4_proto == sel->proto || !sel->proto) && +		(fl4->flowi4_oif == sel->ifindex || !sel->ifindex);  } -static inline int -__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl) +static inline bool +__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)  { -	return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && -		addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && -		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && -		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && -		(fl->proto == sel->proto || !sel->proto) && -		(fl->oif == sel->ifindex || !sel->ifindex); +	const struct flowi6 *fl6 = &fl->u.ip6; + +	return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) && +		addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) && +		!((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) && +		!((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) && +		(fl6->flowi6_proto == sel->proto || !sel->proto) && +		(fl6->flowi6_oif == sel->ifindex || !sel->ifindex);  } -int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, -		    unsigned short family) +bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, +			 unsigned short family)  {  	switch (family) {  	case AF_INET: @@ -88,12 +88,30 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,  	case AF_INET6:  		return __xfrm6_selector_match(sel, fl);  	} -	return 0; +	return false; +} + +static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) +{ +	struct xfrm_policy_afinfo *afinfo; + +	if (unlikely(family >= NPROTO)) +		return NULL; +	rcu_read_lock(); +	afinfo = rcu_dereference(xfrm_policy_afinfo[family]); +	if (unlikely(!afinfo)) +		rcu_read_unlock(); +	return afinfo; +} + +static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) +{ +	rcu_read_unlock();  }  static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, -						  xfrm_address_t *saddr, -						  xfrm_address_t *daddr, +						  const xfrm_address_t *saddr, +						  const xfrm_address_t *daddr,  						  int family)  {  	struct xfrm_policy_afinfo *afinfo; @@ -151,7 +169,7 @@ static inline unsigned long make_jiffies(long secs)  static void xfrm_policy_timer(unsigned long data)  { -	struct xfrm_policy *xp = (struct xfrm_policy*)data; +	struct xfrm_policy *xp = (struct xfrm_policy *)data;  	unsigned long now = get_seconds();  	long next = LONG_MAX;  	int warn = 0; @@ -266,8 +284,11 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)  		INIT_HLIST_NODE(&policy->byidx);  		rwlock_init(&policy->lock);  		atomic_set(&policy->refcnt, 1); +		skb_queue_head_init(&policy->polq.hold_queue);  		setup_timer(&policy->timer, xfrm_policy_timer,  				(unsigned long)policy); +		setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, +			    (unsigned long)policy);  		policy->flo.ops = &xfrm_policy_fc_ops;  	}  	return policy; @@ -280,7 +301,7 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)  {  	BUG_ON(!policy->walk.dead); -	if (del_timer(&policy->timer)) +	if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))  		BUG();  	security_xfrm_policy_free(policy->security); @@ -288,6 +309,14 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)  }  EXPORT_SYMBOL(xfrm_policy_destroy); +static void xfrm_queue_purge(struct sk_buff_head *list) +{ +	struct sk_buff *skb; + +	while ((skb = skb_dequeue(list)) != NULL) +		kfree_skb(skb); +} +  /* Rule must be locked. Release descentant resources, announce   * entry dead. The rule must be unlinked from lists to the moment.   */ @@ -298,6 +327,10 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)  	atomic_inc(&policy->genid); +	if (del_timer(&policy->polq.hold_timer)) +		xfrm_pol_put(policy); +	xfrm_queue_purge(&policy->polq.hold_queue); +  	if (del_timer(&policy->timer))  		xfrm_pol_put(policy); @@ -311,7 +344,9 @@ static inline unsigned int idx_hash(struct net *net, u32 index)  	return __idx_hash(index, net->xfrm.policy_idx_hmask);  } -static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selector *sel, unsigned short family, int dir) +static struct hlist_head *policy_hash_bysel(struct net *net, +					    const struct xfrm_selector *sel, +					    unsigned short family, int dir)  {  	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;  	unsigned int hash = __sel_hash(sel, family, hmask); @@ -321,7 +356,10 @@ static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selecto  		net->xfrm.policy_bydst[dir].table + hash);  } -static struct hlist_head *policy_hash_direct(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) +static struct hlist_head *policy_hash_direct(struct net *net, +					     const xfrm_address_t *daddr, +					     const xfrm_address_t *saddr, +					     unsigned short family, int dir)  {  	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;  	unsigned int hash = __addr_hash(daddr, saddr, family, hmask); @@ -333,27 +371,27 @@ static void xfrm_dst_hash_transfer(struct hlist_head *list,  				   struct hlist_head *ndsttable,  				   unsigned int nhashmask)  { -	struct hlist_node *entry, *tmp, *entry0 = NULL; +	struct hlist_node *tmp, *entry0 = NULL;  	struct xfrm_policy *pol;  	unsigned int h0 = 0;  redo: -	hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) { +	hlist_for_each_entry_safe(pol, tmp, list, bydst) {  		unsigned int h;  		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,  				pol->family, nhashmask);  		if (!entry0) { -			hlist_del(entry); +			hlist_del(&pol->bydst);  			hlist_add_head(&pol->bydst, ndsttable+h);  			h0 = h;  		} else {  			if (h != h0)  				continue; -			hlist_del(entry); +			hlist_del(&pol->bydst);  			hlist_add_after(entry0, &pol->bydst);  		} -		entry0 = entry; +		entry0 = &pol->bydst;  	}  	if (!hlist_empty(list)) {  		entry0 = NULL; @@ -365,10 +403,10 @@ static void xfrm_idx_hash_transfer(struct hlist_head *list,  				   struct hlist_head *nidxtable,  				   unsigned int nhashmask)  { -	struct hlist_node *entry, *tmp; +	struct hlist_node *tmp;  	struct xfrm_policy *pol; -	hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) { +	hlist_for_each_entry_safe(pol, tmp, list, byidx) {  		unsigned int h;  		h = __idx_hash(pol->index, nhashmask); @@ -393,7 +431,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)  	if (!ndst)  		return; -	write_lock_bh(&xfrm_policy_lock); +	write_lock_bh(&net->xfrm.xfrm_policy_lock);  	for (i = hmask; i >= 0; i--)  		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); @@ -401,7 +439,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)  	net->xfrm.policy_bydst[dir].table = ndst;  	net->xfrm.policy_bydst[dir].hmask = nhashmask; -	write_unlock_bh(&xfrm_policy_lock); +	write_unlock_bh(&net->xfrm.xfrm_policy_lock);  	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));  } @@ -418,7 +456,7 @@ static void xfrm_byidx_resize(struct net *net, int total)  	if (!nidx)  		return; -	write_lock_bh(&xfrm_policy_lock); +	write_lock_bh(&net->xfrm.xfrm_policy_lock);  	for (i = hmask; i >= 0; i--)  		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); @@ -426,7 +464,7 @@ static void xfrm_byidx_resize(struct net *net, int total)  	net->xfrm.policy_byidx = nidx;  	net->xfrm.policy_idx_hmask = nhashmask; -	write_unlock_bh(&xfrm_policy_lock); +	write_unlock_bh(&net->xfrm.xfrm_policy_lock);  	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));  } @@ -459,7 +497,7 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total)  void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)  { -	read_lock_bh(&xfrm_policy_lock); +	read_lock_bh(&net->xfrm.xfrm_policy_lock);  	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];  	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];  	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; @@ -468,7 +506,7 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)  	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];  	si->spdhcnt = net->xfrm.policy_idx_hmask;  	si->spdhmcnt = xfrm_policy_hashmax; -	read_unlock_bh(&xfrm_policy_lock); +	read_unlock_bh(&net->xfrm.xfrm_policy_lock);  }  EXPORT_SYMBOL(xfrm_spd_getinfo); @@ -493,24 +531,29 @@ static void xfrm_hash_resize(struct work_struct *work)  /* Generate new index... KAME seems to generate them ordered by cost   * of an absolute inpredictability of ordering of rules. This will not pass. */ -static u32 xfrm_gen_index(struct net *net, int dir) +static u32 xfrm_gen_index(struct net *net, int dir, u32 index)  {  	static u32 idx_generator;  	for (;;) { -		struct hlist_node *entry;  		struct hlist_head *list;  		struct xfrm_policy *p;  		u32 idx;  		int found; -		idx = (idx_generator | dir); -		idx_generator += 8; +		if (!index) { +			idx = (idx_generator | dir); +			idx_generator += 8; +		} else { +			idx = index; +			index = 0; +		} +  		if (idx == 0)  			idx = 8;  		list = net->xfrm.policy_byidx + idx_hash(net, idx);  		found = 0; -		hlist_for_each_entry(p, entry, list, byidx) { +		hlist_for_each_entry(p, list, byidx) {  			if (p->index == idx) {  				found = 1;  				break; @@ -536,27 +579,68 @@ static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s  	return 0;  } +static void xfrm_policy_requeue(struct xfrm_policy *old, +				struct xfrm_policy *new) +{ +	struct xfrm_policy_queue *pq = &old->polq; +	struct sk_buff_head list; + +	__skb_queue_head_init(&list); + +	spin_lock_bh(&pq->hold_queue.lock); +	skb_queue_splice_init(&pq->hold_queue, &list); +	if (del_timer(&pq->hold_timer)) +		xfrm_pol_put(old); +	spin_unlock_bh(&pq->hold_queue.lock); + +	if (skb_queue_empty(&list)) +		return; + +	pq = &new->polq; + +	spin_lock_bh(&pq->hold_queue.lock); +	skb_queue_splice(&list, &pq->hold_queue); +	pq->timeout = XFRM_QUEUE_TMO_MIN; +	if (!mod_timer(&pq->hold_timer, jiffies)) +		xfrm_pol_hold(new); +	spin_unlock_bh(&pq->hold_queue.lock); +} + +static bool xfrm_policy_mark_match(struct xfrm_policy *policy, +				   struct xfrm_policy *pol) +{ +	u32 mark = policy->mark.v & policy->mark.m; + +	if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m) +		return true; + +	if ((mark & pol->mark.m) == pol->mark.v && +	    policy->priority == pol->priority) +		return true; + +	return false; +} +  int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)  {  	struct net *net = xp_net(policy);  	struct xfrm_policy *pol;  	struct xfrm_policy *delpol;  	struct hlist_head *chain; -	struct hlist_node *entry, *newpos; -	u32 mark = policy->mark.v & policy->mark.m; +	struct hlist_node *newpos; -	write_lock_bh(&xfrm_policy_lock); +	write_lock_bh(&net->xfrm.xfrm_policy_lock);  	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);  	delpol = NULL;  	newpos = NULL; -	hlist_for_each_entry(pol, entry, chain, bydst) { +	hlist_for_each_entry(pol, chain, bydst) {  		if (pol->type == policy->type &&  		    !selector_cmp(&pol->selector, &policy->selector) && -		    (mark & pol->mark.m) == pol->mark.v && +		    xfrm_policy_mark_match(policy, pol) &&  		    xfrm_sec_ctx_match(pol->security, policy->security) &&  		    !WARN_ON(delpol)) {  			if (excl) { -				write_unlock_bh(&xfrm_policy_lock); +				write_unlock_bh(&net->xfrm.xfrm_policy_lock);  				return -EEXIST;  			}  			delpol = pol; @@ -575,17 +659,26 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)  		hlist_add_head(&policy->bydst, chain);  	xfrm_pol_hold(policy);  	net->xfrm.policy_count[dir]++; -	atomic_inc(&flow_cache_genid); -	if (delpol) +	atomic_inc(&net->xfrm.flow_cache_genid); + +	/* After previous checking, family can either be AF_INET or AF_INET6 */ +	if (policy->family == AF_INET) +		rt_genid_bump_ipv4(net); +	else +		rt_genid_bump_ipv6(net); + +	if (delpol) { +		xfrm_policy_requeue(delpol, policy);  		__xfrm_policy_unlink(delpol, dir); -	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir); +	} +	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);  	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));  	policy->curlft.add_time = get_seconds();  	policy->curlft.use_time = 0;  	if (!mod_timer(&policy->timer, jiffies + HZ))  		xfrm_pol_hold(policy);  	list_add(&policy->walk.all, &net->xfrm.policy_all); -	write_unlock_bh(&xfrm_policy_lock); +	write_unlock_bh(&net->xfrm.xfrm_policy_lock);  	if (delpol)  		xfrm_policy_kill(delpol); @@ -603,13 +696,12 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,  {  	struct xfrm_policy *pol, *ret;  	struct hlist_head *chain; -	struct hlist_node *entry;  	*err = 0; -	write_lock_bh(&xfrm_policy_lock); +	write_lock_bh(&net->xfrm.xfrm_policy_lock);  	chain = policy_hash_bysel(net, sel, sel->family, dir);  	ret = NULL; -	hlist_for_each_entry(pol, entry, chain, bydst) { +	hlist_for_each_entry(pol, chain, bydst) {  		if (pol->type == type &&  		    (mark & pol->mark.m) == pol->mark.v &&  		    !selector_cmp(sel, &pol->selector) && @@ -619,7 +711,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,  				*err = security_xfrm_policy_delete(  								pol->security);  				if (*err) { -					write_unlock_bh(&xfrm_policy_lock); +					write_unlock_bh(&net->xfrm.xfrm_policy_lock);  					return pol;  				}  				__xfrm_policy_unlink(pol, dir); @@ -628,7 +720,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,  			break;  		}  	} -	write_unlock_bh(&xfrm_policy_lock); +	write_unlock_bh(&net->xfrm.xfrm_policy_lock);  	if (ret && delete)  		xfrm_policy_kill(ret); @@ -641,17 +733,16 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,  {  	struct xfrm_policy *pol, *ret;  	struct hlist_head *chain; -	struct hlist_node *entry;  	*err = -ENOENT;  	if (xfrm_policy_id2dir(id) != dir)  		return NULL;  	*err = 0; -	write_lock_bh(&xfrm_policy_lock); +	write_lock_bh(&net->xfrm.xfrm_policy_lock);  	chain = net->xfrm.policy_byidx + idx_hash(net, id);  	ret = NULL; -	hlist_for_each_entry(pol, entry, chain, byidx) { +	hlist_for_each_entry(pol, chain, byidx) {  		if (pol->type == type && pol->index == id &&  		    (mark & pol->mark.m) == pol->mark.v) {  			xfrm_pol_hold(pol); @@ -659,7 +750,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,  				*err = security_xfrm_policy_delete(  								pol->security);  				if (*err) { -					write_unlock_bh(&xfrm_policy_lock); +					write_unlock_bh(&net->xfrm.xfrm_policy_lock);  					return pol;  				}  				__xfrm_policy_unlink(pol, dir); @@ -668,7 +759,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,  			break;  		}  	} -	write_unlock_bh(&xfrm_policy_lock); +	write_unlock_bh(&net->xfrm.xfrm_policy_lock);  	if (ret && delete)  		xfrm_policy_kill(ret); @@ -678,30 +769,26 @@ EXPORT_SYMBOL(xfrm_policy_byid);  #ifdef CONFIG_SECURITY_NETWORK_XFRM  static inline int -xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)  {  	int dir, err = 0;  	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {  		struct xfrm_policy *pol; -		struct hlist_node *entry;  		int i; -		hlist_for_each_entry(pol, entry, +		hlist_for_each_entry(pol,  				     &net->xfrm.policy_inexact[dir], bydst) {  			if (pol->type != type)  				continue;  			err = security_xfrm_policy_delete(pol->security);  			if (err) { -				xfrm_audit_policy_delete(pol, 0, -							 audit_info->loginuid, -							 audit_info->sessionid, -							 audit_info->secid); +				xfrm_audit_policy_delete(pol, 0, task_valid);  				return err;  			}  		}  		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { -			hlist_for_each_entry(pol, entry, +			hlist_for_each_entry(pol,  					     net->xfrm.policy_bydst[dir].table + i,  					     bydst) {  				if (pol->type != type) @@ -710,9 +797,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi  								pol->security);  				if (err) {  					xfrm_audit_policy_delete(pol, 0, -							audit_info->loginuid, -							audit_info->sessionid, -							audit_info->secid); +								 task_valid);  					return err;  				}  			} @@ -722,64 +807,58 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi  }  #else  static inline int -xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)  {  	return 0;  }  #endif -int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) +int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)  {  	int dir, err = 0, cnt = 0; -	write_lock_bh(&xfrm_policy_lock); +	write_lock_bh(&net->xfrm.xfrm_policy_lock); -	err = xfrm_policy_flush_secctx_check(net, type, audit_info); +	err = xfrm_policy_flush_secctx_check(net, type, task_valid);  	if (err)  		goto out;  	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {  		struct xfrm_policy *pol; -		struct hlist_node *entry;  		int i;  	again1: -		hlist_for_each_entry(pol, entry, +		hlist_for_each_entry(pol,  				     &net->xfrm.policy_inexact[dir], bydst) {  			if (pol->type != type)  				continue;  			__xfrm_policy_unlink(pol, dir); -			write_unlock_bh(&xfrm_policy_lock); +			write_unlock_bh(&net->xfrm.xfrm_policy_lock);  			cnt++; -			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, -						 audit_info->sessionid, -						 audit_info->secid); +			xfrm_audit_policy_delete(pol, 1, task_valid);  			xfrm_policy_kill(pol); -			write_lock_bh(&xfrm_policy_lock); +			write_lock_bh(&net->xfrm.xfrm_policy_lock);  			goto again1;  		}  		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {  	again2: -			hlist_for_each_entry(pol, entry, +			hlist_for_each_entry(pol,  					     net->xfrm.policy_bydst[dir].table + i,  					     bydst) {  				if (pol->type != type)  					continue;  				__xfrm_policy_unlink(pol, dir); -				write_unlock_bh(&xfrm_policy_lock); +				write_unlock_bh(&net->xfrm.xfrm_policy_lock);  				cnt++; -				xfrm_audit_policy_delete(pol, 1, -							 audit_info->loginuid, -							 audit_info->sessionid, -							 audit_info->secid); +				xfrm_audit_policy_delete(pol, 1, task_valid);  				xfrm_policy_kill(pol); -				write_lock_bh(&xfrm_policy_lock); +				write_lock_bh(&net->xfrm.xfrm_policy_lock);  				goto again2;  			}  		} @@ -788,7 +867,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)  	if (!cnt)  		err = -ESRCH;  out: -	write_unlock_bh(&xfrm_policy_lock); +	write_unlock_bh(&net->xfrm.xfrm_policy_lock);  	return err;  }  EXPORT_SYMBOL(xfrm_policy_flush); @@ -808,7 +887,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,  	if (list_empty(&walk->walk.all) && walk->seq != 0)  		return 0; -	write_lock_bh(&xfrm_policy_lock); +	write_lock_bh(&net->xfrm.xfrm_policy_lock);  	if (list_empty(&walk->walk.all))  		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);  	else @@ -834,7 +913,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,  	}  	list_del_init(&walk->walk.all);  out: -	write_unlock_bh(&xfrm_policy_lock); +	write_unlock_bh(&net->xfrm.xfrm_policy_lock);  	return error;  }  EXPORT_SYMBOL(xfrm_policy_walk); @@ -848,14 +927,14 @@ void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)  }  EXPORT_SYMBOL(xfrm_policy_walk_init); -void xfrm_policy_walk_done(struct xfrm_policy_walk *walk) +void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)  {  	if (list_empty(&walk->walk.all))  		return; -	write_lock_bh(&xfrm_policy_lock); +	write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */  	list_del(&walk->walk.all); -	write_unlock_bh(&xfrm_policy_lock); +	write_unlock_bh(&net->xfrm.xfrm_policy_lock);  }  EXPORT_SYMBOL(xfrm_policy_walk_done); @@ -864,33 +943,34 @@ EXPORT_SYMBOL(xfrm_policy_walk_done);   *   * Returns 0 if policy found, else an -errno.   */ -static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, +static int xfrm_policy_match(const struct xfrm_policy *pol, +			     const struct flowi *fl,  			     u8 type, u16 family, int dir)  { -	struct xfrm_selector *sel = &pol->selector; -	int match, ret = -ESRCH; +	const struct xfrm_selector *sel = &pol->selector; +	int ret = -ESRCH; +	bool match;  	if (pol->family != family || -	    (fl->mark & pol->mark.m) != pol->mark.v || +	    (fl->flowi_mark & pol->mark.m) != pol->mark.v ||  	    pol->type != type)  		return ret;  	match = xfrm_selector_match(sel, fl, family);  	if (match) -		ret = security_xfrm_policy_lookup(pol->security, fl->secid, +		ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,  						  dir);  	return ret;  }  static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, -						     struct flowi *fl, +						     const struct flowi *fl,  						     u16 family, u8 dir)  {  	int err;  	struct xfrm_policy *pol, *ret; -	xfrm_address_t *daddr, *saddr; -	struct hlist_node *entry; +	const xfrm_address_t *daddr, *saddr;  	struct hlist_head *chain;  	u32 priority = ~0U; @@ -899,10 +979,10 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,  	if (unlikely(!daddr || !saddr))  		return NULL; -	read_lock_bh(&xfrm_policy_lock); +	read_lock_bh(&net->xfrm.xfrm_policy_lock);  	chain = policy_hash_direct(net, daddr, saddr, family, dir);  	ret = NULL; -	hlist_for_each_entry(pol, entry, chain, bydst) { +	hlist_for_each_entry(pol, chain, bydst) {  		err = xfrm_policy_match(pol, fl, type, family, dir);  		if (err) {  			if (err == -ESRCH) @@ -918,7 +998,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,  		}  	}  	chain = &net->xfrm.policy_inexact[dir]; -	hlist_for_each_entry(pol, entry, chain, bydst) { +	hlist_for_each_entry(pol, chain, bydst) {  		err = xfrm_policy_match(pol, fl, type, family, dir);  		if (err) {  			if (err == -ESRCH) @@ -935,13 +1015,13 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,  	if (ret)  		xfrm_pol_hold(ret);  fail: -	read_unlock_bh(&xfrm_policy_lock); +	read_unlock_bh(&net->xfrm.xfrm_policy_lock);  	return ret;  }  static struct xfrm_policy * -__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir) +__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)  {  #ifdef CONFIG_XFRM_SUB_POLICY  	struct xfrm_policy *pol; @@ -953,8 +1033,26 @@ __xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir)  	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);  } +static int flow_to_policy_dir(int dir) +{ +	if (XFRM_POLICY_IN == FLOW_DIR_IN && +	    XFRM_POLICY_OUT == FLOW_DIR_OUT && +	    XFRM_POLICY_FWD == FLOW_DIR_FWD) +		return dir; + +	switch (dir) { +	default: +	case FLOW_DIR_IN: +		return XFRM_POLICY_IN; +	case FLOW_DIR_OUT: +		return XFRM_POLICY_OUT; +	case FLOW_DIR_FWD: +		return XFRM_POLICY_FWD; +	} +} +  static struct flow_cache_object * -xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, +xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,  		   u8 dir, struct flow_cache_object *old_obj, void *ctx)  {  	struct xfrm_policy *pol; @@ -962,7 +1060,7 @@ xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,  	if (old_obj)  		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); -	pol = __xfrm_policy_lookup(net, fl, family, dir); +	pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));  	if (IS_ERR_OR_NULL(pol))  		return ERR_CAST(pol); @@ -990,14 +1088,16 @@ static inline int policy_to_flow_dir(int dir)  	}  } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, +						 const struct flowi *fl)  {  	struct xfrm_policy *pol; +	struct net *net = sock_net(sk); -	read_lock_bh(&xfrm_policy_lock); +	read_lock_bh(&net->xfrm.xfrm_policy_lock);  	if ((pol = sk->sk_policy[dir]) != NULL) { -		int match = xfrm_selector_match(&pol->selector, fl, -						sk->sk_family); +		bool match = xfrm_selector_match(&pol->selector, fl, +						 sk->sk_family);  		int err = 0;  		if (match) { @@ -1006,7 +1106,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc  				goto out;  			}  			err = security_xfrm_policy_lookup(pol->security, -						      fl->secid, +						      fl->flowi_secid,  						      policy_to_flow_dir(dir));  			if (!err)  				xfrm_pol_hold(pol); @@ -1018,7 +1118,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc  			pol = NULL;  	}  out: -	read_unlock_bh(&xfrm_policy_lock); +	read_unlock_bh(&net->xfrm.xfrm_policy_lock);  	return pol;  } @@ -1046,7 +1146,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,  	if (hlist_unhashed(&pol->bydst))  		return NULL; -	hlist_del(&pol->bydst); +	hlist_del_init(&pol->bydst);  	hlist_del(&pol->byidx);  	list_del(&pol->walk.all);  	net->xfrm.policy_count[dir]--; @@ -1056,9 +1156,11 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,  int xfrm_policy_delete(struct xfrm_policy *pol, int dir)  { -	write_lock_bh(&xfrm_policy_lock); +	struct net *net = xp_net(pol); + +	write_lock_bh(&net->xfrm.xfrm_policy_lock);  	pol = __xfrm_policy_unlink(pol, dir); -	write_unlock_bh(&xfrm_policy_lock); +	write_unlock_bh(&net->xfrm.xfrm_policy_lock);  	if (pol) {  		xfrm_policy_kill(pol);  		return 0; @@ -1077,20 +1179,24 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)  		return -EINVAL;  #endif -	write_lock_bh(&xfrm_policy_lock); +	write_lock_bh(&net->xfrm.xfrm_policy_lock);  	old_pol = sk->sk_policy[dir];  	sk->sk_policy[dir] = pol;  	if (pol) {  		pol->curlft.add_time = get_seconds(); -		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir); +		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);  		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);  	} -	if (old_pol) +	if (old_pol) { +		if (pol) +			xfrm_policy_requeue(old_pol, pol); +  		/* Unlinking succeeds always. This is the only function  		 * allowed to delete or replace socket policy.  		 */  		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); -	write_unlock_bh(&xfrm_policy_lock); +	} +	write_unlock_bh(&net->xfrm.xfrm_policy_lock);  	if (old_pol) {  		xfrm_policy_kill(old_pol); @@ -1098,9 +1204,10 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)  	return 0;  } -static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) +static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)  {  	struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); +	struct net *net = xp_net(old);  	if (newp) {  		newp->selector = old->selector; @@ -1119,9 +1226,9 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)  		newp->type = old->type;  		memcpy(newp->xfrm_vec, old->xfrm_vec,  		       newp->xfrm_nr*sizeof(struct xfrm_tmpl)); -		write_lock_bh(&xfrm_policy_lock); +		write_lock_bh(&net->xfrm.xfrm_policy_lock);  		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir); -		write_unlock_bh(&xfrm_policy_lock); +		write_unlock_bh(&net->xfrm.xfrm_policy_lock);  		xfrm_pol_put(newp);  	}  	return newp; @@ -1157,9 +1264,8 @@ xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,  /* Resolve list of templates for the flow, given policy. */  static int -xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, -		      struct xfrm_state **xfrm, -		      unsigned short family) +xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, +		      struct xfrm_state **xfrm, unsigned short family)  {  	struct net *net = xp_net(policy);  	int nx; @@ -1168,7 +1274,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,  	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);  	xfrm_address_t tmp; -	for (nx=0, i = 0; i < policy->xfrm_nr; i++) { +	for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {  		struct xfrm_state *x;  		xfrm_address_t *remote = daddr;  		xfrm_address_t *local  = saddr; @@ -1198,9 +1304,9 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,  			error = (x->km.state == XFRM_STATE_ERROR ?  				 -EINVAL : -EAGAIN);  			xfrm_state_put(x); -		} -		else if (error == -ESRCH) +		} else if (error == -ESRCH) {  			error = -EAGAIN; +		}  		if (!tmpl->optional)  			goto fail; @@ -1208,15 +1314,14 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,  	return nx;  fail: -	for (nx--; nx>=0; nx--) +	for (nx--; nx >= 0; nx--)  		xfrm_state_put(xfrm[nx]);  	return error;  }  static int -xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, -		  struct xfrm_state **xfrm, -		  unsigned short family) +xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, +		  struct xfrm_state **xfrm, unsigned short family)  {  	struct xfrm_state *tp[XFRM_MAX_DEPTH];  	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; @@ -1246,7 +1351,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,  	return cnx;   fail: -	for (cnx--; cnx>=0; cnx--) +	for (cnx--; cnx >= 0; cnx--)  		xfrm_state_put(tpp[cnx]);  	return error; @@ -1256,7 +1361,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,   * still valid.   */ -static inline int xfrm_get_tos(struct flowi *fl, int family) +static inline int xfrm_get_tos(const struct flowi *fl, int family)  {  	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);  	int tos; @@ -1282,6 +1387,8 @@ static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *f  		 * It means we need to try again resolving. */  		if (xdst->num_xfrms > 0)  			return NULL; +	} else if (dst->flags & DST_XFRM_QUEUE) { +		return NULL;  	} else {  		/* Real bundle */  		if (stale_bundle(dst)) @@ -1332,7 +1439,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)  	case AF_INET:  		dst_ops = &net->xfrm.xfrm4_dst_ops;  		break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6)  	case AF_INET6:  		dst_ops = &net->xfrm.xfrm6_dst_ops;  		break; @@ -1340,10 +1447,19 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)  	default:  		BUG();  	} -	xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS); -	xfrm_policy_put_afinfo(afinfo); +	xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0); -	xdst->flo.ops = &xfrm_bundle_fc_ops; +	if (likely(xdst)) { +		struct dst_entry *dst = &xdst->u.dst; + +		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); +		xdst->flo.ops = &xfrm_bundle_fc_ops; +		if (afinfo->init_dst) +			afinfo->init_dst(net, xdst); +	} else +		xdst = ERR_PTR(-ENOBUFS); + +	xfrm_policy_put_afinfo(afinfo);  	return xdst;  } @@ -1366,7 +1482,7 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,  }  static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, -				struct flowi *fl) +				const struct flowi *fl)  {  	struct xfrm_policy_afinfo *afinfo =  		xfrm_policy_get_afinfo(xdst->u.dst.ops->family); @@ -1389,12 +1505,13 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,  static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,  					    struct xfrm_state **xfrm, int nx, -					    struct flowi *fl, +					    const struct flowi *fl,  					    struct dst_entry *dst)  {  	struct net *net = xp_net(policy);  	unsigned long now = jiffies;  	struct net_device *dev; +	struct xfrm_mode *inner_mode;  	struct dst_entry *dst_prev = NULL;  	struct dst_entry *dst0 = NULL;  	int i = 0; @@ -1425,6 +1542,17 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,  			goto put_states;  		} +		if (xfrm[i]->sel.family == AF_UNSPEC) { +			inner_mode = xfrm_ip2inner_mode(xfrm[i], +							xfrm_af2proto(family)); +			if (!inner_mode) { +				err = -EAFNOSUPPORT; +				dst_release(dst); +				goto put_states; +			} +		} else +			inner_mode = xfrm[i]->inner_mode; +  		if (!dst_prev)  			dst0 = dst1;  		else { @@ -1433,7 +1561,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,  		}  		xdst->route = dst; -		memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics)); +		dst_copy_metrics(dst1, dst);  		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {  			family = xfrm[i]->props.family; @@ -1448,12 +1576,12 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,  		dst1->xfrm = xfrm[i];  		xdst->xfrm_genid = xfrm[i]->genid; -		dst1->obsolete = -1; +		dst1->obsolete = DST_OBSOLETE_FORCE_CHK;  		dst1->flags |= DST_HOST;  		dst1->lastuse = now;  		dst1->input = dst_discard; -		dst1->output = xfrm[i]->outer_mode->afinfo->output; +		dst1->output = inner_mode->afinfo->output;  		dst1->next = dst_prev;  		dst_prev = dst1; @@ -1472,9 +1600,6 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,  	if (!dev)  		goto free_dst; -	/* Copy neighbour for reachability confirmation */ -	dst0->neighbour = neigh_clone(dst->neighbour); -  	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);  	xfrm_init_pmtu(dst_prev); @@ -1504,20 +1629,22 @@ free_dst:  	goto out;  } -static int inline -xfrm_dst_alloc_copy(void **target, void *src, int size) +#ifdef CONFIG_XFRM_SUB_POLICY +static int xfrm_dst_alloc_copy(void **target, const void *src, int size)  {  	if (!*target) {  		*target = kmalloc(size, GFP_ATOMIC);  		if (!*target)  			return -ENOMEM;  	} +  	memcpy(*target, src, size);  	return 0;  } +#endif -static int inline -xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel) +static int xfrm_dst_update_parent(struct dst_entry *dst, +				  const struct xfrm_selector *sel)  {  #ifdef CONFIG_XFRM_SUB_POLICY  	struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -1528,8 +1655,8 @@ xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)  #endif  } -static int inline -xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl) +static int xfrm_dst_update_origin(struct dst_entry *dst, +				  const struct flowi *fl)  {  #ifdef CONFIG_XFRM_SUB_POLICY  	struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -1539,7 +1666,7 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)  #endif  } -static int xfrm_expand_policies(struct flowi *fl, u16 family, +static int xfrm_expand_policies(const struct flowi *fl, u16 family,  				struct xfrm_policy **pols,  				int *num_pols, int *num_xfrms)  { @@ -1567,7 +1694,7 @@ static int xfrm_expand_policies(struct flowi *fl, u16 family,  				xfrm_pols_put(pols, *num_pols);  				return PTR_ERR(pols[1]);  			} -			(*num_pols) ++; +			(*num_pols)++;  			(*num_xfrms) += pols[1]->xfrm_nr;  		}  	} @@ -1585,7 +1712,7 @@ static int xfrm_expand_policies(struct flowi *fl, u16 family,  static struct xfrm_dst *  xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, -			       struct flowi *fl, u16 family, +			       const struct flowi *fl, u16 family,  			       struct dst_entry *dst_orig)  {  	struct net *net = xp_net(pols[0]); @@ -1621,14 +1748,191 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,  	}  	xdst->num_pols = num_pols; -	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); +	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);  	xdst->policy_genid = atomic_read(&pols[0]->genid);  	return xdst;  } +static void xfrm_policy_queue_process(unsigned long arg) +{ +	int err = 0; +	struct sk_buff *skb; +	struct sock *sk; +	struct dst_entry *dst; +	struct xfrm_policy *pol = (struct xfrm_policy *)arg; +	struct xfrm_policy_queue *pq = &pol->polq; +	struct flowi fl; +	struct sk_buff_head list; + +	spin_lock(&pq->hold_queue.lock); +	skb = skb_peek(&pq->hold_queue); +	if (!skb) { +		spin_unlock(&pq->hold_queue.lock); +		goto out; +	} +	dst = skb_dst(skb); +	sk = skb->sk; +	xfrm_decode_session(skb, &fl, dst->ops->family); +	spin_unlock(&pq->hold_queue.lock); + +	dst_hold(dst->path); +	dst = xfrm_lookup(xp_net(pol), dst->path, &fl, +			  sk, 0); +	if (IS_ERR(dst)) +		goto purge_queue; + +	if (dst->flags & DST_XFRM_QUEUE) { +		dst_release(dst); + +		if (pq->timeout >= XFRM_QUEUE_TMO_MAX) +			goto purge_queue; + +		pq->timeout = pq->timeout << 1; +		if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout)) +			xfrm_pol_hold(pol); +	goto out; +	} + +	dst_release(dst); + +	__skb_queue_head_init(&list); + +	spin_lock(&pq->hold_queue.lock); +	pq->timeout = 0; +	skb_queue_splice_init(&pq->hold_queue, &list); +	spin_unlock(&pq->hold_queue.lock); + +	while (!skb_queue_empty(&list)) { +		skb = __skb_dequeue(&list); + +		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); +		dst_hold(skb_dst(skb)->path); +		dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path, +				  &fl, skb->sk, 0); +		if (IS_ERR(dst)) { +			kfree_skb(skb); +			continue; +		} + +		nf_reset(skb); +		skb_dst_drop(skb); +		skb_dst_set(skb, dst); + +		err = dst_output(skb); +	} + +out: +	xfrm_pol_put(pol); +	return; + +purge_queue: +	pq->timeout = 0; +	xfrm_queue_purge(&pq->hold_queue); +	xfrm_pol_put(pol); +} + +static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) +{ +	unsigned long sched_next; +	struct dst_entry *dst = skb_dst(skb); +	struct xfrm_dst *xdst = (struct xfrm_dst *) dst; +	struct xfrm_policy *pol = xdst->pols[0]; +	struct xfrm_policy_queue *pq = &pol->polq; +	const struct sk_buff *fclone = skb + 1; + +	if (unlikely(skb->fclone == SKB_FCLONE_ORIG && +		     fclone->fclone == SKB_FCLONE_CLONE)) { +		kfree_skb(skb); +		return 0; +	} + +	if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) { +		kfree_skb(skb); +		return -EAGAIN; +	} + +	skb_dst_force(skb); + +	spin_lock_bh(&pq->hold_queue.lock); + +	if (!pq->timeout) +		pq->timeout = XFRM_QUEUE_TMO_MIN; + +	sched_next = jiffies + pq->timeout; + +	if (del_timer(&pq->hold_timer)) { +		if (time_before(pq->hold_timer.expires, sched_next)) +			sched_next = pq->hold_timer.expires; +		xfrm_pol_put(pol); +	} + +	__skb_queue_tail(&pq->hold_queue, skb); +	if (!mod_timer(&pq->hold_timer, sched_next)) +		xfrm_pol_hold(pol); + +	spin_unlock_bh(&pq->hold_queue.lock); + +	return 0; +} + +static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, +						 struct dst_entry *dst, +						 const struct flowi *fl, +						 int num_xfrms, +						 u16 family) +{ +	int err; +	struct net_device *dev; +	struct dst_entry *dst1; +	struct xfrm_dst *xdst; + +	xdst = xfrm_alloc_dst(net, family); +	if (IS_ERR(xdst)) +		return xdst; + +	if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0) +		return xdst; + +	dst1 = &xdst->u.dst; +	dst_hold(dst); +	xdst->route = dst; + +	dst_copy_metrics(dst1, dst); + +	dst1->obsolete = DST_OBSOLETE_FORCE_CHK; +	dst1->flags |= DST_HOST | DST_XFRM_QUEUE; +	dst1->lastuse = jiffies; + +	dst1->input = dst_discard; +	dst1->output = xdst_queue_output; + +	dst_hold(dst); +	dst1->child = dst; +	dst1->path = dst; + +	xfrm_init_path((struct xfrm_dst *)dst1, dst, 0); + +	err = -ENODEV; +	dev = dst->dev; +	if (!dev) +		goto free_dst; + +	err = xfrm_fill_dst(xdst, dev, fl); +	if (err) +		goto free_dst; + +out: +	return xdst; + +free_dst: +	dst_release(dst1); +	xdst = ERR_PTR(err); +	goto out; +} +  static struct flow_cache_object * -xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir, +xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,  		   struct flow_cache_object *oldflo, void *ctx)  {  	struct dst_entry *dst_orig = (struct dst_entry *)ctx; @@ -1660,7 +1964,8 @@ xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir,  	 * previous cache entry */  	if (xdst == NULL) {  		num_pols = 1; -		pols[0] = __xfrm_policy_lookup(net, fl, family, dir); +		pols[0] = __xfrm_policy_lookup(net, fl, family, +					       flow_to_policy_dir(dir));  		err = xfrm_expand_policies(fl, family, pols,  					   &num_pols, &num_xfrms);  		if (err < 0) @@ -1705,14 +2010,14 @@ make_dummy_bundle:  	/* We found policies, but there's no bundles to instantiate:  	 * either because the policy blocks, has no transformations or  	 * we could not build template (no xfrm_states).*/ -	xdst = xfrm_alloc_dst(net, family); +	xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family);  	if (IS_ERR(xdst)) {  		xfrm_pols_put(pols, num_pols);  		return ERR_CAST(xdst);  	}  	xdst->num_pols = num_pols;  	xdst->num_xfrms = num_xfrms; -	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); +	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);  	dst_hold(&xdst->u.dst);  	return &xdst->flo; @@ -1727,23 +2032,40 @@ error:  	return ERR_PTR(err);  } +static struct dst_entry *make_blackhole(struct net *net, u16 family, +					struct dst_entry *dst_orig) +{ +	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); +	struct dst_entry *ret; + +	if (!afinfo) { +		dst_release(dst_orig); +		return ERR_PTR(-EINVAL); +	} else { +		ret = afinfo->blackhole_route(net, dst_orig); +	} +	xfrm_policy_put_afinfo(afinfo); + +	return ret; +} +  /* Main function: finds/creates a bundle for given flow.   *   * At the moment we eat a raw IP route. Mostly to speed up lookups   * on interfaces with disabled IPsec.   */ -int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, -		  struct sock *sk, int flags) +struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, +			      const struct flowi *fl, +			      struct sock *sk, int flags)  {  	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];  	struct flow_cache_object *flo;  	struct xfrm_dst *xdst; -	struct dst_entry *dst, *dst_orig = *dst_p, *route; +	struct dst_entry *dst, *route;  	u16 family = dst_orig->ops->family;  	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);  	int i, err, num_pols, num_xfrms = 0, drop_pols = 0; -restart:  	dst = NULL;  	xdst = NULL;  	route = NULL; @@ -1775,11 +2097,8 @@ restart:  				goto no_transform;  			} -			spin_lock_bh(&xfrm_policy_sk_bundle_lock); -			xdst->u.dst.next = xfrm_policy_sk_bundles; -			xfrm_policy_sk_bundles = &xdst->u.dst; -			spin_unlock_bh(&xfrm_policy_sk_bundle_lock); - +			dst_hold(&xdst->u.dst); +			xdst->u.dst.flags |= DST_NOCACHE;  			route = xdst->route;  		}  	} @@ -1802,7 +2121,7 @@ restart:  		num_pols = xdst->num_pols;  		num_xfrms = xdst->num_xfrms; -		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols); +		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);  		route = xdst->route;  	} @@ -1815,30 +2134,14 @@ restart:  		 * have the xfrm_state's. We need to wait for KM to  		 * negotiate new SA's or bail out with error.*/  		if (net->xfrm.sysctl_larval_drop) { -			/* EREMOTE tells the caller to generate -			 * a one-shot blackhole route. */  			dst_release(dst);  			xfrm_pols_put(pols, drop_pols);  			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); -			return -EREMOTE; -		} -		if (flags & XFRM_LOOKUP_WAIT) { -			DECLARE_WAITQUEUE(wait, current); -			add_wait_queue(&net->xfrm.km_waitq, &wait); -			set_current_state(TASK_INTERRUPTIBLE); -			schedule(); -			set_current_state(TASK_RUNNING); -			remove_wait_queue(&net->xfrm.km_waitq, &wait); - -			if (!signal_pending(current)) { -				dst_release(dst); -				goto restart; -			} +			return make_blackhole(net, family, dst_orig); +		} -			err = -ERESTART; -		} else -			err = -EAGAIN; +		err = -EAGAIN;  		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);  		goto error; @@ -1864,47 +2167,36 @@ no_transform:  		goto error;  	} else if (num_xfrms > 0) {  		/* Flow transformed */ -		*dst_p = dst;  		dst_release(dst_orig);  	} else {  		/* Flow passes untransformed */  		dst_release(dst); +		dst = dst_orig;  	}  ok:  	xfrm_pols_put(pols, drop_pols); -	return 0; +	if (dst && dst->xfrm && +	    dst->xfrm->props.mode == XFRM_MODE_TUNNEL) +		dst->flags |= DST_XFRM_TUNNEL; +	return dst;  nopol: -	if (!(flags & XFRM_LOOKUP_ICMP)) +	if (!(flags & XFRM_LOOKUP_ICMP)) { +		dst = dst_orig;  		goto ok; +	}  	err = -ENOENT;  error:  	dst_release(dst);  dropdst:  	dst_release(dst_orig); -	*dst_p = NULL;  	xfrm_pols_put(pols, drop_pols); -	return err; -} -EXPORT_SYMBOL(__xfrm_lookup); - -int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, -		struct sock *sk, int flags) -{ -	int err = __xfrm_lookup(net, dst_p, fl, sk, flags); - -	if (err == -EREMOTE) { -		dst_release(*dst_p); -		*dst_p = NULL; -		err = -EAGAIN; -	} - -	return err; +	return ERR_PTR(err);  }  EXPORT_SYMBOL(xfrm_lookup);  static inline int -xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) +xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)  {  	struct xfrm_state *x; @@ -1923,7 +2215,7 @@ xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)   */  static inline int -xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, +xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,  	      unsigned short family)  {  	if (xfrm_state_kern(x)) @@ -1946,7 +2238,7 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,   * Otherwise "-2 - errored_index" is returned.   */  static inline int -xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, +xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,  	       unsigned short family)  {  	int idx = start; @@ -1978,13 +2270,13 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,  		return -EAFNOSUPPORT;  	afinfo->decode_session(skb, fl, reverse); -	err = security_xfrm_decode_session(skb, &fl->secid); +	err = security_xfrm_decode_session(skb, &fl->flowi_secid);  	xfrm_policy_put_afinfo(afinfo);  	return err;  }  EXPORT_SYMBOL(__xfrm_decode_session); -static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp) +static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)  {  	for (; k < sp->len; k++) {  		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { @@ -2025,7 +2317,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,  	if (skb->sp) {  		int i; -		for (i=skb->sp->len-1; i>=0; i--) { +		for (i = skb->sp->len-1; i >= 0; i--) {  			struct xfrm_state *x = skb->sp->xvec[i];  			if (!xfrm_selector_match(&x->sel, &fl, family)) {  				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); @@ -2071,7 +2363,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,  	pol->curlft.use_time = get_seconds();  	pols[0] = pol; -	npols ++; +	npols++;  #ifdef CONFIG_XFRM_SUB_POLICY  	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {  		pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, @@ -2083,7 +2375,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,  				return 0;  			}  			pols[1]->curlft.use_time = get_seconds(); -			npols ++; +			npols++;  		}  	}  #endif @@ -2115,7 +2407,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,  		}  		xfrm_nr = ti;  		if (npols > 1) { -			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); +			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net);  			tpp = stp;  		} @@ -2159,7 +2451,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)  	struct net *net = dev_net(skb->dev);  	struct flowi fl;  	struct dst_entry *dst; -	int res; +	int res = 1;  	if (xfrm_decode_session(skb, &fl, family) < 0) {  		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); @@ -2167,9 +2459,12 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)  	}  	skb_dst_force(skb); -	dst = skb_dst(skb); -	res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0; +	dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0); +	if (IS_ERR(dst)) { +		res = 0; +		dst = NULL; +	}  	skb_dst_set(skb, dst);  	return res;  } @@ -2180,12 +2475,13 @@ EXPORT_SYMBOL(__xfrm_route_forward);  static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)  {  	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete -	 * to "-1" to force all XFRM destinations to get validated by -	 * dst_ops->check on every use.  We do this because when a -	 * normal route referenced by an XFRM dst is obsoleted we do -	 * not go looking around for all parent referencing XFRM dsts -	 * so that we can invalidate them.  It is just too much work. -	 * Instead we make the checks here on every use.  For example: +	 * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to +	 * get validated by dst_ops->check on every use.  We do this +	 * because when a normal route referenced by an XFRM dst is +	 * obsoleted we do not go looking around for all parent +	 * referencing XFRM dsts so that we can invalidate them.  It +	 * is just too much work.  Instead we make the checks here on +	 * every use.  For example:  	 *  	 *	XFRM dst A --> IPv4 dst X  	 * @@ -2195,9 +2491,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)  	 * stale_bundle() check.  	 *  	 * When a policy's bundle is pruned, we dst_free() the XFRM -	 * dst which causes it's ->obsolete field to be set to a -	 * positive non-zero integer.  If an XFRM dst has been pruned -	 * like this, we want to force a new route lookup. +	 * dst which causes it's ->obsolete field to be set to +	 * DST_OBSOLETE_DEAD.  If an XFRM dst has been pruned like +	 * this, we want to force a new route lookup.  	 */  	if (dst->obsolete < 0 && !stale_bundle(dst))  		return dst; @@ -2207,7 +2503,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)  static int stale_bundle(struct dst_entry *dst)  { -	return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); +	return !xfrm_bundle_ok((struct xfrm_dst *)dst);  }  void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -2236,22 +2532,15 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)  	return dst;  } -static void __xfrm_garbage_collect(struct net *net) +void xfrm_garbage_collect(struct net *net)  { -	struct dst_entry *head, *next; - -	flow_cache_flush(); - -	spin_lock_bh(&xfrm_policy_sk_bundle_lock); -	head = xfrm_policy_sk_bundles; -	xfrm_policy_sk_bundles = NULL; -	spin_unlock_bh(&xfrm_policy_sk_bundle_lock); +	flow_cache_flush(net); +} +EXPORT_SYMBOL(xfrm_garbage_collect); -	while (head) { -		next = head->next; -		dst_free(head); -		head = next; -	} +static void xfrm_garbage_collect_deferred(struct net *net) +{ +	flow_cache_flush_deferred(net);  }  static void xfrm_init_pmtu(struct dst_entry *dst) @@ -2271,7 +2560,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst)  		if (pmtu > route_mtu_cached)  			pmtu = route_mtu_cached; -		dst->metrics[RTAX_MTU-1] = pmtu; +		dst_metric_set(dst, RTAX_MTU, pmtu);  	} while ((dst = dst->next));  } @@ -2279,8 +2568,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst)   * still valid.   */ -static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, -		struct flowi *fl, int family, int strict) +static int xfrm_bundle_ok(struct xfrm_dst *first)  {  	struct dst_entry *dst = &first->u.dst;  	struct xfrm_dst *last; @@ -2289,26 +2577,15 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,  	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||  	    (dst->dev && !netif_running(dst->dev)))  		return 0; -#ifdef CONFIG_XFRM_SUB_POLICY -	if (fl) { -		if (first->origin && !flow_cache_uli_match(first->origin, fl)) -			return 0; -		if (first->partner && -		    !xfrm_selector_match(first->partner, fl, family)) -			return 0; -	} -#endif + +	if (dst->flags & DST_XFRM_QUEUE) +		return 1;  	last = NULL;  	do {  		struct xfrm_dst *xdst = (struct xfrm_dst *)dst; -		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) -			return 0; -		if (fl && pol && -		    !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl)) -			return 0;  		if (dst->xfrm->km.state != XFRM_STATE_VALID)  			return 0;  		if (xdst->xfrm_genid != dst->xfrm->genid) @@ -2317,11 +2594,6 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,  		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))  			return 0; -		if (strict && fl && -		    !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) && -		    !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) -			return 0; -  		mtu = dst_mtu(dst->child);  		if (xdst->child_mtu_cached != mtu) {  			last = xdst; @@ -2349,7 +2621,7 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,  		mtu = xfrm_state_mtu(dst->xfrm, mtu);  		if (mtu > last->route_mtu_cached)  			mtu = last->route_mtu_cached; -		dst->metrics[RTAX_MTU-1] = mtu; +		dst_metric_set(dst, RTAX_MTU, mtu);  		if (last == first)  			break; @@ -2361,6 +2633,25 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,  	return 1;  } +static unsigned int xfrm_default_advmss(const struct dst_entry *dst) +{ +	return dst_metric_advmss(dst->path); +} + +static unsigned int xfrm_mtu(const struct dst_entry *dst) +{ +	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + +	return mtu ? : dst_mtu(dst->path); +} + +static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, +					   struct sk_buff *skb, +					   const void *daddr) +{ +	return dst->path->ops->neigh_lookup(dst, skb, daddr); +} +  int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)  {  	struct net *net; @@ -2369,7 +2660,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)  		return -EINVAL;  	if (unlikely(afinfo->family >= NPROTO))  		return -EAFNOSUPPORT; -	write_lock_bh(&xfrm_policy_afinfo_lock); +	spin_lock(&xfrm_policy_afinfo_lock);  	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))  		err = -ENOBUFS;  	else { @@ -2378,15 +2669,21 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)  			dst_ops->kmem_cachep = xfrm_dst_cache;  		if (likely(dst_ops->check == NULL))  			dst_ops->check = xfrm_dst_check; +		if (likely(dst_ops->default_advmss == NULL)) +			dst_ops->default_advmss = xfrm_default_advmss; +		if (likely(dst_ops->mtu == NULL)) +			dst_ops->mtu = xfrm_mtu;  		if (likely(dst_ops->negative_advice == NULL))  			dst_ops->negative_advice = xfrm_negative_advice;  		if (likely(dst_ops->link_failure == NULL))  			dst_ops->link_failure = xfrm_link_failure; +		if (likely(dst_ops->neigh_lookup == NULL)) +			dst_ops->neigh_lookup = xfrm_neigh_lookup;  		if (likely(afinfo->garbage_collect == NULL)) -			afinfo->garbage_collect = __xfrm_garbage_collect; -		xfrm_policy_afinfo[afinfo->family] = afinfo; +			afinfo->garbage_collect = xfrm_garbage_collect_deferred; +		rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);  	} -	write_unlock_bh(&xfrm_policy_afinfo_lock); +	spin_unlock(&xfrm_policy_afinfo_lock);  	rtnl_lock();  	for_each_net(net) { @@ -2396,7 +2693,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)  		case AF_INET:  			xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;  			break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6)  		case AF_INET6:  			xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;  			break; @@ -2419,21 +2716,26 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)  		return -EINVAL;  	if (unlikely(afinfo->family >= NPROTO))  		return -EAFNOSUPPORT; -	write_lock_bh(&xfrm_policy_afinfo_lock); +	spin_lock(&xfrm_policy_afinfo_lock);  	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {  		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))  			err = -EINVAL; -		else { -			struct dst_ops *dst_ops = afinfo->dst_ops; -			xfrm_policy_afinfo[afinfo->family] = NULL; -			dst_ops->kmem_cachep = NULL; -			dst_ops->check = NULL; -			dst_ops->negative_advice = NULL; -			dst_ops->link_failure = NULL; -			afinfo->garbage_collect = NULL; -		} +		else +			RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family], +					 NULL); +	} +	spin_unlock(&xfrm_policy_afinfo_lock); +	if (!err) { +		struct dst_ops *dst_ops = afinfo->dst_ops; + +		synchronize_rcu(); + +		dst_ops->kmem_cachep = NULL; +		dst_ops->check = NULL; +		dst_ops->negative_advice = NULL; +		dst_ops->link_failure = NULL; +		afinfo->garbage_collect = NULL;  	} -	write_unlock_bh(&xfrm_policy_afinfo_lock);  	return err;  }  EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); @@ -2442,42 +2744,25 @@ static void __net_init xfrm_dst_ops_init(struct net *net)  {  	struct xfrm_policy_afinfo *afinfo; -	read_lock_bh(&xfrm_policy_afinfo_lock); -	afinfo = xfrm_policy_afinfo[AF_INET]; +	rcu_read_lock(); +	afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]);  	if (afinfo)  		net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -	afinfo = xfrm_policy_afinfo[AF_INET6]; +#if IS_ENABLED(CONFIG_IPV6) +	afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);  	if (afinfo)  		net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;  #endif -	read_unlock_bh(&xfrm_policy_afinfo_lock); -} - -static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) -{ -	struct xfrm_policy_afinfo *afinfo; -	if (unlikely(family >= NPROTO)) -		return NULL; -	read_lock(&xfrm_policy_afinfo_lock); -	afinfo = xfrm_policy_afinfo[family]; -	if (unlikely(!afinfo)) -		read_unlock(&xfrm_policy_afinfo_lock); -	return afinfo; -} - -static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) -{ -	read_unlock(&xfrm_policy_afinfo_lock); +	rcu_read_unlock();  }  static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)  { -	struct net_device *dev = ptr; +	struct net_device *dev = netdev_notifier_info_to_dev(ptr);  	switch (event) {  	case NETDEV_DOWN: -		__xfrm_garbage_collect(dev_net(dev)); +		xfrm_garbage_collect(dev_net(dev));  	}  	return NOTIFY_DONE;  } @@ -2490,21 +2775,19 @@ static struct notifier_block xfrm_dev_notifier = {  static int __net_init xfrm_statistics_init(struct net *net)  {  	int rv; - -	if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics, -			  sizeof(struct linux_xfrm_mib), -			  __alignof__(struct linux_xfrm_mib)) < 0) +	net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib); +	if (!net->mib.xfrm_statistics)  		return -ENOMEM;  	rv = xfrm_proc_init(net);  	if (rv < 0) -		snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); +		free_percpu(net->mib.xfrm_statistics);  	return rv;  }  static void xfrm_statistics_fini(struct net *net)  {  	xfrm_proc_fini(net); -	snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); +	free_percpu(net->mib.xfrm_statistics);  }  #else  static int __net_init xfrm_statistics_init(struct net *net) @@ -2569,21 +2852,14 @@ out_byidx:  static void xfrm_policy_fini(struct net *net)  { -	struct xfrm_audit audit_info;  	unsigned int sz;  	int dir;  	flush_work(&net->xfrm.policy_hash_work);  #ifdef CONFIG_XFRM_SUB_POLICY -	audit_info.loginuid = -1; -	audit_info.sessionid = -1; -	audit_info.secid = 0; -	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info); +	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);  #endif -	audit_info.loginuid = -1; -	audit_info.sessionid = -1; -	audit_info.secid = 0; -	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); +	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);  	WARN_ON(!list_empty(&net->xfrm.policy_all)); @@ -2593,7 +2869,7 @@ static void xfrm_policy_fini(struct net *net)  		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));  		htab = &net->xfrm.policy_bydst[dir]; -		sz = (htab->hmask + 1); +		sz = (htab->hmask + 1) * sizeof(struct hlist_head);  		WARN_ON(!hlist_empty(htab->table));  		xfrm_hash_free(htab->table, sz);  	} @@ -2620,8 +2896,19 @@ static int __net_init xfrm_net_init(struct net *net)  	rv = xfrm_sysctl_init(net);  	if (rv < 0)  		goto out_sysctl; +	rv = flow_cache_init(net); +	if (rv < 0) +		goto out; + +	/* Initialize the per-net locks here */ +	spin_lock_init(&net->xfrm.xfrm_state_lock); +	rwlock_init(&net->xfrm.xfrm_policy_lock); +	mutex_init(&net->xfrm.xfrm_cfg_mutex); +  	return 0; +out: +	xfrm_sysctl_fini(net);  out_sysctl:  	xfrm_policy_fini(net);  out_policy: @@ -2634,6 +2921,7 @@ out_statistics:  static void __net_exit xfrm_net_exit(struct net *net)  { +	flow_cache_fini(net);  	xfrm_sysctl_fini(net);  	xfrm_policy_fini(net);  	xfrm_state_fini(net); @@ -2662,7 +2950,7 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,  		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",  				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str); -	switch(sel->family) { +	switch (sel->family) {  	case AF_INET:  		audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);  		if (sel->prefixlen_s != 32) @@ -2686,15 +2974,14 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,  	}  } -void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, -			   uid_t auid, u32 sessionid, u32 secid) +void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)  {  	struct audit_buffer *audit_buf;  	audit_buf = xfrm_audit_start("SPD-add");  	if (audit_buf == NULL)  		return; -	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); +	xfrm_audit_helper_usrinfo(task_valid, audit_buf);  	audit_log_format(audit_buf, " res=%u", result);  	xfrm_audit_common_policyinfo(xp, audit_buf);  	audit_log_end(audit_buf); @@ -2702,14 +2989,14 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,  EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);  void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, -			      uid_t auid, u32 sessionid, u32 secid) +			      bool task_valid)  {  	struct audit_buffer *audit_buf;  	audit_buf = xfrm_audit_start("SPD-delete");  	if (audit_buf == NULL)  		return; -	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); +	xfrm_audit_helper_usrinfo(task_valid, audit_buf);  	audit_log_format(audit_buf, " res=%u", result);  	xfrm_audit_common_policyinfo(xp, audit_buf);  	audit_log_end(audit_buf); @@ -2718,38 +3005,37 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);  #endif  #ifdef CONFIG_XFRM_MIGRATE -static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp, -				       struct xfrm_selector *sel_tgt) +static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, +					const struct xfrm_selector *sel_tgt)  {  	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {  		if (sel_tgt->family == sel_cmp->family && -		    xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr, -				  sel_cmp->family) == 0 && -		    xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr, -				  sel_cmp->family) == 0 && +		    xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, +				    sel_cmp->family) && +		    xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, +				    sel_cmp->family) &&  		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&  		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { -			return 1; +			return true;  		}  	} else {  		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) { -			return 1; +			return true;  		}  	} -	return 0; +	return false;  } -static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, -						     u8 dir, u8 type) +static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, +						    u8 dir, u8 type, struct net *net)  {  	struct xfrm_policy *pol, *ret = NULL; -	struct hlist_node *entry;  	struct hlist_head *chain;  	u32 priority = ~0U; -	read_lock_bh(&xfrm_policy_lock); -	chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir); -	hlist_for_each_entry(pol, entry, chain, bydst) { +	read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/ +	chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); +	hlist_for_each_entry(pol, chain, bydst) {  		if (xfrm_migrate_selector_match(sel, &pol->selector) &&  		    pol->type == type) {  			ret = pol; @@ -2757,8 +3043,8 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel,  			break;  		}  	} -	chain = &init_net.xfrm.policy_inexact[dir]; -	hlist_for_each_entry(pol, entry, chain, bydst) { +	chain = &net->xfrm.policy_inexact[dir]; +	hlist_for_each_entry(pol, chain, bydst) {  		if (xfrm_migrate_selector_match(sel, &pol->selector) &&  		    pol->type == type &&  		    pol->priority < priority) { @@ -2770,12 +3056,12 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel,  	if (ret)  		xfrm_pol_hold(ret); -	read_unlock_bh(&xfrm_policy_lock); +	read_unlock_bh(&net->xfrm.xfrm_policy_lock);  	return ret;  } -static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t) +static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)  {  	int match = 0; @@ -2784,10 +3070,10 @@ static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)  		switch (t->mode) {  		case XFRM_MODE_TUNNEL:  		case XFRM_MODE_BEET: -			if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr, -					  m->old_family) == 0 && -			    xfrm_addr_cmp(&t->saddr, &m->old_saddr, -					  m->old_family) == 0) { +			if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr, +					    m->old_family) && +			    xfrm_addr_equal(&t->saddr, &m->old_saddr, +					    m->old_family)) {  				match = 1;  			}  			break; @@ -2845,7 +3131,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,  	return 0;  } -static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) +static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)  {  	int i, j; @@ -2853,10 +3139,10 @@ static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)  		return -EINVAL;  	for (i = 0; i < num_migrate; i++) { -		if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr, -				   m[i].old_family) == 0) && -		    (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr, -				   m[i].old_family) == 0)) +		if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr, +				    m[i].old_family) && +		    xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr, +				    m[i].old_family))  			return -EINVAL;  		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||  		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) @@ -2879,9 +3165,9 @@ static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)  	return 0;  } -int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, +int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,  		 struct xfrm_migrate *m, int num_migrate, -		 struct xfrm_kmaddress *k) +		 struct xfrm_kmaddress *k, struct net *net)  {  	int i, err, nx_cur = 0, nx_new = 0;  	struct xfrm_policy *pol = NULL; @@ -2894,14 +3180,14 @@ int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,  		goto out;  	/* Stage 1 - find policy */ -	if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) { +	if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {  		err = -ENOENT;  		goto out;  	}  	/* Stage 2 - find and update state(s) */  	for (i = 0, mp = m; i < num_migrate; i++, mp++) { -		if ((x = xfrm_migrate_state_find(mp))) { +		if ((x = xfrm_migrate_state_find(mp, net))) {  			x_cur[nx_cur] = x;  			nx_cur++;  			if ((xc = xfrm_state_migrate(x, mp))) { diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 58d9ae00559..9c4fbd8935f 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -12,6 +12,7 @@   */  #include <linux/proc_fs.h>  #include <linux/seq_file.h> +#include <linux/export.h>  #include <net/snmp.h>  #include <net/xfrm.h> @@ -42,6 +43,8 @@ static const struct snmp_mib xfrm_mib_list[] = {  	SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD),  	SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR),  	SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), +	SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), +	SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR),  	SNMP_MIB_SENTINEL  }; @@ -49,10 +52,9 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)  {  	struct net *net = seq->private;  	int i; -	for (i=0; xfrm_mib_list[i].name; i++) +	for (i = 0; xfrm_mib_list[i].name; i++)  		seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, -			   snmp_fold_field((void __percpu **) -					   net->mib.xfrm_statistics, +			   snmp_fold_field(net->mib.xfrm_statistics,  					   xfrm_mib_list[i].entry));  	return 0;  } @@ -72,13 +74,13 @@ static const struct file_operations xfrm_statistics_seq_fops = {  int __net_init xfrm_proc_init(struct net *net)  { -	if (!proc_net_fops_create(net, "xfrm_stat", S_IRUGO, -				  &xfrm_statistics_seq_fops)) +	if (!proc_create("xfrm_stat", S_IRUGO, net->proc_net, +			 &xfrm_statistics_seq_fops))  		return -ENOMEM;  	return 0;  }  void xfrm_proc_fini(struct net *net)  { -	proc_net_remove(net, "xfrm_stat"); +	remove_proc_entry("xfrm_stat", net->proc_net);  } diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c new file mode 100644 index 00000000000..dab57daae40 --- /dev/null +++ b/net/xfrm/xfrm_replay.c @@ -0,0 +1,603 @@ +/* + * xfrm_replay.c - xfrm replay detection, derived from xfrm_state.c. + * + * Copyright (C) 2010 secunet Security Networks AG + * Copyright (C) 2010 Steffen Klassert <steffen.klassert@secunet.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/export.h> +#include <net/xfrm.h> + +u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq) +{ +	u32 seq, seq_hi, bottom; +	struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + +	if (!(x->props.flags & XFRM_STATE_ESN)) +		return 0; + +	seq = ntohl(net_seq); +	seq_hi = replay_esn->seq_hi; +	bottom = replay_esn->seq - replay_esn->replay_window + 1; + +	if (likely(replay_esn->seq >= replay_esn->replay_window - 1)) { +		/* A. same subspace */ +		if (unlikely(seq < bottom)) +			seq_hi++; +	} else { +		/* B. window spans two subspaces */ +		if (unlikely(seq >= bottom)) +			seq_hi--; +	} + +	return seq_hi; +} + +static void xfrm_replay_notify(struct xfrm_state *x, int event) +{ +	struct km_event c; +	/* we send notify messages in case +	 *  1. we updated on of the sequence numbers, and the seqno difference +	 *     is at least x->replay_maxdiff, in this case we also update the +	 *     timeout of our timer function +	 *  2. if x->replay_maxage has elapsed since last update, +	 *     and there were changes +	 * +	 *  The state structure must be locked! +	 */ + +	switch (event) { +	case XFRM_REPLAY_UPDATE: +		if (!x->replay_maxdiff || +		    ((x->replay.seq - x->preplay.seq < x->replay_maxdiff) && +		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff))) { +			if (x->xflags & XFRM_TIME_DEFER) +				event = XFRM_REPLAY_TIMEOUT; +			else +				return; +		} + +		break; + +	case XFRM_REPLAY_TIMEOUT: +		if (memcmp(&x->replay, &x->preplay, +			   sizeof(struct xfrm_replay_state)) == 0) { +			x->xflags |= XFRM_TIME_DEFER; +			return; +		} + +		break; +	} + +	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state)); +	c.event = XFRM_MSG_NEWAE; +	c.data.aevent = event; +	km_state_notify(x, &c); + +	if (x->replay_maxage && +	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) +		x->xflags &= ~XFRM_TIME_DEFER; +} + +static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) +{ +	int err = 0; +	struct net *net = xs_net(x); + +	if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { +		XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq; +		if (unlikely(x->replay.oseq == 0)) { +			x->replay.oseq--; +			xfrm_audit_state_replay_overflow(x, skb); +			err = -EOVERFLOW; + +			return err; +		} +		if (xfrm_aevent_is_on(net)) +			x->repl->notify(x, XFRM_REPLAY_UPDATE); +	} + +	return err; +} + +static int xfrm_replay_check(struct xfrm_state *x, +		      struct sk_buff *skb, __be32 net_seq) +{ +	u32 diff; +	u32 seq = ntohl(net_seq); + +	if (!x->props.replay_window) +		return 0; + +	if (unlikely(seq == 0)) +		goto err; + +	if (likely(seq > x->replay.seq)) +		return 0; + +	diff = x->replay.seq - seq; +	if (diff >= x->props.replay_window) { +		x->stats.replay_window++; +		goto err; +	} + +	if (x->replay.bitmap & (1U << diff)) { +		x->stats.replay++; +		goto err; +	} +	return 0; + +err: +	xfrm_audit_state_replay(x, skb, net_seq); +	return -EINVAL; +} + +static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) +{ +	u32 diff; +	u32 seq = ntohl(net_seq); + +	if (!x->props.replay_window) +		return; + +	if (seq > x->replay.seq) { +		diff = seq - x->replay.seq; +		if (diff < x->props.replay_window) +			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1; +		else +			x->replay.bitmap = 1; +		x->replay.seq = seq; +	} else { +		diff = x->replay.seq - seq; +		x->replay.bitmap |= (1U << diff); +	} + +	if (xfrm_aevent_is_on(xs_net(x))) +		x->repl->notify(x, XFRM_REPLAY_UPDATE); +} + +static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) +{ +	int err = 0; +	struct xfrm_replay_state_esn *replay_esn = x->replay_esn; +	struct net *net = xs_net(x); + +	if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { +		XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; +		if (unlikely(replay_esn->oseq == 0)) { +			replay_esn->oseq--; +			xfrm_audit_state_replay_overflow(x, skb); +			err = -EOVERFLOW; + +			return err; +		} +		if (xfrm_aevent_is_on(net)) +			x->repl->notify(x, XFRM_REPLAY_UPDATE); +	} + +	return err; +} + +static int xfrm_replay_check_bmp(struct xfrm_state *x, +				 struct sk_buff *skb, __be32 net_seq) +{ +	unsigned int bitnr, nr; +	struct xfrm_replay_state_esn *replay_esn = x->replay_esn; +	u32 pos; +	u32 seq = ntohl(net_seq); +	u32 diff =  replay_esn->seq - seq; + +	if (!replay_esn->replay_window) +		return 0; + +	if (unlikely(seq == 0)) +		goto err; + +	if (likely(seq > replay_esn->seq)) +		return 0; + +	if (diff >= replay_esn->replay_window) { +		x->stats.replay_window++; +		goto err; +	} + +	pos = (replay_esn->seq - 1) % replay_esn->replay_window; + +	if (pos >= diff) +		bitnr = (pos - diff) % replay_esn->replay_window; +	else +		bitnr = replay_esn->replay_window - (diff - pos); + +	nr = bitnr >> 5; +	bitnr = bitnr & 0x1F; +	if (replay_esn->bmp[nr] & (1U << bitnr)) +		goto err_replay; + +	return 0; + +err_replay: +	x->stats.replay++; +err: +	xfrm_audit_state_replay(x, skb, net_seq); +	return -EINVAL; +} + +static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) +{ +	unsigned int bitnr, nr, i; +	u32 diff; +	struct xfrm_replay_state_esn *replay_esn = x->replay_esn; +	u32 seq = ntohl(net_seq); +	u32 pos; + +	if (!replay_esn->replay_window) +		return; + +	pos = (replay_esn->seq - 1) % replay_esn->replay_window; + +	if (seq > replay_esn->seq) { +		diff = seq - replay_esn->seq; + +		if (diff < replay_esn->replay_window) { +			for (i = 1; i < diff; i++) { +				bitnr = (pos + i) % replay_esn->replay_window; +				nr = bitnr >> 5; +				bitnr = bitnr & 0x1F; +				replay_esn->bmp[nr] &=  ~(1U << bitnr); +			} +		} else { +			nr = (replay_esn->replay_window - 1) >> 5; +			for (i = 0; i <= nr; i++) +				replay_esn->bmp[i] = 0; +		} + +		bitnr = (pos + diff) % replay_esn->replay_window; +		replay_esn->seq = seq; +	} else { +		diff = replay_esn->seq - seq; + +		if (pos >= diff) +			bitnr = (pos - diff) % replay_esn->replay_window; +		else +			bitnr = replay_esn->replay_window - (diff - pos); +	} + +	nr = bitnr >> 5; +	bitnr = bitnr & 0x1F; +	replay_esn->bmp[nr] |= (1U << bitnr); + +	if (xfrm_aevent_is_on(xs_net(x))) +		x->repl->notify(x, XFRM_REPLAY_UPDATE); +} + +static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) +{ +	struct km_event c; +	struct xfrm_replay_state_esn *replay_esn = x->replay_esn; +	struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; + +	/* we send notify messages in case +	 *  1. we updated on of the sequence numbers, and the seqno difference +	 *     is at least x->replay_maxdiff, in this case we also update the +	 *     timeout of our timer function +	 *  2. if x->replay_maxage has elapsed since last update, +	 *     and there were changes +	 * +	 *  The state structure must be locked! +	 */ + +	switch (event) { +	case XFRM_REPLAY_UPDATE: +		if (!x->replay_maxdiff || +		    ((replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) && +		    (replay_esn->oseq - preplay_esn->oseq +		     < x->replay_maxdiff))) { +			if (x->xflags & XFRM_TIME_DEFER) +				event = XFRM_REPLAY_TIMEOUT; +			else +				return; +		} + +		break; + +	case XFRM_REPLAY_TIMEOUT: +		if (memcmp(x->replay_esn, x->preplay_esn, +			   xfrm_replay_state_esn_len(replay_esn)) == 0) { +			x->xflags |= XFRM_TIME_DEFER; +			return; +		} + +		break; +	} + +	memcpy(x->preplay_esn, x->replay_esn, +	       xfrm_replay_state_esn_len(replay_esn)); +	c.event = XFRM_MSG_NEWAE; +	c.data.aevent = event; +	km_state_notify(x, &c); + +	if (x->replay_maxage && +	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) +		x->xflags &= ~XFRM_TIME_DEFER; +} + +static void xfrm_replay_notify_esn(struct xfrm_state *x, int event) +{ +	u32 seq_diff, oseq_diff; +	struct km_event c; +	struct xfrm_replay_state_esn *replay_esn = x->replay_esn; +	struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; + +	/* we send notify messages in case +	 *  1. we updated on of the sequence numbers, and the seqno difference +	 *     is at least x->replay_maxdiff, in this case we also update the +	 *     timeout of our timer function +	 *  2. if x->replay_maxage has elapsed since last update, +	 *     and there were changes +	 * +	 *  The state structure must be locked! +	 */ + +	switch (event) { +	case XFRM_REPLAY_UPDATE: +		if (x->replay_maxdiff) { +			if (replay_esn->seq_hi == preplay_esn->seq_hi) +				seq_diff = replay_esn->seq - preplay_esn->seq; +			else +				seq_diff = ~preplay_esn->seq + replay_esn->seq +					   + 1; + +			if (replay_esn->oseq_hi == preplay_esn->oseq_hi) +				oseq_diff = replay_esn->oseq +					    - preplay_esn->oseq; +			else +				oseq_diff = ~preplay_esn->oseq +					    + replay_esn->oseq + 1; + +			if (seq_diff >= x->replay_maxdiff || +			    oseq_diff >= x->replay_maxdiff) +				break; +		} + +		if (x->xflags & XFRM_TIME_DEFER) +			event = XFRM_REPLAY_TIMEOUT; +		else +			return; + +		break; + +	case XFRM_REPLAY_TIMEOUT: +		if (memcmp(x->replay_esn, x->preplay_esn, +			   xfrm_replay_state_esn_len(replay_esn)) == 0) { +			x->xflags |= XFRM_TIME_DEFER; +			return; +		} + +		break; +	} + +	memcpy(x->preplay_esn, x->replay_esn, +	       xfrm_replay_state_esn_len(replay_esn)); +	c.event = XFRM_MSG_NEWAE; +	c.data.aevent = event; +	km_state_notify(x, &c); + +	if (x->replay_maxage && +	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) +		x->xflags &= ~XFRM_TIME_DEFER; +} + +static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb) +{ +	int err = 0; +	struct xfrm_replay_state_esn *replay_esn = x->replay_esn; +	struct net *net = xs_net(x); + +	if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { +		XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; +		XFRM_SKB_CB(skb)->seq.output.hi = replay_esn->oseq_hi; + +		if (unlikely(replay_esn->oseq == 0)) { +			XFRM_SKB_CB(skb)->seq.output.hi = ++replay_esn->oseq_hi; + +			if (replay_esn->oseq_hi == 0) { +				replay_esn->oseq--; +				replay_esn->oseq_hi--; +				xfrm_audit_state_replay_overflow(x, skb); +				err = -EOVERFLOW; + +				return err; +			} +		} +		if (xfrm_aevent_is_on(net)) +			x->repl->notify(x, XFRM_REPLAY_UPDATE); +	} + +	return err; +} + +static int xfrm_replay_check_esn(struct xfrm_state *x, +				 struct sk_buff *skb, __be32 net_seq) +{ +	unsigned int bitnr, nr; +	u32 diff; +	struct xfrm_replay_state_esn *replay_esn = x->replay_esn; +	u32 pos; +	u32 seq = ntohl(net_seq); +	u32 wsize = replay_esn->replay_window; +	u32 top = replay_esn->seq; +	u32 bottom = top - wsize + 1; + +	if (!wsize) +		return 0; + +	if (unlikely(seq == 0 && replay_esn->seq_hi == 0 && +		     (replay_esn->seq < replay_esn->replay_window - 1))) +		goto err; + +	diff = top - seq; + +	if (likely(top >= wsize - 1)) { +		/* A. same subspace */ +		if (likely(seq > top) || seq < bottom) +			return 0; +	} else { +		/* B. window spans two subspaces */ +		if (likely(seq > top && seq < bottom)) +			return 0; +		if (seq >= bottom) +			diff = ~seq + top + 1; +	} + +	if (diff >= replay_esn->replay_window) { +		x->stats.replay_window++; +		goto err; +	} + +	pos = (replay_esn->seq - 1) % replay_esn->replay_window; + +	if (pos >= diff) +		bitnr = (pos - diff) % replay_esn->replay_window; +	else +		bitnr = replay_esn->replay_window - (diff - pos); + +	nr = bitnr >> 5; +	bitnr = bitnr & 0x1F; +	if (replay_esn->bmp[nr] & (1U << bitnr)) +		goto err_replay; + +	return 0; + +err_replay: +	x->stats.replay++; +err: +	xfrm_audit_state_replay(x, skb, net_seq); +	return -EINVAL; +} + +static int xfrm_replay_recheck_esn(struct xfrm_state *x, +				   struct sk_buff *skb, __be32 net_seq) +{ +	if (unlikely(XFRM_SKB_CB(skb)->seq.input.hi != +		     htonl(xfrm_replay_seqhi(x, net_seq)))) { +			x->stats.replay_window++; +			return -EINVAL; +	} + +	return xfrm_replay_check_esn(x, skb, net_seq); +} + +static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) +{ +	unsigned int bitnr, nr, i; +	int wrap; +	u32 diff, pos, seq, seq_hi; +	struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + +	if (!replay_esn->replay_window) +		return; + +	seq = ntohl(net_seq); +	pos = (replay_esn->seq - 1) % replay_esn->replay_window; +	seq_hi = xfrm_replay_seqhi(x, net_seq); +	wrap = seq_hi - replay_esn->seq_hi; + +	if ((!wrap && seq > replay_esn->seq) || wrap > 0) { +		if (likely(!wrap)) +			diff = seq - replay_esn->seq; +		else +			diff = ~replay_esn->seq + seq + 1; + +		if (diff < replay_esn->replay_window) { +			for (i = 1; i < diff; i++) { +				bitnr = (pos + i) % replay_esn->replay_window; +				nr = bitnr >> 5; +				bitnr = bitnr & 0x1F; +				replay_esn->bmp[nr] &=  ~(1U << bitnr); +			} +		} else { +			nr = (replay_esn->replay_window - 1) >> 5; +			for (i = 0; i <= nr; i++) +				replay_esn->bmp[i] = 0; +		} + +		bitnr = (pos + diff) % replay_esn->replay_window; +		replay_esn->seq = seq; + +		if (unlikely(wrap > 0)) +			replay_esn->seq_hi++; +	} else { +		diff = replay_esn->seq - seq; + +		if (pos >= diff) +			bitnr = (pos - diff) % replay_esn->replay_window; +		else +			bitnr = replay_esn->replay_window - (diff - pos); +	} + +	nr = bitnr >> 5; +	bitnr = bitnr & 0x1F; +	replay_esn->bmp[nr] |= (1U << bitnr); + +	if (xfrm_aevent_is_on(xs_net(x))) +		x->repl->notify(x, XFRM_REPLAY_UPDATE); +} + +static struct xfrm_replay xfrm_replay_legacy = { +	.advance	= xfrm_replay_advance, +	.check		= xfrm_replay_check, +	.recheck	= xfrm_replay_check, +	.notify		= xfrm_replay_notify, +	.overflow	= xfrm_replay_overflow, +}; + +static struct xfrm_replay xfrm_replay_bmp = { +	.advance	= xfrm_replay_advance_bmp, +	.check		= xfrm_replay_check_bmp, +	.recheck	= xfrm_replay_check_bmp, +	.notify		= xfrm_replay_notify_bmp, +	.overflow	= xfrm_replay_overflow_bmp, +}; + +static struct xfrm_replay xfrm_replay_esn = { +	.advance	= xfrm_replay_advance_esn, +	.check		= xfrm_replay_check_esn, +	.recheck	= xfrm_replay_recheck_esn, +	.notify		= xfrm_replay_notify_esn, +	.overflow	= xfrm_replay_overflow_esn, +}; + +int xfrm_init_replay(struct xfrm_state *x) +{ +	struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + +	if (replay_esn) { +		if (replay_esn->replay_window > +		    replay_esn->bmp_len * sizeof(__u32) * 8) +			return -EINVAL; + +		if (x->props.flags & XFRM_STATE_ESN) { +			if (replay_esn->replay_window == 0) +				return -EINVAL; +			x->repl = &xfrm_replay_esn; +		} else +			x->repl = &xfrm_replay_bmp; +	} else +		x->repl = &xfrm_replay_legacy; + +	return 0; +} +EXPORT_SYMBOL(xfrm_init_replay); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index eb96ce52f17..0ab54134bb4 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -35,23 +35,11 @@        destination/tunnel endpoint. (output)   */ -static DEFINE_SPINLOCK(xfrm_state_lock); -  static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; -static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); -static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); - -#ifdef CONFIG_AUDITSYSCALL -static void xfrm_audit_state_replay(struct xfrm_state *x, -				    struct sk_buff *skb, __be32 net_seq); -#else -#define xfrm_audit_state_replay(x, s, sq)	do { ; } while (0) -#endif /* CONFIG_AUDITSYSCALL */ -  static inline unsigned int xfrm_dst_hash(struct net *net, -					 xfrm_address_t *daddr, -					 xfrm_address_t *saddr, +					 const xfrm_address_t *daddr, +					 const xfrm_address_t *saddr,  					 u32 reqid,  					 unsigned short family)  { @@ -59,15 +47,16 @@ static inline unsigned int xfrm_dst_hash(struct net *net,  }  static inline unsigned int xfrm_src_hash(struct net *net, -					 xfrm_address_t *daddr, -					 xfrm_address_t *saddr, +					 const xfrm_address_t *daddr, +					 const xfrm_address_t *saddr,  					 unsigned short family)  {  	return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);  }  static inline unsigned int -xfrm_spi_hash(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr, +	      __be32 spi, u8 proto, unsigned short family)  {  	return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);  } @@ -78,10 +67,10 @@ static void xfrm_hash_transfer(struct hlist_head *list,  			       struct hlist_head *nspitable,  			       unsigned int nhashmask)  { -	struct hlist_node *entry, *tmp; +	struct hlist_node *tmp;  	struct xfrm_state *x; -	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { +	hlist_for_each_entry_safe(x, tmp, list, bydst) {  		unsigned int h;  		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, @@ -136,7 +125,7 @@ static void xfrm_hash_resize(struct work_struct *work)  		goto out_unlock;  	} -	spin_lock_bh(&xfrm_state_lock); +	spin_lock_bh(&net->xfrm.xfrm_state_lock);  	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;  	for (i = net->xfrm.state_hmask; i >= 0; i--) @@ -153,7 +142,7 @@ static void xfrm_hash_resize(struct work_struct *work)  	net->xfrm.state_byspi = nspi;  	net->xfrm.state_hmask = nhashmask; -	spin_unlock_bh(&xfrm_state_lock); +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);  	osize = (ohashmask + 1) * sizeof(struct hlist_head);  	xfrm_hash_free(odst, osize); @@ -164,68 +153,56 @@ out_unlock:  	mutex_unlock(&hash_resize_mutex);  } -static DEFINE_RWLOCK(xfrm_state_afinfo_lock); -static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; +static DEFINE_SPINLOCK(xfrm_state_afinfo_lock); +static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO];  static DEFINE_SPINLOCK(xfrm_state_gc_lock);  int __xfrm_state_delete(struct xfrm_state *x);  int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); -void km_state_expired(struct xfrm_state *x, int hard, u32 pid); - -static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family) -{ -	struct xfrm_state_afinfo *afinfo; -	if (unlikely(family >= NPROTO)) -		return NULL; -	write_lock_bh(&xfrm_state_afinfo_lock); -	afinfo = xfrm_state_afinfo[family]; -	if (unlikely(!afinfo)) -		write_unlock_bh(&xfrm_state_afinfo_lock); -	return afinfo; -} - -static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo) -	__releases(xfrm_state_afinfo_lock) -{ -	write_unlock_bh(&xfrm_state_afinfo_lock); -} +bool km_is_alive(const struct km_event *c); +void km_state_expired(struct xfrm_state *x, int hard, u32 portid); +static DEFINE_SPINLOCK(xfrm_type_lock);  int xfrm_register_type(const struct xfrm_type *type, unsigned short family)  { -	struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); +	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);  	const struct xfrm_type **typemap;  	int err = 0;  	if (unlikely(afinfo == NULL))  		return -EAFNOSUPPORT;  	typemap = afinfo->type_map; +	spin_lock_bh(&xfrm_type_lock);  	if (likely(typemap[type->proto] == NULL))  		typemap[type->proto] = type;  	else  		err = -EEXIST; -	xfrm_state_unlock_afinfo(afinfo); +	spin_unlock_bh(&xfrm_type_lock); +	xfrm_state_put_afinfo(afinfo);  	return err;  }  EXPORT_SYMBOL(xfrm_register_type);  int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)  { -	struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); +	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);  	const struct xfrm_type **typemap;  	int err = 0;  	if (unlikely(afinfo == NULL))  		return -EAFNOSUPPORT;  	typemap = afinfo->type_map; +	spin_lock_bh(&xfrm_type_lock);  	if (unlikely(typemap[type->proto] != type))  		err = -ENOENT;  	else  		typemap[type->proto] = NULL; -	xfrm_state_unlock_afinfo(afinfo); +	spin_unlock_bh(&xfrm_type_lock); +	xfrm_state_put_afinfo(afinfo);  	return err;  }  EXPORT_SYMBOL(xfrm_unregister_type); @@ -262,6 +239,7 @@ static void xfrm_put_type(const struct xfrm_type *type)  	module_put(type->owner);  } +static DEFINE_SPINLOCK(xfrm_mode_lock);  int xfrm_register_mode(struct xfrm_mode *mode, int family)  {  	struct xfrm_state_afinfo *afinfo; @@ -271,12 +249,13 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family)  	if (unlikely(mode->encap >= XFRM_MODE_MAX))  		return -EINVAL; -	afinfo = xfrm_state_lock_afinfo(family); +	afinfo = xfrm_state_get_afinfo(family);  	if (unlikely(afinfo == NULL))  		return -EAFNOSUPPORT;  	err = -EEXIST;  	modemap = afinfo->mode_map; +	spin_lock_bh(&xfrm_mode_lock);  	if (modemap[mode->encap])  		goto out; @@ -289,7 +268,8 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family)  	err = 0;  out: -	xfrm_state_unlock_afinfo(afinfo); +	spin_unlock_bh(&xfrm_mode_lock); +	xfrm_state_put_afinfo(afinfo);  	return err;  }  EXPORT_SYMBOL(xfrm_register_mode); @@ -303,19 +283,21 @@ int xfrm_unregister_mode(struct xfrm_mode *mode, int family)  	if (unlikely(mode->encap >= XFRM_MODE_MAX))  		return -EINVAL; -	afinfo = xfrm_state_lock_afinfo(family); +	afinfo = xfrm_state_get_afinfo(family);  	if (unlikely(afinfo == NULL))  		return -EAFNOSUPPORT;  	err = -ENOENT;  	modemap = afinfo->mode_map; +	spin_lock_bh(&xfrm_mode_lock);  	if (likely(modemap[mode->encap] == mode)) {  		modemap[mode->encap] = NULL;  		module_put(mode->afinfo->owner);  		err = 0;  	} -	xfrm_state_unlock_afinfo(afinfo); +	spin_unlock_bh(&xfrm_mode_lock); +	xfrm_state_put_afinfo(afinfo);  	return err;  }  EXPORT_SYMBOL(xfrm_unregister_mode); @@ -362,6 +344,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)  	kfree(x->calg);  	kfree(x->encap);  	kfree(x->coaddr); +	kfree(x->replay_esn); +	kfree(x->preplay_esn);  	if (x->inner_mode)  		xfrm_put_mode(x->inner_mode);  	if (x->inner_mode_iaf) @@ -380,17 +364,15 @@ static void xfrm_state_gc_task(struct work_struct *work)  {  	struct net *net = container_of(work, struct net, xfrm.state_gc_work);  	struct xfrm_state *x; -	struct hlist_node *entry, *tmp; +	struct hlist_node *tmp;  	struct hlist_head gc_list;  	spin_lock_bh(&xfrm_state_gc_lock);  	hlist_move_list(&net->xfrm.state_gc_list, &gc_list);  	spin_unlock_bh(&xfrm_state_gc_lock); -	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist) +	hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)  		xfrm_state_gc_destroy(x); - -	wake_up(&net->xfrm.km_waitq);  }  static inline unsigned long make_jiffies(long secs) @@ -401,11 +383,10 @@ static inline unsigned long make_jiffies(long secs)  		return secs*HZ;  } -static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) +static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)  {  	struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);  	struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer); -	struct net *net = xs_net(x);  	unsigned long now = get_seconds();  	long next = LONG_MAX;  	int warn = 0; @@ -419,8 +400,17 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me)  	if (x->lft.hard_add_expires_seconds) {  		long tmo = x->lft.hard_add_expires_seconds +  			x->curlft.add_time - now; -		if (tmo <= 0) -			goto expired; +		if (tmo <= 0) { +			if (x->xflags & XFRM_SOFT_EXPIRE) { +				/* enter hard expire without soft expire first?! +				 * setting a new date could trigger this. +				 * workarbound: fix x->curflt.add_time by below: +				 */ +				x->curlft.add_time = now - x->saved_tmo - 1; +				tmo = x->lft.hard_add_expires_seconds - x->saved_tmo; +			} else +				goto expired; +		}  		if (tmo < next)  			next = tmo;  	} @@ -437,10 +427,14 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me)  	if (x->lft.soft_add_expires_seconds) {  		long tmo = x->lft.soft_add_expires_seconds +  			x->curlft.add_time - now; -		if (tmo <= 0) +		if (tmo <= 0) {  			warn = 1; -		else if (tmo < next) +			x->xflags &= ~XFRM_SOFT_EXPIRE; +		} else if (tmo < next) {  			next = tmo; +			x->xflags |= XFRM_SOFT_EXPIRE; +			x->saved_tmo = tmo; +		}  	}  	if (x->lft.soft_use_expires_seconds) {  		long tmo = x->lft.soft_use_expires_seconds + @@ -455,27 +449,21 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me)  	if (warn)  		km_state_expired(x, 0, 0);  resched: -	if (next != LONG_MAX){ +	if (next != LONG_MAX) {  		tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL);  	}  	goto out;  expired: -	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) { +	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0)  		x->km.state = XFRM_STATE_EXPIRED; -		wake_up(&net->xfrm.km_waitq); -		next = 2; -		goto resched; -	}  	err = __xfrm_state_delete(x); -	if (!err && x->id.spi) +	if (!err)  		km_state_expired(x, 1, 0); -	xfrm_audit_state_delete(x, err ? 0 : 1, -				audit_get_loginuid(current), -				audit_get_sessionid(current), 0); +	xfrm_audit_state_delete(x, err ? 0 : 1, true);  out:  	spin_unlock(&x->lock); @@ -498,7 +486,8 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)  		INIT_HLIST_NODE(&x->bydst);  		INIT_HLIST_NODE(&x->bysrc);  		INIT_HLIST_NODE(&x->byspi); -		tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, CLOCK_REALTIME, HRTIMER_MODE_ABS); +		tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, +					CLOCK_BOOTTIME, HRTIMER_MODE_ABS);  		setup_timer(&x->rtimer, xfrm_replay_timer_handler,  				(unsigned long)x);  		x->curlft.add_time = get_seconds(); @@ -536,14 +525,14 @@ int __xfrm_state_delete(struct xfrm_state *x)  	if (x->km.state != XFRM_STATE_DEAD) {  		x->km.state = XFRM_STATE_DEAD; -		spin_lock(&xfrm_state_lock); +		spin_lock(&net->xfrm.xfrm_state_lock);  		list_del(&x->km.all);  		hlist_del(&x->bydst);  		hlist_del(&x->bysrc);  		if (x->id.spi)  			hlist_del(&x->byspi);  		net->xfrm.state_num--; -		spin_unlock(&xfrm_state_lock); +		spin_unlock(&net->xfrm.xfrm_state_lock);  		/* All xfrm_state objects are created by xfrm_state_alloc.  		 * The xfrm_state_alloc call gives a reference, and that @@ -571,21 +560,17 @@ EXPORT_SYMBOL(xfrm_state_delete);  #ifdef CONFIG_SECURITY_NETWORK_XFRM  static inline int -xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info) +xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)  {  	int i, err = 0;  	for (i = 0; i <= net->xfrm.state_hmask; i++) { -		struct hlist_node *entry;  		struct xfrm_state *x; -		hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { +		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {  			if (xfrm_id_proto_match(x->id.proto, proto) &&  			   (err = security_xfrm_state_delete(x)) != 0) { -				xfrm_audit_state_delete(x, 0, -							audit_info->loginuid, -							audit_info->sessionid, -							audit_info->secid); +				xfrm_audit_state_delete(x, 0, task_valid);  				return err;  			}  		} @@ -595,42 +580,39 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi  }  #else  static inline int -xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info) +xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)  {  	return 0;  }  #endif -int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info) +int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)  {  	int i, err = 0, cnt = 0; -	spin_lock_bh(&xfrm_state_lock); -	err = xfrm_state_flush_secctx_check(net, proto, audit_info); +	spin_lock_bh(&net->xfrm.xfrm_state_lock); +	err = xfrm_state_flush_secctx_check(net, proto, task_valid);  	if (err)  		goto out;  	err = -ESRCH;  	for (i = 0; i <= net->xfrm.state_hmask; i++) { -		struct hlist_node *entry;  		struct xfrm_state *x;  restart: -		hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { +		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {  			if (!xfrm_state_kern(x) &&  			    xfrm_id_proto_match(x->id.proto, proto)) {  				xfrm_state_hold(x); -				spin_unlock_bh(&xfrm_state_lock); +				spin_unlock_bh(&net->xfrm.xfrm_state_lock);  				err = xfrm_state_delete(x);  				xfrm_audit_state_delete(x, err ? 0 : 1, -							audit_info->loginuid, -							audit_info->sessionid, -							audit_info->secid); +							task_valid);  				xfrm_state_put(x);  				if (!err)  					cnt++; -				spin_lock_bh(&xfrm_state_lock); +				spin_lock_bh(&net->xfrm.xfrm_state_lock);  				goto restart;  			}  		} @@ -639,26 +621,25 @@ restart:  		err = 0;  out: -	spin_unlock_bh(&xfrm_state_lock); -	wake_up(&net->xfrm.km_waitq); +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);  	return err;  }  EXPORT_SYMBOL(xfrm_state_flush);  void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)  { -	spin_lock_bh(&xfrm_state_lock); +	spin_lock_bh(&net->xfrm.xfrm_state_lock);  	si->sadcnt = net->xfrm.state_num;  	si->sadhcnt = net->xfrm.state_hmask;  	si->sadhmcnt = xfrm_state_hashmax; -	spin_unlock_bh(&xfrm_state_lock); +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);  }  EXPORT_SYMBOL(xfrm_sad_getinfo);  static int -xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl, -		    struct xfrm_tmpl *tmpl, -		    xfrm_address_t *daddr, xfrm_address_t *saddr, +xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl, +		    const struct xfrm_tmpl *tmpl, +		    const xfrm_address_t *daddr, const xfrm_address_t *saddr,  		    unsigned short family)  {  	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); @@ -677,17 +658,19 @@ xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl,  	return 0;  } -static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, +					      const xfrm_address_t *daddr, +					      __be32 spi, u8 proto, +					      unsigned short family)  {  	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);  	struct xfrm_state *x; -	struct hlist_node *entry; -	hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) { +	hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) {  		if (x->props.family != family ||  		    x->id.spi       != spi ||  		    x->id.proto     != proto || -		    xfrm_addr_cmp(&x->id.daddr, daddr, family)) +		    !xfrm_addr_equal(&x->id.daddr, daddr, family))  			continue;  		if ((mark & x->mark.m) != x->mark.v) @@ -699,17 +682,19 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_ad  	return NULL;  } -static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, +						     const xfrm_address_t *daddr, +						     const xfrm_address_t *saddr, +						     u8 proto, unsigned short family)  {  	unsigned int h = xfrm_src_hash(net, daddr, saddr, family);  	struct xfrm_state *x; -	struct hlist_node *entry; -	hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) { +	hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {  		if (x->props.family != family ||  		    x->id.proto     != proto || -		    xfrm_addr_cmp(&x->id.daddr, daddr, family) || -		    xfrm_addr_cmp(&x->props.saddr, saddr, family)) +		    !xfrm_addr_equal(&x->id.daddr, daddr, family) || +		    !xfrm_addr_equal(&x->props.saddr, saddr, family))  			continue;  		if ((mark & x->mark.m) != x->mark.v) @@ -746,8 +731,7 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)  }  static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, -			       struct flowi *fl, unsigned short family, -			       xfrm_address_t *daddr, xfrm_address_t *saddr, +			       const struct flowi *fl, unsigned short family,  			       struct xfrm_state **best, int *acq_in_progress,  			       int *error)  { @@ -784,27 +768,27 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,  }  struct xfrm_state * -xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, -		struct flowi *fl, struct xfrm_tmpl *tmpl, +xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, +		const struct flowi *fl, struct xfrm_tmpl *tmpl,  		struct xfrm_policy *pol, int *err,  		unsigned short family)  {  	static xfrm_address_t saddr_wildcard = { };  	struct net *net = xp_net(pol);  	unsigned int h, h_wildcard; -	struct hlist_node *entry;  	struct xfrm_state *x, *x0, *to_put;  	int acquire_in_progress = 0;  	int error = 0;  	struct xfrm_state *best = NULL;  	u32 mark = pol->mark.v & pol->mark.m;  	unsigned short encap_family = tmpl->encap_family; +	struct km_event c;  	to_put = NULL; -	spin_lock_bh(&xfrm_state_lock); +	spin_lock_bh(&net->xfrm.xfrm_state_lock);  	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); -	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { +	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {  		if (x->props.family == encap_family &&  		    x->props.reqid == tmpl->reqid &&  		    (mark & x->mark.m) == x->mark.v && @@ -813,23 +797,23 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,  		    tmpl->mode == x->props.mode &&  		    tmpl->id.proto == x->id.proto &&  		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) -			xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr, +			xfrm_state_look_at(pol, x, fl, encap_family,  					   &best, &acquire_in_progress, &error);  	} -	if (best) +	if (best || acquire_in_progress)  		goto found;  	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); -	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { +	hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) {  		if (x->props.family == encap_family &&  		    x->props.reqid == tmpl->reqid &&  		    (mark & x->mark.m) == x->mark.v &&  		    !(x->props.flags & XFRM_STATE_WILDRECV) && -		    xfrm_state_addr_check(x, daddr, saddr, encap_family) && +		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&  		    tmpl->mode == x->props.mode &&  		    tmpl->id.proto == x->id.proto &&  		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) -			xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr, +			xfrm_state_look_at(pol, x, fl, encap_family,  					   &best, &acquire_in_progress, &error);  	} @@ -843,6 +827,17 @@ found:  			error = -EEXIST;  			goto out;  		} + +		c.net = net; +		/* If the KMs have no listeners (yet...), avoid allocating an SA +		 * for each and every packet - garbage collection might not +		 * handle the flood. +		 */ +		if (!km_is_alive(&c)) { +			error = -ESRCH; +			goto out; +		} +  		x = xfrm_state_alloc(net);  		if (x == NULL) {  			error = -ENOMEM; @@ -853,7 +848,7 @@ found:  		xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);  		memcpy(&x->mark, &pol->mark, sizeof(x->mark)); -		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); +		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);  		if (error) {  			x->km.state = XFRM_STATE_DEAD;  			to_put = x; @@ -887,7 +882,7 @@ out:  		xfrm_state_hold(x);  	else  		*err = acquire_in_progress ? -EAGAIN : error; -	spin_unlock_bh(&xfrm_state_lock); +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);  	if (to_put)  		xfrm_state_put(to_put);  	return x; @@ -900,11 +895,10 @@ xfrm_stateonly_find(struct net *net, u32 mark,  {  	unsigned int h;  	struct xfrm_state *rx = NULL, *x = NULL; -	struct hlist_node *entry; -	spin_lock(&xfrm_state_lock); +	spin_lock_bh(&net->xfrm.xfrm_state_lock);  	h = xfrm_dst_hash(net, daddr, saddr, reqid, family); -	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { +	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {  		if (x->props.family == family &&  		    x->props.reqid == reqid &&  		    (mark & x->mark.m) == x->mark.v && @@ -920,13 +914,35 @@ xfrm_stateonly_find(struct net *net, u32 mark,  	if (rx)  		xfrm_state_hold(rx); -	spin_unlock(&xfrm_state_lock); +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);  	return rx;  }  EXPORT_SYMBOL(xfrm_stateonly_find); +struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi, +					      unsigned short family) +{ +	struct xfrm_state *x; +	struct xfrm_state_walk *w; + +	spin_lock_bh(&net->xfrm.xfrm_state_lock); +	list_for_each_entry(w, &net->xfrm.state_all, all) { +		x = container_of(w, struct xfrm_state, km); +		if (x->props.family != family || +			x->id.spi != spi) +			continue; + +		spin_unlock_bh(&net->xfrm.xfrm_state_lock); +		xfrm_state_hold(x); +		return x; +	} +	spin_unlock_bh(&net->xfrm.xfrm_state_lock); +	return NULL; +} +EXPORT_SYMBOL(xfrm_state_lookup_byspi); +  static void __xfrm_state_insert(struct xfrm_state *x)  {  	struct net *net = xs_net(x); @@ -952,53 +968,57 @@ static void __xfrm_state_insert(struct xfrm_state *x)  	if (x->replay_maxage)  		mod_timer(&x->rtimer, jiffies + x->replay_maxage); -	wake_up(&net->xfrm.km_waitq); -  	net->xfrm.state_num++;  	xfrm_hash_grow_check(net, x->bydst.next != NULL);  } -/* xfrm_state_lock is held */ +/* net->xfrm.xfrm_state_lock is held */  static void __xfrm_state_bump_genids(struct xfrm_state *xnew)  {  	struct net *net = xs_net(xnew);  	unsigned short family = xnew->props.family;  	u32 reqid = xnew->props.reqid;  	struct xfrm_state *x; -	struct hlist_node *entry;  	unsigned int h;  	u32 mark = xnew->mark.v & xnew->mark.m;  	h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); -	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { +	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {  		if (x->props.family	== family &&  		    x->props.reqid	== reqid &&  		    (mark & x->mark.m) == x->mark.v && -		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && -		    !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family)) +		    xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) && +		    xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))  			x->genid++;  	}  }  void xfrm_state_insert(struct xfrm_state *x)  { -	spin_lock_bh(&xfrm_state_lock); +	struct net *net = xs_net(x); + +	spin_lock_bh(&net->xfrm.xfrm_state_lock);  	__xfrm_state_bump_genids(x);  	__xfrm_state_insert(x); -	spin_unlock_bh(&xfrm_state_lock); +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);  }  EXPORT_SYMBOL(xfrm_state_insert); -/* xfrm_state_lock is held */ -static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) +/* net->xfrm.xfrm_state_lock is held */ +static struct xfrm_state *__find_acq_core(struct net *net, +					  const struct xfrm_mark *m, +					  unsigned short family, u8 mode, +					  u32 reqid, u8 proto, +					  const xfrm_address_t *daddr, +					  const xfrm_address_t *saddr, +					  int create)  {  	unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); -	struct hlist_node *entry;  	struct xfrm_state *x;  	u32 mark = m->v & m->m; -	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { +	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {  		if (x->props.reqid  != reqid ||  		    x->props.mode   != mode ||  		    x->props.family != family || @@ -1006,8 +1026,8 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m,  		    x->id.spi       != 0 ||  		    x->id.proto	    != proto ||  		    (mark & x->mark.m) != x->mark.v || -		    xfrm_addr_cmp(&x->id.daddr, daddr, family) || -		    xfrm_addr_cmp(&x->props.saddr, saddr, family)) +		    !xfrm_addr_equal(&x->id.daddr, daddr, family) || +		    !xfrm_addr_equal(&x->props.saddr, saddr, family))  			continue;  		xfrm_state_hold(x); @@ -1030,16 +1050,12 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m,  			break;  		case AF_INET6: -			ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6, -				       (struct in6_addr *)daddr); -			ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6, -				       (struct in6_addr *)saddr); +			*(struct in6_addr *)x->sel.daddr.a6 = *(struct in6_addr *)daddr; +			*(struct in6_addr *)x->sel.saddr.a6 = *(struct in6_addr *)saddr;  			x->sel.prefixlen_d = 128;  			x->sel.prefixlen_s = 128; -			ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6, -				       (struct in6_addr *)saddr); -			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6, -				       (struct in6_addr *)daddr); +			*(struct in6_addr *)x->props.saddr.a6 = *(struct in6_addr *)saddr; +			*(struct in6_addr *)x->id.daddr.a6 = *(struct in6_addr *)daddr;  			break;  		} @@ -1081,7 +1097,7 @@ int xfrm_state_add(struct xfrm_state *x)  	to_put = NULL; -	spin_lock_bh(&xfrm_state_lock); +	spin_lock_bh(&net->xfrm.xfrm_state_lock);  	x1 = __xfrm_state_locate(x, use_spi, family);  	if (x1) { @@ -1094,7 +1110,7 @@ int xfrm_state_add(struct xfrm_state *x)  	if (use_spi && x->km.seq) {  		x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);  		if (x1 && ((x1->id.proto != x->id.proto) || -		    xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) { +		    !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {  			to_put = x1;  			x1 = NULL;  		} @@ -1110,7 +1126,7 @@ int xfrm_state_add(struct xfrm_state *x)  	err = 0;  out: -	spin_unlock_bh(&xfrm_state_lock); +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);  	if (x1) {  		xfrm_state_delete(x1); @@ -1125,10 +1141,9 @@ out:  EXPORT_SYMBOL(xfrm_state_add);  #ifdef CONFIG_XFRM_MIGRATE -static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) +static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig)  {  	struct net *net = xs_net(orig); -	int err = -ENOMEM;  	struct xfrm_state *x = xfrm_state_alloc(net);  	if (!x)  		goto out; @@ -1149,6 +1164,11 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)  	}  	x->props.aalgo = orig->props.aalgo; +	if (orig->aead) { +		x->aead = xfrm_algo_aead_clone(orig->aead); +		if (!x->aead) +			goto error; +	}  	if (orig->ealg) {  		x->ealg = xfrm_algo_clone(orig->ealg);  		if (!x->ealg) @@ -1176,14 +1196,22 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)  			goto error;  	} +	if (orig->replay_esn) { +		if (xfrm_replay_clone(x, orig)) +			goto error; +	} +  	memcpy(&x->mark, &orig->mark, sizeof(x->mark)); -	err = xfrm_init_state(x); -	if (err) +	if (xfrm_init_state(x) < 0)  		goto error;  	x->props.flags = orig->props.flags; +	x->props.extra_flags = orig->props.extra_flags; +	x->tfcpad = orig->tfcpad; +	x->replay_maxdiff = orig->replay_maxdiff; +	x->replay_maxage = orig->replay_maxage;  	x->curlft.add_time = orig->curlft.add_time;  	x->km.state = orig->km.state;  	x->km.seq = orig->km.seq; @@ -1193,63 +1221,62 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)   error:  	xfrm_state_put(x);  out: -	if (errp) -		*errp = err;  	return NULL;  } -/* xfrm_state_lock is held */ -struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) +struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net)  {  	unsigned int h; -	struct xfrm_state *x; -	struct hlist_node *entry; +	struct xfrm_state *x = NULL; + +	spin_lock_bh(&net->xfrm.xfrm_state_lock);  	if (m->reqid) { -		h = xfrm_dst_hash(&init_net, &m->old_daddr, &m->old_saddr, +		h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr,  				  m->reqid, m->old_family); -		hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { +		hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {  			if (x->props.mode != m->mode ||  			    x->id.proto != m->proto)  				continue;  			if (m->reqid && x->props.reqid != m->reqid)  				continue; -			if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr, -					  m->old_family) || -			    xfrm_addr_cmp(&x->props.saddr, &m->old_saddr, -					  m->old_family)) +			if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, +					     m->old_family) || +			    !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, +					     m->old_family))  				continue;  			xfrm_state_hold(x); -			return x; +			break;  		}  	} else { -		h = xfrm_src_hash(&init_net, &m->old_daddr, &m->old_saddr, +		h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr,  				  m->old_family); -		hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) { +		hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {  			if (x->props.mode != m->mode ||  			    x->id.proto != m->proto)  				continue; -			if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr, -					  m->old_family) || -			    xfrm_addr_cmp(&x->props.saddr, &m->old_saddr, -					  m->old_family)) +			if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, +					     m->old_family) || +			    !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, +					     m->old_family))  				continue;  			xfrm_state_hold(x); -			return x; +			break;  		}  	} -	return NULL; +	spin_unlock_bh(&net->xfrm.xfrm_state_lock); + +	return x;  }  EXPORT_SYMBOL(xfrm_migrate_state_find); -struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x, -				       struct xfrm_migrate *m) +struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, +				      struct xfrm_migrate *m)  {  	struct xfrm_state *xc; -	int err; -	xc = xfrm_state_clone(x, &err); +	xc = xfrm_state_clone(x);  	if (!xc)  		return NULL; @@ -1257,18 +1284,18 @@ struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,  	memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));  	/* add state */ -	if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) { +	if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) {  		/* a care is needed when the destination address of the  		   state is to be updated as it is a part of triplet */  		xfrm_state_insert(xc);  	} else { -		if ((err = xfrm_state_add(xc)) < 0) +		if (xfrm_state_add(xc) < 0)  			goto error;  	}  	return xc;  error: -	kfree(xc); +	xfrm_state_put(xc);  	return NULL;  }  EXPORT_SYMBOL(xfrm_state_migrate); @@ -1279,10 +1306,11 @@ int xfrm_state_update(struct xfrm_state *x)  	struct xfrm_state *x1, *to_put;  	int err;  	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); +	struct net *net = xs_net(x);  	to_put = NULL; -	spin_lock_bh(&xfrm_state_lock); +	spin_lock_bh(&net->xfrm.xfrm_state_lock);  	x1 = __xfrm_state_locate(x, use_spi, x->props.family);  	err = -ESRCH; @@ -1302,7 +1330,7 @@ int xfrm_state_update(struct xfrm_state *x)  	err = 0;  out: -	spin_unlock_bh(&xfrm_state_lock); +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);  	if (to_put)  		xfrm_state_put(to_put); @@ -1334,6 +1362,8 @@ out:  			xfrm_state_check_expire(x1);  		err = 0; +		x->km.state = XFRM_STATE_DEAD; +		__xfrm_state_put(x);  	}  	spin_unlock_bh(&x1->lock); @@ -1348,13 +1378,10 @@ int xfrm_state_check_expire(struct xfrm_state *x)  	if (!x->curlft.use_time)  		x->curlft.use_time = get_seconds(); -	if (x->km.state != XFRM_STATE_VALID) -		return -EINVAL; -  	if (x->curlft.bytes >= x->lft.hard_byte_limit ||  	    x->curlft.packets >= x->lft.hard_packet_limit) {  		x->km.state = XFRM_STATE_EXPIRED; -		tasklet_hrtimer_start(&x->mtimer, ktime_set(0,0), HRTIMER_MODE_REL); +		tasklet_hrtimer_start(&x->mtimer, ktime_set(0, 0), HRTIMER_MODE_REL);  		return -EINVAL;  	} @@ -1369,42 +1396,42 @@ int xfrm_state_check_expire(struct xfrm_state *x)  EXPORT_SYMBOL(xfrm_state_check_expire);  struct xfrm_state * -xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi, +xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,  		  u8 proto, unsigned short family)  {  	struct xfrm_state *x; -	spin_lock_bh(&xfrm_state_lock); +	spin_lock_bh(&net->xfrm.xfrm_state_lock);  	x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); -	spin_unlock_bh(&xfrm_state_lock); +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);  	return x;  }  EXPORT_SYMBOL(xfrm_state_lookup);  struct xfrm_state *  xfrm_state_lookup_byaddr(struct net *net, u32 mark, -			 xfrm_address_t *daddr, xfrm_address_t *saddr, +			 const xfrm_address_t *daddr, const xfrm_address_t *saddr,  			 u8 proto, unsigned short family)  {  	struct xfrm_state *x; -	spin_lock_bh(&xfrm_state_lock); +	spin_lock_bh(&net->xfrm.xfrm_state_lock);  	x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family); -	spin_unlock_bh(&xfrm_state_lock); +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);  	return x;  }  EXPORT_SYMBOL(xfrm_state_lookup_byaddr);  struct xfrm_state * -xfrm_find_acq(struct net *net, struct xfrm_mark *mark, u8 mode, u32 reqid, u8 proto, -	      xfrm_address_t *daddr, xfrm_address_t *saddr, -	      int create, unsigned short family) +xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid, +	      u8 proto, const xfrm_address_t *daddr, +	      const xfrm_address_t *saddr, int create, unsigned short family)  {  	struct xfrm_state *x; -	spin_lock_bh(&xfrm_state_lock); +	spin_lock_bh(&net->xfrm.xfrm_state_lock);  	x = __find_acq_core(net, mark, family, mode, reqid, proto, daddr, saddr, create); -	spin_unlock_bh(&xfrm_state_lock); +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);  	return x;  } @@ -1413,17 +1440,17 @@ EXPORT_SYMBOL(xfrm_find_acq);  #ifdef CONFIG_XFRM_SUB_POLICY  int  xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, -	       unsigned short family) +	       unsigned short family, struct net *net)  {  	int err = 0;  	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);  	if (!afinfo)  		return -EAFNOSUPPORT; -	spin_lock_bh(&xfrm_state_lock); +	spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/  	if (afinfo->tmpl_sort)  		err = afinfo->tmpl_sort(dst, src, n); -	spin_unlock_bh(&xfrm_state_lock); +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);  	xfrm_state_put_afinfo(afinfo);  	return err;  } @@ -1435,13 +1462,15 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,  {  	int err = 0;  	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); +	struct net *net = xs_net(*src); +  	if (!afinfo)  		return -EAFNOSUPPORT; -	spin_lock_bh(&xfrm_state_lock); +	spin_lock_bh(&net->xfrm.xfrm_state_lock);  	if (afinfo->state_sort)  		err = afinfo->state_sort(dst, src, n); -	spin_unlock_bh(&xfrm_state_lock); +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);  	xfrm_state_put_afinfo(afinfo);  	return err;  } @@ -1455,10 +1484,9 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s  	int i;  	for (i = 0; i <= net->xfrm.state_hmask; i++) { -		struct hlist_node *entry;  		struct xfrm_state *x; -		hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { +		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {  			if (x->km.seq == seq &&  			    (mark & x->mark.m) == x->mark.v &&  			    x->km.state == XFRM_STATE_ACQ) { @@ -1474,9 +1502,9 @@ struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)  {  	struct xfrm_state *x; -	spin_lock_bh(&xfrm_state_lock); +	spin_lock_bh(&net->xfrm.xfrm_state_lock);  	x = __xfrm_find_acq_byseq(net, mark, seq); -	spin_unlock_bh(&xfrm_state_lock); +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);  	return x;  }  EXPORT_SYMBOL(xfrm_find_acq_byseq); @@ -1494,6 +1522,30 @@ u32 xfrm_get_acqseq(void)  }  EXPORT_SYMBOL(xfrm_get_acqseq); +int verify_spi_info(u8 proto, u32 min, u32 max) +{ +	switch (proto) { +	case IPPROTO_AH: +	case IPPROTO_ESP: +		break; + +	case IPPROTO_COMP: +		/* IPCOMP spi is 16-bits. */ +		if (max >= 0x10000) +			return -EINVAL; +		break; + +	default: +		return -EINVAL; +	} + +	if (min > max) +		return -EINVAL; + +	return 0; +} +EXPORT_SYMBOL(verify_spi_info); +  int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)  {  	struct net *net = xs_net(x); @@ -1523,8 +1575,8 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)  		x->id.spi = minspi;  	} else {  		u32 spi = 0; -		for (h=0; h<high-low+1; h++) { -			spi = low + net_random()%(high-low+1); +		for (h = 0; h < high-low+1; h++) { +			spi = low + prandom_u32()%(high-low+1);  			x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);  			if (x0 == NULL) {  				x->id.spi = htonl(spi); @@ -1534,10 +1586,10 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)  		}  	}  	if (x->id.spi) { -		spin_lock_bh(&xfrm_state_lock); +		spin_lock_bh(&net->xfrm.xfrm_state_lock);  		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);  		hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); -		spin_unlock_bh(&xfrm_state_lock); +		spin_unlock_bh(&net->xfrm.xfrm_state_lock);  		err = 0;  	} @@ -1549,6 +1601,23 @@ unlock:  }  EXPORT_SYMBOL(xfrm_alloc_spi); +static bool __xfrm_state_filter_match(struct xfrm_state *x, +				      struct xfrm_address_filter *filter) +{ +	if (filter) { +		if ((filter->family == AF_INET || +		     filter->family == AF_INET6) && +		    x->props.family != filter->family) +			return false; + +		return addr_match(&x->props.saddr, &filter->saddr, +				  filter->splen) && +		       addr_match(&x->id.daddr, &filter->daddr, +				  filter->dplen); +	} +	return true; +} +  int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,  		    int (*func)(struct xfrm_state *, int, void*),  		    void *data) @@ -1560,7 +1629,7 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,  	if (walk->seq != 0 && list_empty(&walk->all))  		return 0; -	spin_lock_bh(&xfrm_state_lock); +	spin_lock_bh(&net->xfrm.xfrm_state_lock);  	if (list_empty(&walk->all))  		x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all);  	else @@ -1571,6 +1640,8 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,  		state = container_of(x, struct xfrm_state, km);  		if (!xfrm_id_proto_match(state->id.proto, walk->proto))  			continue; +		if (!__xfrm_state_filter_match(state, walk->filter)) +			continue;  		err = func(state, walk->seq, data);  		if (err) {  			list_move_tail(&walk->all, &x->all); @@ -1584,88 +1655,44 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,  	}  	list_del_init(&walk->all);  out: -	spin_unlock_bh(&xfrm_state_lock); +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);  	return err;  }  EXPORT_SYMBOL(xfrm_state_walk); -void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) +void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto, +			  struct xfrm_address_filter *filter)  {  	INIT_LIST_HEAD(&walk->all);  	walk->proto = proto;  	walk->state = XFRM_STATE_DEAD;  	walk->seq = 0; +	walk->filter = filter;  }  EXPORT_SYMBOL(xfrm_state_walk_init); -void xfrm_state_walk_done(struct xfrm_state_walk *walk) +void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)  { +	kfree(walk->filter); +  	if (list_empty(&walk->all))  		return; -	spin_lock_bh(&xfrm_state_lock); +	spin_lock_bh(&net->xfrm.xfrm_state_lock);  	list_del(&walk->all); -	spin_unlock_bh(&xfrm_state_lock); +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);  }  EXPORT_SYMBOL(xfrm_state_walk_done); - -void xfrm_replay_notify(struct xfrm_state *x, int event) -{ -	struct km_event c; -	/* we send notify messages in case -	 *  1. we updated on of the sequence numbers, and the seqno difference -	 *     is at least x->replay_maxdiff, in this case we also update the -	 *     timeout of our timer function -	 *  2. if x->replay_maxage has elapsed since last update, -	 *     and there were changes -	 * -	 *  The state structure must be locked! -	 */ - -	switch (event) { -	case XFRM_REPLAY_UPDATE: -		if (x->replay_maxdiff && -		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) && -		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) { -			if (x->xflags & XFRM_TIME_DEFER) -				event = XFRM_REPLAY_TIMEOUT; -			else -				return; -		} - -		break; - -	case XFRM_REPLAY_TIMEOUT: -		if ((x->replay.seq == x->preplay.seq) && -		    (x->replay.bitmap == x->preplay.bitmap) && -		    (x->replay.oseq == x->preplay.oseq)) { -			x->xflags |= XFRM_TIME_DEFER; -			return; -		} - -		break; -	} - -	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state)); -	c.event = XFRM_MSG_NEWAE; -	c.data.aevent = event; -	km_state_notify(x, &c); - -	if (x->replay_maxage && -	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) -		x->xflags &= ~XFRM_TIME_DEFER; -} -  static void xfrm_replay_timer_handler(unsigned long data)  { -	struct xfrm_state *x = (struct xfrm_state*)data; +	struct xfrm_state *x = (struct xfrm_state *)data;  	spin_lock(&x->lock);  	if (x->km.state == XFRM_STATE_VALID) {  		if (xfrm_aevent_is_on(xs_net(x))) -			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT); +			x->repl->notify(x, XFRM_REPLAY_TIMEOUT);  		else  			x->xflags |= XFRM_TIME_DEFER;  	} @@ -1673,96 +1700,40 @@ static void xfrm_replay_timer_handler(unsigned long data)  	spin_unlock(&x->lock);  } -int xfrm_replay_check(struct xfrm_state *x, -		      struct sk_buff *skb, __be32 net_seq) -{ -	u32 diff; -	u32 seq = ntohl(net_seq); - -	if (unlikely(seq == 0)) -		goto err; - -	if (likely(seq > x->replay.seq)) -		return 0; - -	diff = x->replay.seq - seq; -	if (diff >= min_t(unsigned int, x->props.replay_window, -			  sizeof(x->replay.bitmap) * 8)) { -		x->stats.replay_window++; -		goto err; -	} - -	if (x->replay.bitmap & (1U << diff)) { -		x->stats.replay++; -		goto err; -	} -	return 0; - -err: -	xfrm_audit_state_replay(x, skb, net_seq); -	return -EINVAL; -} - -void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) -{ -	u32 diff; -	u32 seq = ntohl(net_seq); - -	if (seq > x->replay.seq) { -		diff = seq - x->replay.seq; -		if (diff < x->props.replay_window) -			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1; -		else -			x->replay.bitmap = 1; -		x->replay.seq = seq; -	} else { -		diff = x->replay.seq - seq; -		x->replay.bitmap |= (1U << diff); -	} - -	if (xfrm_aevent_is_on(xs_net(x))) -		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); -} -  static LIST_HEAD(xfrm_km_list); -static DEFINE_RWLOCK(xfrm_km_lock); -void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) +void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)  {  	struct xfrm_mgr *km; -	read_lock(&xfrm_km_lock); -	list_for_each_entry(km, &xfrm_km_list, list) +	rcu_read_lock(); +	list_for_each_entry_rcu(km, &xfrm_km_list, list)  		if (km->notify_policy)  			km->notify_policy(xp, dir, c); -	read_unlock(&xfrm_km_lock); +	rcu_read_unlock();  } -void km_state_notify(struct xfrm_state *x, struct km_event *c) +void km_state_notify(struct xfrm_state *x, const struct km_event *c)  {  	struct xfrm_mgr *km; -	read_lock(&xfrm_km_lock); -	list_for_each_entry(km, &xfrm_km_list, list) +	rcu_read_lock(); +	list_for_each_entry_rcu(km, &xfrm_km_list, list)  		if (km->notify)  			km->notify(x, c); -	read_unlock(&xfrm_km_lock); +	rcu_read_unlock();  }  EXPORT_SYMBOL(km_policy_notify);  EXPORT_SYMBOL(km_state_notify); -void km_state_expired(struct xfrm_state *x, int hard, u32 pid) +void km_state_expired(struct xfrm_state *x, int hard, u32 portid)  { -	struct net *net = xs_net(x);  	struct km_event c;  	c.data.hard = hard; -	c.pid = pid; +	c.portid = portid;  	c.event = XFRM_MSG_EXPIRE;  	km_state_notify(x, &c); - -	if (hard) -		wake_up(&net->xfrm.km_waitq);  }  EXPORT_SYMBOL(km_state_expired); @@ -1775,13 +1746,13 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)  	int err = -EINVAL, acqret;  	struct xfrm_mgr *km; -	read_lock(&xfrm_km_lock); -	list_for_each_entry(km, &xfrm_km_list, list) { -		acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT); +	rcu_read_lock(); +	list_for_each_entry_rcu(km, &xfrm_km_list, list) { +		acqret = km->acquire(x, t, pol);  		if (!acqret)  			err = acqret;  	} -	read_unlock(&xfrm_km_lock); +	rcu_read_unlock();  	return err;  }  EXPORT_SYMBOL(km_query); @@ -1791,51 +1762,47 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)  	int err = -EINVAL;  	struct xfrm_mgr *km; -	read_lock(&xfrm_km_lock); -	list_for_each_entry(km, &xfrm_km_list, list) { +	rcu_read_lock(); +	list_for_each_entry_rcu(km, &xfrm_km_list, list) {  		if (km->new_mapping)  			err = km->new_mapping(x, ipaddr, sport);  		if (!err)  			break;  	} -	read_unlock(&xfrm_km_lock); +	rcu_read_unlock();  	return err;  }  EXPORT_SYMBOL(km_new_mapping); -void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) +void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid)  { -	struct net *net = xp_net(pol);  	struct km_event c;  	c.data.hard = hard; -	c.pid = pid; +	c.portid = portid;  	c.event = XFRM_MSG_POLEXPIRE;  	km_policy_notify(pol, dir, &c); - -	if (hard) -		wake_up(&net->xfrm.km_waitq);  }  EXPORT_SYMBOL(km_policy_expired);  #ifdef CONFIG_XFRM_MIGRATE -int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, -	       struct xfrm_migrate *m, int num_migrate, -	       struct xfrm_kmaddress *k) +int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, +	       const struct xfrm_migrate *m, int num_migrate, +	       const struct xfrm_kmaddress *k)  {  	int err = -EINVAL;  	int ret;  	struct xfrm_mgr *km; -	read_lock(&xfrm_km_lock); -	list_for_each_entry(km, &xfrm_km_list, list) { +	rcu_read_lock(); +	list_for_each_entry_rcu(km, &xfrm_km_list, list) {  		if (km->migrate) {  			ret = km->migrate(sel, dir, type, m, num_migrate, k);  			if (!ret)  				err = ret;  		}  	} -	read_unlock(&xfrm_km_lock); +	rcu_read_unlock();  	return err;  }  EXPORT_SYMBOL(km_migrate); @@ -1847,19 +1814,37 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address  	int ret;  	struct xfrm_mgr *km; -	read_lock(&xfrm_km_lock); -	list_for_each_entry(km, &xfrm_km_list, list) { +	rcu_read_lock(); +	list_for_each_entry_rcu(km, &xfrm_km_list, list) {  		if (km->report) {  			ret = km->report(net, proto, sel, addr);  			if (!ret)  				err = ret;  		}  	} -	read_unlock(&xfrm_km_lock); +	rcu_read_unlock();  	return err;  }  EXPORT_SYMBOL(km_report); +bool km_is_alive(const struct km_event *c) +{ +	struct xfrm_mgr *km; +	bool is_alive = false; + +	rcu_read_lock(); +	list_for_each_entry_rcu(km, &xfrm_km_list, list) { +		if (km->is_alive && km->is_alive(c)) { +			is_alive = true; +			break; +		} +	} +	rcu_read_unlock(); + +	return is_alive; +} +EXPORT_SYMBOL(km_is_alive); +  int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)  {  	int err; @@ -1879,14 +1864,14 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen  		goto out;  	err = -EINVAL; -	read_lock(&xfrm_km_lock); -	list_for_each_entry(km, &xfrm_km_list, list) { +	rcu_read_lock(); +	list_for_each_entry_rcu(km, &xfrm_km_list, list) {  		pol = km->compile_policy(sk, optname, data,  					 optlen, &err);  		if (err >= 0)  			break;  	} -	read_unlock(&xfrm_km_lock); +	rcu_read_unlock();  	if (err >= 0) {  		xfrm_sk_policy_insert(sk, err, pol); @@ -1900,20 +1885,23 @@ out:  }  EXPORT_SYMBOL(xfrm_user_policy); +static DEFINE_SPINLOCK(xfrm_km_lock); +  int xfrm_register_km(struct xfrm_mgr *km)  { -	write_lock_bh(&xfrm_km_lock); -	list_add_tail(&km->list, &xfrm_km_list); -	write_unlock_bh(&xfrm_km_lock); +	spin_lock_bh(&xfrm_km_lock); +	list_add_tail_rcu(&km->list, &xfrm_km_list); +	spin_unlock_bh(&xfrm_km_lock);  	return 0;  }  EXPORT_SYMBOL(xfrm_register_km);  int xfrm_unregister_km(struct xfrm_mgr *km)  { -	write_lock_bh(&xfrm_km_lock); -	list_del(&km->list); -	write_unlock_bh(&xfrm_km_lock); +	spin_lock_bh(&xfrm_km_lock); +	list_del_rcu(&km->list); +	spin_unlock_bh(&xfrm_km_lock); +	synchronize_rcu();  	return 0;  }  EXPORT_SYMBOL(xfrm_unregister_km); @@ -1925,12 +1913,12 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)  		return -EINVAL;  	if (unlikely(afinfo->family >= NPROTO))  		return -EAFNOSUPPORT; -	write_lock_bh(&xfrm_state_afinfo_lock); +	spin_lock_bh(&xfrm_state_afinfo_lock);  	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))  		err = -ENOBUFS;  	else -		xfrm_state_afinfo[afinfo->family] = afinfo; -	write_unlock_bh(&xfrm_state_afinfo_lock); +		rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo); +	spin_unlock_bh(&xfrm_state_afinfo_lock);  	return err;  }  EXPORT_SYMBOL(xfrm_state_register_afinfo); @@ -1942,34 +1930,34 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)  		return -EINVAL;  	if (unlikely(afinfo->family >= NPROTO))  		return -EAFNOSUPPORT; -	write_lock_bh(&xfrm_state_afinfo_lock); +	spin_lock_bh(&xfrm_state_afinfo_lock);  	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {  		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))  			err = -EINVAL;  		else -			xfrm_state_afinfo[afinfo->family] = NULL; +			RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL);  	} -	write_unlock_bh(&xfrm_state_afinfo_lock); +	spin_unlock_bh(&xfrm_state_afinfo_lock); +	synchronize_rcu();  	return err;  }  EXPORT_SYMBOL(xfrm_state_unregister_afinfo); -static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) +struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)  {  	struct xfrm_state_afinfo *afinfo;  	if (unlikely(family >= NPROTO))  		return NULL; -	read_lock(&xfrm_state_afinfo_lock); -	afinfo = xfrm_state_afinfo[family]; +	rcu_read_lock(); +	afinfo = rcu_dereference(xfrm_state_afinfo[family]);  	if (unlikely(!afinfo)) -		read_unlock(&xfrm_state_afinfo_lock); +		rcu_read_unlock();  	return afinfo;  } -static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) -	__releases(xfrm_state_afinfo_lock) +void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)  { -	read_unlock(&xfrm_state_afinfo_lock); +	rcu_read_unlock();  }  /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ @@ -2001,7 +1989,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)  	return res;  } -int xfrm_init_state(struct xfrm_state *x) +int __xfrm_init_state(struct xfrm_state *x, bool init_replay)  {  	struct xfrm_state_afinfo *afinfo;  	struct xfrm_mode *inner_mode; @@ -2071,8 +2059,16 @@ int xfrm_init_state(struct xfrm_state *x)  		goto error;  	x->outer_mode = xfrm_get_mode(x->props.mode, family); -	if (x->outer_mode == NULL) +	if (x->outer_mode == NULL) { +		err = -EPROTONOSUPPORT;  		goto error; +	} + +	if (init_replay) { +		err = xfrm_init_replay(x); +		if (err) +			goto error; +	}  	x->km.state = XFRM_STATE_VALID; @@ -2080,6 +2076,13 @@ error:  	return err;  } +EXPORT_SYMBOL(__xfrm_init_state); + +int xfrm_init_state(struct xfrm_state *x) +{ +	return __xfrm_init_state(x, true); +} +  EXPORT_SYMBOL(xfrm_init_state);  int __net_init xfrm_state_init(struct net *net) @@ -2105,7 +2108,7 @@ int __net_init xfrm_state_init(struct net *net)  	INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);  	INIT_HLIST_HEAD(&net->xfrm.state_gc_list);  	INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task); -	init_waitqueue_head(&net->xfrm.km_waitq); +	spin_lock_init(&net->xfrm.xfrm_state_lock);  	return 0;  out_byspi: @@ -2118,14 +2121,10 @@ out_bydst:  void xfrm_state_fini(struct net *net)  { -	struct xfrm_audit audit_info;  	unsigned int sz;  	flush_work(&net->xfrm.state_hash_work); -	audit_info.loginuid = -1; -	audit_info.sessionid = -1; -	audit_info.secid = 0; -	xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info); +	xfrm_state_flush(net, IPSEC_PROTO_ANY, false);  	flush_work(&net->xfrm.state_gc_work);  	WARN_ON(!list_empty(&net->xfrm.state_all)); @@ -2150,7 +2149,7 @@ static void xfrm_audit_helper_sainfo(struct xfrm_state *x,  		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",  				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str); -	switch(x->props.family) { +	switch (x->props.family) {  	case AF_INET:  		audit_log_format(audit_buf, " src=%pI4 dst=%pI4",  				 &x->props.saddr.a4, &x->id.daddr.a4); @@ -2167,8 +2166,8 @@ static void xfrm_audit_helper_sainfo(struct xfrm_state *x,  static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,  				      struct audit_buffer *audit_buf)  { -	struct iphdr *iph4; -	struct ipv6hdr *iph6; +	const struct iphdr *iph4; +	const struct ipv6hdr *iph6;  	switch (family) {  	case AF_INET: @@ -2180,7 +2179,7 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,  		iph6 = ipv6_hdr(skb);  		audit_log_format(audit_buf,  				 " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x", -				 &iph6->saddr,&iph6->daddr, +				 &iph6->saddr, &iph6->daddr,  				 iph6->flow_lbl[0] & 0x0f,  				 iph6->flow_lbl[1],  				 iph6->flow_lbl[2]); @@ -2188,30 +2187,28 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,  	}  } -void xfrm_audit_state_add(struct xfrm_state *x, int result, -			  uid_t auid, u32 sessionid, u32 secid) +void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid)  {  	struct audit_buffer *audit_buf;  	audit_buf = xfrm_audit_start("SAD-add");  	if (audit_buf == NULL)  		return; -	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); +	xfrm_audit_helper_usrinfo(task_valid, audit_buf);  	xfrm_audit_helper_sainfo(x, audit_buf);  	audit_log_format(audit_buf, " res=%u", result);  	audit_log_end(audit_buf);  }  EXPORT_SYMBOL_GPL(xfrm_audit_state_add); -void xfrm_audit_state_delete(struct xfrm_state *x, int result, -			     uid_t auid, u32 sessionid, u32 secid) +void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid)  {  	struct audit_buffer *audit_buf;  	audit_buf = xfrm_audit_start("SAD-delete");  	if (audit_buf == NULL)  		return; -	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); +	xfrm_audit_helper_usrinfo(task_valid, audit_buf);  	xfrm_audit_helper_sainfo(x, audit_buf);  	audit_log_format(audit_buf, " res=%u", result);  	audit_log_end(audit_buf); @@ -2236,7 +2233,7 @@ void xfrm_audit_state_replay_overflow(struct xfrm_state *x,  }  EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow); -static void xfrm_audit_state_replay(struct xfrm_state *x, +void xfrm_audit_state_replay(struct xfrm_state *x,  			     struct sk_buff *skb, __be32 net_seq)  {  	struct audit_buffer *audit_buf; @@ -2251,6 +2248,7 @@ static void xfrm_audit_state_replay(struct xfrm_state *x,  			 spi, spi, ntohl(net_seq));  	audit_log_end(audit_buf);  } +EXPORT_SYMBOL_GPL(xfrm_audit_state_replay);  void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family)  { diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c index 05640bc9594..05a6e3d9c25 100644 --- a/net/xfrm/xfrm_sysctl.c +++ b/net/xfrm/xfrm_sysctl.c @@ -54,7 +54,11 @@ int __net_init xfrm_sysctl_init(struct net *net)  	table[2].data = &net->xfrm.sysctl_larval_drop;  	table[3].data = &net->xfrm.sysctl_acq_expires; -	net->xfrm.sysctl_hdr = register_net_sysctl_table(net, net_core_path, table); +	/* Don't export sysctls to unprivileged users */ +	if (net->user_ns != &init_user_ns) +		table[0].procname = NULL; + +	net->xfrm.sysctl_hdr = register_net_sysctl(net, "net/core", table);  	if (!net->xfrm.sysctl_hdr)  		goto out_register;  	return 0; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8bae6b22c84..d4db6ebb089 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -26,16 +26,12 @@  #include <net/sock.h>  #include <net/xfrm.h>  #include <net/netlink.h> +#include <net/ah.h>  #include <asm/uaccess.h> -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6)  #include <linux/in6.h>  #endif -static inline int aead_len(struct xfrm_algo_aead *alg) -{ -	return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); -} -  static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)  {  	struct nlattr *rt = attrs[type]; @@ -118,6 +114,38 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs)  	return 0;  } +static inline int verify_replay(struct xfrm_usersa_info *p, +				struct nlattr **attrs) +{ +	struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; +	struct xfrm_replay_state_esn *rs; + +	if (p->flags & XFRM_STATE_ESN) { +		if (!rt) +			return -EINVAL; + +		rs = nla_data(rt); + +		if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) +			return -EINVAL; + +		if (nla_len(rt) < xfrm_replay_state_esn_len(rs) && +		    nla_len(rt) != sizeof(*rs)) +			return -EINVAL; +	} + +	if (!rt) +		return 0; + +	/* As only ESP and AH support ESN feature. */ +	if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH)) +		return -EINVAL; + +	if (p->replay_window != 0) +		return -EINVAL; + +	return 0; +}  static int verify_newsa_info(struct xfrm_usersa_info *p,  			     struct nlattr **attrs) @@ -130,7 +158,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,  		break;  	case AF_INET6: -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6)  		break;  #else  		err = -EAFNOSUPPORT; @@ -148,7 +176,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,  		     !attrs[XFRMA_ALG_AUTH_TRUNC]) ||  		    attrs[XFRMA_ALG_AEAD]	||  		    attrs[XFRMA_ALG_CRYPT]	|| -		    attrs[XFRMA_ALG_COMP]) +		    attrs[XFRMA_ALG_COMP]	|| +		    attrs[XFRMA_TFCPAD])  			goto out;  		break; @@ -165,6 +194,9 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,  		     attrs[XFRMA_ALG_CRYPT]) &&  		    attrs[XFRMA_ALG_AEAD])  			goto out; +		if (attrs[XFRMA_TFCPAD] && +		    p->mode != XFRM_MODE_TUNNEL) +			goto out;  		break;  	case IPPROTO_COMP: @@ -172,11 +204,13 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,  		    attrs[XFRMA_ALG_AEAD]	||  		    attrs[XFRMA_ALG_AUTH]	||  		    attrs[XFRMA_ALG_AUTH_TRUNC]	|| -		    attrs[XFRMA_ALG_CRYPT]) +		    attrs[XFRMA_ALG_CRYPT]	|| +		    attrs[XFRMA_TFCPAD]		|| +		    (ntohl(p->id.spi) >= 0x10000))  			goto out;  		break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6)  	case IPPROTO_DSTOPTS:  	case IPPROTO_ROUTING:  		if (attrs[XFRMA_ALG_COMP]	|| @@ -186,6 +220,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,  		    attrs[XFRMA_ALG_CRYPT]	||  		    attrs[XFRMA_ENCAP]		||  		    attrs[XFRMA_SEC_CTX]	|| +		    attrs[XFRMA_TFCPAD]		||  		    !attrs[XFRMA_COADDR])  			goto out;  		break; @@ -207,6 +242,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,  		goto out;  	if ((err = verify_sec_ctx_len(attrs)))  		goto out; +	if ((err = verify_replay(p, attrs))) +		goto out;  	err = -EINVAL;  	switch (p->mode) { @@ -227,7 +264,7 @@ out:  }  static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, -			   struct xfrm_algo_desc *(*get_byname)(char *, int), +			   struct xfrm_algo_desc *(*get_byname)(const char *, int),  			   struct nlattr *rta)  {  	struct xfrm_algo *p, *ualg; @@ -296,7 +333,8 @@ static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props,  	algo = xfrm_aalg_get_byname(ualg->alg_name, 1);  	if (!algo)  		return -ENOSYS; -	if (ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits) +	if ((ualg->alg_trunc_len / 8) > MAX_AH_AUTH_LEN || +	    ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits)  		return -EINVAL;  	*props = algo->desc.sadb_alg_id; @@ -337,6 +375,57 @@ static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props,  	return 0;  } +static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_esn, +					 struct nlattr *rp) +{ +	struct xfrm_replay_state_esn *up; +	int ulen; + +	if (!replay_esn || !rp) +		return 0; + +	up = nla_data(rp); +	ulen = xfrm_replay_state_esn_len(up); + +	if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) +		return -EINVAL; + +	return 0; +} + +static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn, +				       struct xfrm_replay_state_esn **preplay_esn, +				       struct nlattr *rta) +{ +	struct xfrm_replay_state_esn *p, *pp, *up; +	int klen, ulen; + +	if (!rta) +		return 0; + +	up = nla_data(rta); +	klen = xfrm_replay_state_esn_len(up); +	ulen = nla_len(rta) >= klen ? klen : sizeof(*up); + +	p = kzalloc(klen, GFP_KERNEL); +	if (!p) +		return -ENOMEM; + +	pp = kzalloc(klen, GFP_KERNEL); +	if (!pp) { +		kfree(p); +		return -ENOMEM; +	} + +	memcpy(p, up, ulen); +	memcpy(pp, up, ulen); + +	*replay_esn = p; +	*preplay_esn = pp; + +	return 0; +} +  static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx)  {  	int len = 0; @@ -354,7 +443,8 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *  	memcpy(&x->sel, &p->sel, sizeof(x->sel));  	memcpy(&x->lft, &p->lft, sizeof(x->lft));  	x->props.mode = p->mode; -	x->props.replay_window = p->replay_window; +	x->props.replay_window = min_t(unsigned int, p->replay_window, +					sizeof(x->replay.bitmap) * 8);  	x->props.reqid = p->reqid;  	x->props.family = p->family;  	memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); @@ -369,13 +459,24 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *   * somehow made shareable and move it to xfrm_state.c - JHS   *  */ -static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) +static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, +				  int update_esn)  {  	struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; +	struct nlattr *re = update_esn ? attrs[XFRMA_REPLAY_ESN_VAL] : NULL;  	struct nlattr *lt = attrs[XFRMA_LTIME_VAL];  	struct nlattr *et = attrs[XFRMA_ETIMER_THRESH];  	struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; +	if (re) { +		struct xfrm_replay_state_esn *replay_esn; +		replay_esn = nla_data(re); +		memcpy(x->replay_esn, replay_esn, +		       xfrm_replay_state_esn_len(replay_esn)); +		memcpy(x->preplay_esn, replay_esn, +		       xfrm_replay_state_esn_len(replay_esn)); +	} +  	if (rp) {  		struct xfrm_replay_state *replay;  		replay = nla_data(rp); @@ -412,6 +513,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,  	copy_from_user_state(x, p); +	if (attrs[XFRMA_SA_EXTRA_FLAGS]) +		x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]); +  	if ((err = attach_aead(&x->aead, &x->props.ealgo,  			       attrs[XFRMA_ALG_AEAD])))  		goto error; @@ -439,6 +543,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,  			goto error;  	} +	if (attrs[XFRMA_TFCPAD]) +		x->tfcpad = nla_get_u32(attrs[XFRMA_TFCPAD]); +  	if (attrs[XFRMA_COADDR]) {  		x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]),  				    sizeof(*x->coaddr), GFP_KERNEL); @@ -448,7 +555,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,  	xfrm_mark_get(attrs, &x->mark); -	err = xfrm_init_state(x); +	err = __xfrm_init_state(x, false);  	if (err)  		goto error; @@ -456,17 +563,20 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,  	    security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX])))  		goto error; +	if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, +					       attrs[XFRMA_REPLAY_ESN_VAL]))) +		goto error; +  	x->km.seq = p->seq;  	x->replay_maxdiff = net->xfrm.sysctl_aevent_rseqth;  	/* sysctl_xfrm_aevent_etime is in 100ms units */  	x->replay_maxage = (net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M; -	x->preplay.bitmap = 0; -	x->preplay.seq = x->replay.seq+x->replay_maxdiff; -	x->preplay.oseq = x->replay.oseq +x->replay_maxdiff; -	/* override default values from above */ +	if ((err = xfrm_init_replay(x))) +		goto error; -	xfrm_update_ae_params(x, attrs); +	/* override default values from above */ +	xfrm_update_ae_params(x, attrs, 0);  	return x; @@ -486,9 +596,6 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,  	struct xfrm_state *x;  	int err;  	struct km_event c; -	uid_t loginuid = NETLINK_CB(skb).loginuid; -	u32 sessionid = NETLINK_CB(skb).sessionid; -	u32 sid = NETLINK_CB(skb).sid;  	err = verify_newsa_info(p, attrs);  	if (err) @@ -504,7 +611,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,  	else  		err = xfrm_state_update(x); -	xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid, sid); +	xfrm_audit_state_add(x, err ? 0 : 1, true);  	if (err < 0) {  		x->km.state = XFRM_STATE_DEAD; @@ -513,7 +620,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,  	}  	c.seq = nlh->nlmsg_seq; -	c.pid = nlh->nlmsg_pid; +	c.portid = nlh->nlmsg_pid;  	c.event = nlh->nlmsg_type;  	km_state_notify(x, &c); @@ -564,9 +671,6 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,  	int err = -ESRCH;  	struct km_event c;  	struct xfrm_usersa_id *p = nlmsg_data(nlh); -	uid_t loginuid = NETLINK_CB(skb).loginuid; -	u32 sessionid = NETLINK_CB(skb).sessionid; -	u32 sid = NETLINK_CB(skb).sid;  	x = xfrm_user_state_lookup(net, p, attrs, &err);  	if (x == NULL) @@ -586,18 +690,19 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,  		goto out;  	c.seq = nlh->nlmsg_seq; -	c.pid = nlh->nlmsg_pid; +	c.portid = nlh->nlmsg_pid;  	c.event = nlh->nlmsg_type;  	km_state_notify(x, &c);  out: -	xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid, sid); +	xfrm_audit_state_delete(x, err ? 0 : 1, true);  	xfrm_state_put(x);  	return err;  }  static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)  { +	memset(p, 0, sizeof(*p));  	memcpy(&p->id, &x->id, sizeof(p->id));  	memcpy(&p->sel, &x->sel, sizeof(p->sel));  	memcpy(&p->lft, &x->lft, sizeof(p->lft)); @@ -651,7 +756,7 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb)  		return -EMSGSIZE;  	algo = nla_data(nla); -	strcpy(algo->alg_name, auth->alg_name); +	strncpy(algo->alg_name, auth->alg_name, sizeof(algo->alg_name));  	memcpy(algo->alg_key, auth->alg_key, (auth->alg_key_len + 7) / 8);  	algo->alg_key_len = auth->alg_key_len; @@ -663,41 +768,74 @@ static int copy_to_user_state_extra(struct xfrm_state *x,  				    struct xfrm_usersa_info *p,  				    struct sk_buff *skb)  { -	copy_to_user_state(x, p); +	int ret = 0; -	if (x->coaddr) -		NLA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); +	copy_to_user_state(x, p); -	if (x->lastused) -		NLA_PUT_U64(skb, XFRMA_LASTUSED, x->lastused); +	if (x->props.extra_flags) { +		ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS, +				  x->props.extra_flags); +		if (ret) +			goto out; +	} -	if (x->aead) -		NLA_PUT(skb, XFRMA_ALG_AEAD, aead_len(x->aead), x->aead); +	if (x->coaddr) { +		ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); +		if (ret) +			goto out; +	} +	if (x->lastused) { +		ret = nla_put_u64(skb, XFRMA_LASTUSED, x->lastused); +		if (ret) +			goto out; +	} +	if (x->aead) { +		ret = nla_put(skb, XFRMA_ALG_AEAD, aead_len(x->aead), x->aead); +		if (ret) +			goto out; +	}  	if (x->aalg) { -		if (copy_to_user_auth(x->aalg, skb)) -			goto nla_put_failure; - -		NLA_PUT(skb, XFRMA_ALG_AUTH_TRUNC, -			xfrm_alg_auth_len(x->aalg), x->aalg); +		ret = copy_to_user_auth(x->aalg, skb); +		if (!ret) +			ret = nla_put(skb, XFRMA_ALG_AUTH_TRUNC, +				      xfrm_alg_auth_len(x->aalg), x->aalg); +		if (ret) +			goto out;  	} -	if (x->ealg) -		NLA_PUT(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(x->ealg), x->ealg); -	if (x->calg) -		NLA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); - -	if (x->encap) -		NLA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); - -	if (xfrm_mark_put(skb, &x->mark)) -		goto nla_put_failure; - -	if (x->security && copy_sec_ctx(x->security, skb) < 0) -		goto nla_put_failure; - -	return 0; - -nla_put_failure: -	return -EMSGSIZE; +	if (x->ealg) { +		ret = nla_put(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(x->ealg), x->ealg); +		if (ret) +			goto out; +	} +	if (x->calg) { +		ret = nla_put(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); +		if (ret) +			goto out; +	} +	if (x->encap) { +		ret = nla_put(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); +		if (ret) +			goto out; +	} +	if (x->tfcpad) { +		ret = nla_put_u32(skb, XFRMA_TFCPAD, x->tfcpad); +		if (ret) +			goto out; +	} +	ret = xfrm_mark_put(skb, &x->mark); +	if (ret) +		goto out; +	if (x->replay_esn) { +		ret = nla_put(skb, XFRMA_REPLAY_ESN_VAL, +			      xfrm_replay_state_esn_len(x->replay_esn), +			      x->replay_esn); +		if (ret) +			goto out; +	} +	if (x->security) +		ret = copy_sec_ctx(x->security, skb); +out: +	return ret;  }  static int dump_one_state(struct xfrm_state *x, int count, void *ptr) @@ -709,7 +847,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)  	struct nlmsghdr *nlh;  	int err; -	nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, +	nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq,  			XFRM_MSG_NEWSA, sizeof(*p), sp->nlmsg_flags);  	if (nlh == NULL)  		return -EMSGSIZE; @@ -717,24 +855,25 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)  	p = nlmsg_data(nlh);  	err = copy_to_user_state_extra(x, p, skb); -	if (err) -		goto nla_put_failure; - +	if (err) { +		nlmsg_cancel(skb, nlh); +		return err; +	}  	nlmsg_end(skb, nlh);  	return 0; - -nla_put_failure: -	nlmsg_cancel(skb, nlh); -	return err;  }  static int xfrm_dump_sa_done(struct netlink_callback *cb)  {  	struct xfrm_state_walk *walk = (struct xfrm_state_walk *) &cb->args[1]; -	xfrm_state_walk_done(walk); +	struct sock *sk = cb->skb->sk; +	struct net *net = sock_net(sk); + +	xfrm_state_walk_done(walk, net);  	return 0;  } +static const struct nla_policy xfrma_policy[XFRMA_MAX+1];  static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)  {  	struct net *net = sock_net(skb->sk); @@ -750,8 +889,31 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)  	info.nlmsg_flags = NLM_F_MULTI;  	if (!cb->args[0]) { +		struct nlattr *attrs[XFRMA_MAX+1]; +		struct xfrm_address_filter *filter = NULL; +		u8 proto = 0; +		int err; +  		cb->args[0] = 1; -		xfrm_state_walk_init(walk, 0); + +		err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, +				  xfrma_policy); +		if (err < 0) +			return err; + +		if (attrs[XFRMA_ADDRESS_FILTER]) { +			filter = kmalloc(sizeof(*filter), GFP_KERNEL); +			if (filter == NULL) +				return -ENOMEM; + +			memcpy(filter, nla_data(attrs[XFRMA_ADDRESS_FILTER]), +			       sizeof(*filter)); +		} + +		if (attrs[XFRMA_PROTO]) +			proto = nla_get_u8(attrs[XFRMA_PROTO]); + +		xfrm_state_walk_init(walk, proto, filter);  	}  	(void) xfrm_state_walk(net, walk, dump_one_state, &info); @@ -764,6 +926,7 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,  {  	struct xfrm_dump_info info;  	struct sk_buff *skb; +	int err;  	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);  	if (!skb) @@ -774,14 +937,29 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,  	info.nlmsg_seq = seq;  	info.nlmsg_flags = 0; -	if (dump_one_state(x, 0, &info)) { +	err = dump_one_state(x, 0, &info); +	if (err) {  		kfree_skb(skb); -		return NULL; +		return ERR_PTR(err);  	}  	return skb;  } +/* A wrapper for nlmsg_multicast() checking that nlsk is still available. + * Must be called with RCU read lock. + */ +static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, +				       u32 pid, unsigned int group) +{ +	struct sock *nlsk = rcu_dereference(net->xfrm.nlsk); + +	if (nlsk) +		return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); +	else +		return -1; +} +  static inline size_t xfrm_spdinfo_msgsize(void)  {  	return NLMSG_ALIGN(4) @@ -790,16 +968,17 @@ static inline size_t xfrm_spdinfo_msgsize(void)  }  static int build_spdinfo(struct sk_buff *skb, struct net *net, -			 u32 pid, u32 seq, u32 flags) +			 u32 portid, u32 seq, u32 flags)  {  	struct xfrmk_spdinfo si;  	struct xfrmu_spdinfo spc;  	struct xfrmu_spdhinfo sph;  	struct nlmsghdr *nlh; +	int err;  	u32 *f; -	nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); -	if (nlh == NULL) /* shouldnt really happen ... */ +	nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); +	if (nlh == NULL) /* shouldn't really happen ... */  		return -EMSGSIZE;  	f = nlmsg_data(nlh); @@ -814,14 +993,15 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net,  	sph.spdhcnt = si.spdhcnt;  	sph.spdhmcnt = si.spdhmcnt; -	NLA_PUT(skb, XFRMA_SPD_INFO, sizeof(spc), &spc); -	NLA_PUT(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph); +	err = nla_put(skb, XFRMA_SPD_INFO, sizeof(spc), &spc); +	if (!err) +		err = nla_put(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph); +	if (err) { +		nlmsg_cancel(skb, nlh); +		return err; +	}  	return nlmsg_end(skb, nlh); - -nla_put_failure: -	nlmsg_cancel(skb, nlh); -	return -EMSGSIZE;  }  static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -830,17 +1010,17 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,  	struct net *net = sock_net(skb->sk);  	struct sk_buff *r_skb;  	u32 *flags = nlmsg_data(nlh); -	u32 spid = NETLINK_CB(skb).pid; +	u32 sportid = NETLINK_CB(skb).portid;  	u32 seq = nlh->nlmsg_seq;  	r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC);  	if (r_skb == NULL)  		return -ENOMEM; -	if (build_spdinfo(r_skb, net, spid, seq, *flags) < 0) +	if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0)  		BUG(); -	return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); +	return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);  }  static inline size_t xfrm_sadinfo_msgsize(void) @@ -851,15 +1031,16 @@ static inline size_t xfrm_sadinfo_msgsize(void)  }  static int build_sadinfo(struct sk_buff *skb, struct net *net, -			 u32 pid, u32 seq, u32 flags) +			 u32 portid, u32 seq, u32 flags)  {  	struct xfrmk_sadinfo si;  	struct xfrmu_sadhinfo sh;  	struct nlmsghdr *nlh; +	int err;  	u32 *f; -	nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); -	if (nlh == NULL) /* shouldnt really happen ... */ +	nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); +	if (nlh == NULL) /* shouldn't really happen ... */  		return -EMSGSIZE;  	f = nlmsg_data(nlh); @@ -869,14 +1050,15 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net,  	sh.sadhmcnt = si.sadhmcnt;  	sh.sadhcnt = si.sadhcnt; -	NLA_PUT_U32(skb, XFRMA_SAD_CNT, si.sadcnt); -	NLA_PUT(skb, XFRMA_SAD_HINFO, sizeof(sh), &sh); +	err = nla_put_u32(skb, XFRMA_SAD_CNT, si.sadcnt); +	if (!err) +		err = nla_put(skb, XFRMA_SAD_HINFO, sizeof(sh), &sh); +	if (err) { +		nlmsg_cancel(skb, nlh); +		return err; +	}  	return nlmsg_end(skb, nlh); - -nla_put_failure: -	nlmsg_cancel(skb, nlh); -	return -EMSGSIZE;  }  static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -885,17 +1067,17 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,  	struct net *net = sock_net(skb->sk);  	struct sk_buff *r_skb;  	u32 *flags = nlmsg_data(nlh); -	u32 spid = NETLINK_CB(skb).pid; +	u32 sportid = NETLINK_CB(skb).portid;  	u32 seq = nlh->nlmsg_seq;  	r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC);  	if (r_skb == NULL)  		return -ENOMEM; -	if (build_sadinfo(r_skb, net, spid, seq, *flags) < 0) +	if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0)  		BUG(); -	return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); +	return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);  }  static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -915,36 +1097,13 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,  	if (IS_ERR(resp_skb)) {  		err = PTR_ERR(resp_skb);  	} else { -		err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); +		err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid);  	}  	xfrm_state_put(x);  out_noput:  	return err;  } -static int verify_userspi_info(struct xfrm_userspi_info *p) -{ -	switch (p->info.id.proto) { -	case IPPROTO_AH: -	case IPPROTO_ESP: -		break; - -	case IPPROTO_COMP: -		/* IPCOMP spi is 16-bits. */ -		if (p->max >= 0x10000) -			return -EINVAL; -		break; - -	default: -		return -EINVAL; -	} - -	if (p->min > p->max) -		return -EINVAL; - -	return 0; -} -  static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,  		struct nlattr **attrs)  { @@ -959,7 +1118,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,  	struct xfrm_mark m;  	p = nlmsg_data(nlh); -	err = verify_userspi_info(p); +	err = verify_spi_info(p->info.id.proto, p->min, p->max);  	if (err)  		goto out_noput; @@ -971,7 +1130,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,  	mark = xfrm_mark_get(attrs, &m);  	if (p->info.seq) {  		x = xfrm_find_acq_byseq(net, mark, p->info.seq); -		if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { +		if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) {  			xfrm_state_put(x);  			x = NULL;  		} @@ -996,7 +1155,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,  		goto out;  	} -	err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); +	err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid);  out:  	xfrm_state_put(x); @@ -1037,6 +1196,8 @@ static int verify_policy_type(u8 type)  static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)  { +	int ret; +  	switch (p->share) {  	case XFRM_SHARE_ANY:  	case XFRM_SHARE_SESSION: @@ -1062,7 +1223,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)  		break;  	case AF_INET6: -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6)  		break;  #else  		return  -EAFNOSUPPORT; @@ -1072,7 +1233,13 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)  		return -EINVAL;  	} -	return verify_policy_dir(p->dir); +	ret = verify_policy_dir(p->dir); +	if (ret) +		return ret; +	if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir)) +		return -EINVAL; + +	return 0;  }  static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct nlattr **attrs) @@ -1084,7 +1251,7 @@ static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct nlattr **attrs  		return 0;  	uctx = nla_data(rt); -	return security_xfrm_policy_alloc(&pol->security, uctx); +	return security_xfrm_policy_alloc(&pol->security, uctx, GFP_KERNEL);  }  static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, @@ -1133,7 +1300,7 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)  		switch (ut[i].family) {  		case AF_INET:  			break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6)  		case AF_INET6:  			break;  #endif @@ -1199,6 +1366,7 @@ static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy  static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir)  { +	memset(p, 0, sizeof(*p));  	memcpy(&p->sel, &xp->selector, sizeof(p->sel));  	memcpy(&p->lft, &xp->lft, sizeof(p->lft));  	memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft)); @@ -1251,9 +1419,6 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,  	struct km_event c;  	int err;  	int excl; -	uid_t loginuid = NETLINK_CB(skb).loginuid; -	u32 sessionid = NETLINK_CB(skb).sessionid; -	u32 sid = NETLINK_CB(skb).sid;  	err = verify_newpolicy_info(p);  	if (err) @@ -1266,13 +1431,13 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,  	if (!xp)  		return err; -	/* shouldnt excl be based on nlh flags?? +	/* shouldn't excl be based on nlh flags??  	 * Aha! this is anti-netlink really i.e  more pfkey derived  	 * in netlink excl is a flag and you wouldnt need  	 * a type XFRM_MSG_UPDPOLICY - JHS */  	excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;  	err = xfrm_policy_insert(p->dir, xp, excl); -	xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid, sid); +	xfrm_audit_policy_add(xp, err ? 0 : 1, true);  	if (err) {  		security_xfrm_policy_free(xp->security); @@ -1282,7 +1447,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,  	c.event = nlh->nlmsg_type;  	c.seq = nlh->nlmsg_seq; -	c.pid = nlh->nlmsg_pid; +	c.portid = nlh->nlmsg_pid;  	km_policy_notify(xp, p->dir, &c);  	xfrm_pol_put(xp); @@ -1302,6 +1467,7 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb)  		struct xfrm_user_tmpl *up = &vec[i];  		struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; +		memset(up, 0, sizeof(*up));  		memcpy(&up->id, &kp->id, sizeof(up->id));  		up->family = kp->encap_family;  		memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr)); @@ -1328,9 +1494,8 @@ static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buf  static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb)  { -	if (xp->security) { +	if (xp->security)  		return copy_sec_ctx(xp->security, skb); -	}  	return 0;  }  static inline size_t userpolicy_type_attrsize(void) @@ -1366,37 +1531,36 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr  	struct sk_buff *in_skb = sp->in_skb;  	struct sk_buff *skb = sp->out_skb;  	struct nlmsghdr *nlh; +	int err; -	nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, +	nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq,  			XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags);  	if (nlh == NULL)  		return -EMSGSIZE;  	p = nlmsg_data(nlh);  	copy_to_user_policy(xp, p, dir); -	if (copy_to_user_tmpl(xp, skb) < 0) -		goto nlmsg_failure; -	if (copy_to_user_sec_ctx(xp, skb)) -		goto nlmsg_failure; -	if (copy_to_user_policy_type(xp->type, skb) < 0) -		goto nlmsg_failure; -	if (xfrm_mark_put(skb, &xp->mark)) -		goto nla_put_failure; - +	err = copy_to_user_tmpl(xp, skb); +	if (!err) +		err = copy_to_user_sec_ctx(xp, skb); +	if (!err) +		err = copy_to_user_policy_type(xp->type, skb); +	if (!err) +		err = xfrm_mark_put(skb, &xp->mark); +	if (err) { +		nlmsg_cancel(skb, nlh); +		return err; +	}  	nlmsg_end(skb, nlh);  	return 0; - -nla_put_failure: -nlmsg_failure: -	nlmsg_cancel(skb, nlh); -	return -EMSGSIZE;  }  static int xfrm_dump_policy_done(struct netlink_callback *cb)  {  	struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; +	struct net *net = sock_net(cb->skb->sk); -	xfrm_policy_walk_done(walk); +	xfrm_policy_walk_done(walk, net);  	return 0;  } @@ -1430,6 +1594,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,  {  	struct xfrm_dump_info info;  	struct sk_buff *skb; +	int err;  	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);  	if (!skb) @@ -1440,9 +1605,10 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,  	info.nlmsg_seq = seq;  	info.nlmsg_flags = 0; -	if (dump_one_policy(xp, dir, 0, &info) < 0) { +	err = dump_one_policy(xp, dir, 0, &info); +	if (err) {  		kfree_skb(skb); -		return NULL; +		return ERR_PTR(err);  	}  	return skb; @@ -1486,7 +1652,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,  		if (rt) {  			struct xfrm_user_sec_ctx *uctx = nla_data(rt); -			err = security_xfrm_policy_alloc(&ctx, uctx); +			err = security_xfrm_policy_alloc(&ctx, uctx, GFP_KERNEL);  			if (err)  				return err;  		} @@ -1505,15 +1671,10 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,  			err = PTR_ERR(resp_skb);  		} else {  			err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, -					    NETLINK_CB(skb).pid); +					    NETLINK_CB(skb).portid);  		}  	} else { -		uid_t loginuid = NETLINK_CB(skb).loginuid; -		u32 sessionid = NETLINK_CB(skb).sessionid; -		u32 sid = NETLINK_CB(skb).sid; - -		xfrm_audit_policy_delete(xp, err ? 0 : 1, loginuid, sessionid, -					 sid); +		xfrm_audit_policy_delete(xp, err ? 0 : 1, true);  		if (err != 0)  			goto out; @@ -1521,12 +1682,14 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,  		c.data.byid = p->index;  		c.event = nlh->nlmsg_type;  		c.seq = nlh->nlmsg_seq; -		c.pid = nlh->nlmsg_pid; +		c.portid = nlh->nlmsg_pid;  		km_policy_notify(xp, p->dir, &c);  	}  out:  	xfrm_pol_put(xp); +	if (delete && err == 0) +		xfrm_garbage_collect(net);  	return err;  } @@ -1536,13 +1699,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,  	struct net *net = sock_net(skb->sk);  	struct km_event c;  	struct xfrm_usersa_flush *p = nlmsg_data(nlh); -	struct xfrm_audit audit_info;  	int err; -	audit_info.loginuid = NETLINK_CB(skb).loginuid; -	audit_info.sessionid = NETLINK_CB(skb).sessionid; -	audit_info.secid = NETLINK_CB(skb).sid; -	err = xfrm_state_flush(net, p->proto, &audit_info); +	err = xfrm_state_flush(net, p->proto, true);  	if (err) {  		if (err == -ESRCH) /* empty table */  			return 0; @@ -1551,59 +1710,80 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,  	c.data.proto = p->proto;  	c.event = nlh->nlmsg_type;  	c.seq = nlh->nlmsg_seq; -	c.pid = nlh->nlmsg_pid; +	c.portid = nlh->nlmsg_pid;  	c.net = net;  	km_state_notify(NULL, &c);  	return 0;  } -static inline size_t xfrm_aevent_msgsize(void) +static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x)  { +	size_t replay_size = x->replay_esn ? +			      xfrm_replay_state_esn_len(x->replay_esn) : +			      sizeof(struct xfrm_replay_state); +  	return NLMSG_ALIGN(sizeof(struct xfrm_aevent_id)) -	       + nla_total_size(sizeof(struct xfrm_replay_state)) +	       + nla_total_size(replay_size)  	       + nla_total_size(sizeof(struct xfrm_lifetime_cur))  	       + nla_total_size(sizeof(struct xfrm_mark))  	       + nla_total_size(4) /* XFRM_AE_RTHR */  	       + nla_total_size(4); /* XFRM_AE_ETHR */  } -static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c) +static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)  {  	struct xfrm_aevent_id *id;  	struct nlmsghdr *nlh; +	int err; -	nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); +	nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0);  	if (nlh == NULL)  		return -EMSGSIZE;  	id = nlmsg_data(nlh); -	memcpy(&id->sa_id.daddr, &x->id.daddr,sizeof(x->id.daddr)); +	memcpy(&id->sa_id.daddr, &x->id.daddr, sizeof(x->id.daddr));  	id->sa_id.spi = x->id.spi;  	id->sa_id.family = x->props.family;  	id->sa_id.proto = x->id.proto; -	memcpy(&id->saddr, &x->props.saddr,sizeof(x->props.saddr)); +	memcpy(&id->saddr, &x->props.saddr, sizeof(x->props.saddr));  	id->reqid = x->props.reqid;  	id->flags = c->data.aevent; -	NLA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay); -	NLA_PUT(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft); - -	if (id->flags & XFRM_AE_RTHR) -		NLA_PUT_U32(skb, XFRMA_REPLAY_THRESH, x->replay_maxdiff); - -	if (id->flags & XFRM_AE_ETHR) -		NLA_PUT_U32(skb, XFRMA_ETIMER_THRESH, -			    x->replay_maxage * 10 / HZ); +	if (x->replay_esn) { +		err = nla_put(skb, XFRMA_REPLAY_ESN_VAL, +			      xfrm_replay_state_esn_len(x->replay_esn), +			      x->replay_esn); +	} else { +		err = nla_put(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), +			      &x->replay); +	} +	if (err) +		goto out_cancel; +	err = nla_put(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft); +	if (err) +		goto out_cancel; -	if (xfrm_mark_put(skb, &x->mark)) -		goto nla_put_failure; +	if (id->flags & XFRM_AE_RTHR) { +		err = nla_put_u32(skb, XFRMA_REPLAY_THRESH, x->replay_maxdiff); +		if (err) +			goto out_cancel; +	} +	if (id->flags & XFRM_AE_ETHR) { +		err = nla_put_u32(skb, XFRMA_ETIMER_THRESH, +				  x->replay_maxage * 10 / HZ); +		if (err) +			goto out_cancel; +	} +	err = xfrm_mark_put(skb, &x->mark); +	if (err) +		goto out_cancel;  	return nlmsg_end(skb, nlh); -nla_put_failure: +out_cancel:  	nlmsg_cancel(skb, nlh); -	return -EMSGSIZE; +	return err;  }  static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1619,16 +1799,16 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,  	struct xfrm_aevent_id *p = nlmsg_data(nlh);  	struct xfrm_usersa_id *id = &p->sa_id; -	r_skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); -	if (r_skb == NULL) -		return -ENOMEM; -  	mark = xfrm_mark_get(attrs, &m);  	x = xfrm_state_lookup(net, mark, &id->daddr, id->spi, id->proto, id->family); -	if (x == NULL) { -		kfree_skb(r_skb); +	if (x == NULL)  		return -ESRCH; + +	r_skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC); +	if (r_skb == NULL) { +		xfrm_state_put(x); +		return -ENOMEM;  	}  	/* @@ -1639,11 +1819,11 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,  	spin_lock_bh(&x->lock);  	c.data.aevent = p->flags;  	c.seq = nlh->nlmsg_seq; -	c.pid = nlh->nlmsg_pid; +	c.portid = nlh->nlmsg_pid;  	if (build_aevent(r_skb, x, &c) < 0)  		BUG(); -	err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).pid); +	err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid);  	spin_unlock_bh(&x->lock);  	xfrm_state_put(x);  	return err; @@ -1655,14 +1835,15 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,  	struct net *net = sock_net(skb->sk);  	struct xfrm_state *x;  	struct km_event c; -	int err = - EINVAL; +	int err = -EINVAL;  	u32 mark = 0;  	struct xfrm_mark m;  	struct xfrm_aevent_id *p = nlmsg_data(nlh);  	struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; +	struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL];  	struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; -	if (!lt && !rp) +	if (!lt && !rp && !re)  		return err;  	/* pedantic mode - thou shalt sayeth replaceth */ @@ -1678,13 +1859,17 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,  	if (x->km.state != XFRM_STATE_VALID)  		goto out; +	err = xfrm_replay_verify_len(x->replay_esn, re); +	if (err) +		goto out; +  	spin_lock_bh(&x->lock); -	xfrm_update_ae_params(x, attrs); +	xfrm_update_ae_params(x, attrs, 1);  	spin_unlock_bh(&x->lock);  	c.event = nlh->nlmsg_type;  	c.seq = nlh->nlmsg_seq; -	c.pid = nlh->nlmsg_pid; +	c.portid = nlh->nlmsg_pid;  	c.data.aevent = XFRM_AE_CU;  	km_state_notify(x, &c);  	err = 0; @@ -1700,16 +1885,12 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,  	struct km_event c;  	u8 type = XFRM_POLICY_TYPE_MAIN;  	int err; -	struct xfrm_audit audit_info;  	err = copy_from_user_policy_type(&type, attrs);  	if (err)  		return err; -	audit_info.loginuid = NETLINK_CB(skb).loginuid; -	audit_info.sessionid = NETLINK_CB(skb).sessionid; -	audit_info.secid = NETLINK_CB(skb).sid; -	err = xfrm_policy_flush(net, type, &audit_info); +	err = xfrm_policy_flush(net, type, true);  	if (err) {  		if (err == -ESRCH) /* empty table */  			return 0; @@ -1719,7 +1900,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,  	c.data.type = type;  	c.event = nlh->nlmsg_type;  	c.seq = nlh->nlmsg_seq; -	c.pid = nlh->nlmsg_pid; +	c.portid = nlh->nlmsg_pid;  	c.net = net;  	km_policy_notify(NULL, 0, &c);  	return 0; @@ -1759,7 +1940,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,  		if (rt) {  			struct xfrm_user_sec_ctx *uctx = nla_data(rt); -			err = security_xfrm_policy_alloc(&ctx, uctx); +			err = security_xfrm_policy_alloc(&ctx, uctx, GFP_KERNEL);  			if (err)  				return err;  		} @@ -1775,17 +1956,13 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,  	err = 0;  	if (up->hard) { -		uid_t loginuid = NETLINK_CB(skb).loginuid; -		uid_t sessionid = NETLINK_CB(skb).sessionid; -		u32 sid = NETLINK_CB(skb).sid;  		xfrm_policy_delete(xp, p->dir); -		xfrm_audit_policy_delete(xp, 1, loginuid, sessionid, sid); - +		xfrm_audit_policy_delete(xp, 1, true);  	} else {  		// reset the timers here?  		WARN(1, "Dont know what to do with soft policy expire\n");  	} -	km_policy_expired(xp, p->dir, up->hard, current->pid); +	km_policy_expired(xp, p->dir, up->hard, nlh->nlmsg_pid);  out:  	xfrm_pol_put(xp); @@ -1813,14 +1990,11 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh,  	err = -EINVAL;  	if (x->km.state != XFRM_STATE_VALID)  		goto out; -	km_state_expired(x, ue->hard, current->pid); +	km_state_expired(x, ue->hard, nlh->nlmsg_pid);  	if (ue->hard) { -		uid_t loginuid = NETLINK_CB(skb).loginuid; -		uid_t sessionid = NETLINK_CB(skb).sessionid; -		u32 sid = NETLINK_CB(skb).sid;  		__xfrm_state_delete(x); -		xfrm_audit_state_delete(x, 1, loginuid, sessionid, sid); +		xfrm_audit_state_delete(x, 1, true);  	}  	err = 0;  out: @@ -1942,6 +2116,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,  	u8 type;  	int err;  	int n = 0; +	struct net *net = sock_net(skb->sk);  	if (attrs[XFRMA_MIGRATE] == NULL)  		return -EINVAL; @@ -1959,7 +2134,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,  	if (!n)  		return 0; -	xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp); +	xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net);  	return 0;  } @@ -1972,7 +2147,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,  #endif  #ifdef CONFIG_XFRM_MIGRATE -static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb) +static int copy_to_user_migrate(const struct xfrm_migrate *m, struct sk_buff *skb)  {  	struct xfrm_user_migrate um; @@ -1990,7 +2165,7 @@ static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb)  	return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um);  } -static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb) +static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff *skb)  {  	struct xfrm_user_kmaddress uk; @@ -2011,14 +2186,14 @@ static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma)  	      + userpolicy_type_attrsize();  } -static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, -			 int num_migrate, struct xfrm_kmaddress *k, -			 struct xfrm_selector *sel, u8 dir, u8 type) +static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m, +			 int num_migrate, const struct xfrm_kmaddress *k, +			 const struct xfrm_selector *sel, u8 dir, u8 type)  { -	struct xfrm_migrate *mp; +	const struct xfrm_migrate *mp;  	struct xfrm_userpolicy_id *pol_id;  	struct nlmsghdr *nlh; -	int i; +	int i, err;  	nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id), 0);  	if (nlh == NULL) @@ -2030,26 +2205,30 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,  	memcpy(&pol_id->sel, sel, sizeof(pol_id->sel));  	pol_id->dir = dir; -	if (k != NULL && (copy_to_user_kmaddress(k, skb) < 0)) -			goto nlmsg_failure; - -	if (copy_to_user_policy_type(type, skb) < 0) -		goto nlmsg_failure; - +	if (k != NULL) { +		err = copy_to_user_kmaddress(k, skb); +		if (err) +			goto out_cancel; +	} +	err = copy_to_user_policy_type(type, skb); +	if (err) +		goto out_cancel;  	for (i = 0, mp = m ; i < num_migrate; i++, mp++) { -		if (copy_to_user_migrate(mp, skb) < 0) -			goto nlmsg_failure; +		err = copy_to_user_migrate(mp, skb); +		if (err) +			goto out_cancel;  	}  	return nlmsg_end(skb, nlh); -nlmsg_failure: + +out_cancel:  	nlmsg_cancel(skb, nlh); -	return -EMSGSIZE; +	return err;  } -static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, -			     struct xfrm_migrate *m, int num_migrate, -			     struct xfrm_kmaddress *k) +static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, +			     const struct xfrm_migrate *m, int num_migrate, +			     const struct xfrm_kmaddress *k)  {  	struct net *net = &init_net;  	struct sk_buff *skb; @@ -2062,12 +2241,12 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,  	if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0)  		BUG(); -	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); +	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE);  }  #else -static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, -			     struct xfrm_migrate *m, int num_migrate, -			     struct xfrm_kmaddress *k) +static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, +			     const struct xfrm_migrate *m, int num_migrate, +			     const struct xfrm_kmaddress *k)  {  	return -ENOPROTOOPT;  } @@ -2122,9 +2301,14 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {  	[XFRMA_MIGRATE]		= { .len = sizeof(struct xfrm_user_migrate) },  	[XFRMA_KMADDRESS]	= { .len = sizeof(struct xfrm_user_kmaddress) },  	[XFRMA_MARK]		= { .len = sizeof(struct xfrm_mark) }, +	[XFRMA_TFCPAD]		= { .type = NLA_U32 }, +	[XFRMA_REPLAY_ESN_VAL]	= { .len = sizeof(struct xfrm_replay_state_esn) }, +	[XFRMA_SA_EXTRA_FLAGS]	= { .type = NLA_U32 }, +	[XFRMA_PROTO]		= { .type = NLA_U8 }, +	[XFRMA_ADDRESS_FILTER]	= { .len = sizeof(struct xfrm_address_filter) },  }; -static struct xfrm_link { +static const struct xfrm_link {  	int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);  	int (*dump)(struct sk_buff *, struct netlink_callback *);  	int (*done)(struct netlink_callback *); @@ -2158,7 +2342,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct nlattr *attrs[XFRMA_MAX+1]; -	struct xfrm_link *link; +	const struct xfrm_link *link;  	int type, err;  	type = nlh->nlmsg_type; @@ -2169,7 +2353,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  	link = &xfrm_dispatch[type];  	/* All operations require privileges, even GET */ -	if (security_netlink_recv(skb, CAP_NET_ADMIN)) +	if (!netlink_net_capable(skb, CAP_NET_ADMIN))  		return -EPERM;  	if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || @@ -2178,7 +2362,13 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  		if (link->dump == NULL)  			return -EINVAL; -		return netlink_dump_start(net->xfrm.nlsk, skb, nlh, link->dump, link->done); +		{ +			struct netlink_dump_control c = { +				.dump = link->dump, +				.done = link->done, +			}; +			return netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c); +		}  	}  	err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX, @@ -2194,9 +2384,11 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  static void xfrm_netlink_rcv(struct sk_buff *skb)  { -	mutex_lock(&xfrm_cfg_mutex); +	struct net *net = sock_net(skb->sk); + +	mutex_lock(&net->xfrm.xfrm_cfg_mutex);  	netlink_rcv_skb(skb, &xfrm_user_rcv_msg); -	mutex_unlock(&xfrm_cfg_mutex); +	mutex_unlock(&net->xfrm.xfrm_cfg_mutex);  }  static inline size_t xfrm_expire_msgsize(void) @@ -2205,12 +2397,13 @@ static inline size_t xfrm_expire_msgsize(void)  	       + nla_total_size(sizeof(struct xfrm_mark));  } -static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c) +static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)  {  	struct xfrm_user_expire *ue;  	struct nlmsghdr *nlh; +	int err; -	nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); +	nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0);  	if (nlh == NULL)  		return -EMSGSIZE; @@ -2218,16 +2411,14 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve  	copy_to_user_state(x, &ue->state);  	ue->hard = (c->data.hard != 0) ? 1 : 0; -	if (xfrm_mark_put(skb, &x->mark)) -		goto nla_put_failure; +	err = xfrm_mark_put(skb, &x->mark); +	if (err) +		return err;  	return nlmsg_end(skb, nlh); - -nla_put_failure: -	return -EMSGSIZE;  } -static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) +static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c)  {  	struct net *net = xs_net(x);  	struct sk_buff *skb; @@ -2241,25 +2432,25 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)  		return -EMSGSIZE;  	} -	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); +	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);  } -static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c) +static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event *c)  {  	struct net *net = xs_net(x);  	struct sk_buff *skb; -	skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); +	skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC);  	if (skb == NULL)  		return -ENOMEM;  	if (build_aevent(skb, x, c) < 0)  		BUG(); -	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); +	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS);  } -static int xfrm_notify_sa_flush(struct km_event *c) +static int xfrm_notify_sa_flush(const struct km_event *c)  {  	struct net *net = c->net;  	struct xfrm_usersa_flush *p; @@ -2271,7 +2462,7 @@ static int xfrm_notify_sa_flush(struct km_event *c)  	if (skb == NULL)  		return -ENOMEM; -	nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); +	nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0);  	if (nlh == NULL) {  		kfree_skb(skb);  		return -EMSGSIZE; @@ -2282,7 +2473,7 @@ static int xfrm_notify_sa_flush(struct km_event *c)  	nlmsg_end(skb, nlh); -	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); +	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);  }  static inline size_t xfrm_sa_len(struct xfrm_state *x) @@ -2301,11 +2492,17 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)  		l += nla_total_size(sizeof(*x->calg));  	if (x->encap)  		l += nla_total_size(sizeof(*x->encap)); +	if (x->tfcpad) +		l += nla_total_size(sizeof(x->tfcpad)); +	if (x->replay_esn) +		l += nla_total_size(xfrm_replay_state_esn_len(x->replay_esn));  	if (x->security)  		l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) +  				    x->security->ctx_len);  	if (x->coaddr)  		l += nla_total_size(sizeof(*x->coaddr)); +	if (x->props.extra_flags) +		l += nla_total_size(sizeof(x->props.extra_flags));  	/* Must count x->lastused as it may become non-zero behind our back. */  	l += nla_total_size(sizeof(u64)); @@ -2313,7 +2510,7 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)  	return l;  } -static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) +static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)  {  	struct net *net = xs_net(x);  	struct xfrm_usersa_info *p; @@ -2321,7 +2518,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)  	struct nlmsghdr *nlh;  	struct sk_buff *skb;  	int len = xfrm_sa_len(x); -	int headlen; +	int headlen, err;  	headlen = sizeof(*p);  	if (c->event == XFRM_MSG_DELSA) { @@ -2335,9 +2532,10 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)  	if (skb == NULL)  		return -ENOMEM; -	nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); +	nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); +	err = -EMSGSIZE;  	if (nlh == NULL) -		goto nla_put_failure; +		goto out_free_skb;  	p = nlmsg_data(nlh);  	if (c->event == XFRM_MSG_DELSA) { @@ -2350,27 +2548,26 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)  		id->proto = x->id.proto;  		attr = nla_reserve(skb, XFRMA_SA, sizeof(*p)); +		err = -EMSGSIZE;  		if (attr == NULL) -			goto nla_put_failure; +			goto out_free_skb;  		p = nla_data(attr);  	} - -	if (copy_to_user_state_extra(x, p, skb)) -		goto nla_put_failure; +	err = copy_to_user_state_extra(x, p, skb); +	if (err) +		goto out_free_skb;  	nlmsg_end(skb, nlh); -	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); +	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA); -nla_put_failure: -	/* Somebody screwed up with xfrm_sa_len! */ -	WARN_ON(1); +out_free_skb:  	kfree_skb(skb); -	return -1; +	return err;  } -static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c) +static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c)  {  	switch (c->event) { @@ -2405,12 +2602,12 @@ static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x,  }  static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, -			 struct xfrm_tmpl *xt, struct xfrm_policy *xp, -			 int dir) +			 struct xfrm_tmpl *xt, struct xfrm_policy *xp)  { +	__u32 seq = xfrm_get_acqseq();  	struct xfrm_user_acquire *ua;  	struct nlmsghdr *nlh; -	__u32 seq = xfrm_get_acqseq(); +	int err;  	nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_ACQUIRE, sizeof(*ua), 0);  	if (nlh == NULL) @@ -2420,31 +2617,29 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,  	memcpy(&ua->id, &x->id, sizeof(ua->id));  	memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr));  	memcpy(&ua->sel, &x->sel, sizeof(ua->sel)); -	copy_to_user_policy(xp, &ua->policy, dir); +	copy_to_user_policy(xp, &ua->policy, XFRM_POLICY_OUT);  	ua->aalgos = xt->aalgos;  	ua->ealgos = xt->ealgos;  	ua->calgos = xt->calgos;  	ua->seq = x->km.seq = seq; -	if (copy_to_user_tmpl(xp, skb) < 0) -		goto nlmsg_failure; -	if (copy_to_user_state_sec_ctx(x, skb)) -		goto nlmsg_failure; -	if (copy_to_user_policy_type(xp->type, skb) < 0) -		goto nlmsg_failure; -	if (xfrm_mark_put(skb, &xp->mark)) -		goto nla_put_failure; +	err = copy_to_user_tmpl(xp, skb); +	if (!err) +		err = copy_to_user_state_sec_ctx(x, skb); +	if (!err) +		err = copy_to_user_policy_type(xp->type, skb); +	if (!err) +		err = xfrm_mark_put(skb, &xp->mark); +	if (err) { +		nlmsg_cancel(skb, nlh); +		return err; +	}  	return nlmsg_end(skb, nlh); - -nla_put_failure: -nlmsg_failure: -	nlmsg_cancel(skb, nlh); -	return -EMSGSIZE;  }  static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, -			     struct xfrm_policy *xp, int dir) +			     struct xfrm_policy *xp)  {  	struct net *net = xs_net(x);  	struct sk_buff *skb; @@ -2453,10 +2648,10 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,  	if (skb == NULL)  		return -ENOMEM; -	if (build_acquire(skb, x, xt, xp, dir) < 0) +	if (build_acquire(skb, x, xt, xp) < 0)  		BUG(); -	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); +	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE);  }  /* User gives us xfrm_user_policy_info followed by an array of 0 @@ -2478,7 +2673,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,  			return NULL;  		}  		break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6)  	case AF_INET6:  		if (opt != IPV6_XFRM_POLICY) {  			*dir = -EOPNOTSUPP; @@ -2529,37 +2724,36 @@ static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp)  }  static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, -			   int dir, struct km_event *c) +			   int dir, const struct km_event *c)  {  	struct xfrm_user_polexpire *upe; -	struct nlmsghdr *nlh;  	int hard = c->data.hard; +	struct nlmsghdr *nlh; +	int err; -	nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); +	nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0);  	if (nlh == NULL)  		return -EMSGSIZE;  	upe = nlmsg_data(nlh);  	copy_to_user_policy(xp, &upe->pol, dir); -	if (copy_to_user_tmpl(xp, skb) < 0) -		goto nlmsg_failure; -	if (copy_to_user_sec_ctx(xp, skb)) -		goto nlmsg_failure; -	if (copy_to_user_policy_type(xp->type, skb) < 0) -		goto nlmsg_failure; -	if (xfrm_mark_put(skb, &xp->mark)) -		goto nla_put_failure; +	err = copy_to_user_tmpl(xp, skb); +	if (!err) +		err = copy_to_user_sec_ctx(xp, skb); +	if (!err) +		err = copy_to_user_policy_type(xp->type, skb); +	if (!err) +		err = xfrm_mark_put(skb, &xp->mark); +	if (err) { +		nlmsg_cancel(skb, nlh); +		return err; +	}  	upe->hard = !!hard;  	return nlmsg_end(skb, nlh); - -nla_put_failure: -nlmsg_failure: -	nlmsg_cancel(skb, nlh); -	return -EMSGSIZE;  } -static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) +static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)  {  	struct net *net = xp_net(xp);  	struct sk_buff *skb; @@ -2571,18 +2765,18 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve  	if (build_polexpire(skb, xp, dir, c) < 0)  		BUG(); -	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); +	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);  } -static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) +static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)  { +	int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);  	struct net *net = xp_net(xp);  	struct xfrm_userpolicy_info *p;  	struct xfrm_userpolicy_id *id;  	struct nlmsghdr *nlh;  	struct sk_buff *skb; -	int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); -	int headlen; +	int headlen, err;  	headlen = sizeof(*p);  	if (c->event == XFRM_MSG_DELPOLICY) { @@ -2597,9 +2791,10 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *  	if (skb == NULL)  		return -ENOMEM; -	nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); +	nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); +	err = -EMSGSIZE;  	if (nlh == NULL) -		goto nlmsg_failure; +		goto out_free_skb;  	p = nlmsg_data(nlh);  	if (c->event == XFRM_MSG_DELPOLICY) { @@ -2614,57 +2809,60 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *  			memcpy(&id->sel, &xp->selector, sizeof(id->sel));  		attr = nla_reserve(skb, XFRMA_POLICY, sizeof(*p)); +		err = -EMSGSIZE;  		if (attr == NULL) -			goto nlmsg_failure; +			goto out_free_skb;  		p = nla_data(attr);  	}  	copy_to_user_policy(xp, p, dir); -	if (copy_to_user_tmpl(xp, skb) < 0) -		goto nlmsg_failure; -	if (copy_to_user_policy_type(xp->type, skb) < 0) -		goto nlmsg_failure; - -	if (xfrm_mark_put(skb, &xp->mark)) -		goto nla_put_failure; +	err = copy_to_user_tmpl(xp, skb); +	if (!err) +		err = copy_to_user_policy_type(xp->type, skb); +	if (!err) +		err = xfrm_mark_put(skb, &xp->mark); +	if (err) +		goto out_free_skb;  	nlmsg_end(skb, nlh); -	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); +	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY); -nla_put_failure: -nlmsg_failure: +out_free_skb:  	kfree_skb(skb); -	return -1; +	return err;  } -static int xfrm_notify_policy_flush(struct km_event *c) +static int xfrm_notify_policy_flush(const struct km_event *c)  {  	struct net *net = c->net;  	struct nlmsghdr *nlh;  	struct sk_buff *skb; +	int err;  	skb = nlmsg_new(userpolicy_type_attrsize(), GFP_ATOMIC);  	if (skb == NULL)  		return -ENOMEM; -	nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); +	nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); +	err = -EMSGSIZE;  	if (nlh == NULL) -		goto nlmsg_failure; -	if (copy_to_user_policy_type(c->data.type, skb) < 0) -		goto nlmsg_failure; +		goto out_free_skb; +	err = copy_to_user_policy_type(c->data.type, skb); +	if (err) +		goto out_free_skb;  	nlmsg_end(skb, nlh); -	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); +	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY); -nlmsg_failure: +out_free_skb:  	kfree_skb(skb); -	return -1; +	return err;  } -static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) +static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)  {  	switch (c->event) { @@ -2704,14 +2902,14 @@ static int build_report(struct sk_buff *skb, u8 proto,  	ur->proto = proto;  	memcpy(&ur->sel, sel, sizeof(ur->sel)); -	if (addr) -		NLA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); - +	if (addr) { +		int err = nla_put(skb, XFRMA_COADDR, sizeof(*addr), addr); +		if (err) { +			nlmsg_cancel(skb, nlh); +			return err; +		} +	}  	return nlmsg_end(skb, nlh); - -nla_put_failure: -	nlmsg_cancel(skb, nlh); -	return -EMSGSIZE;  }  static int xfrm_send_report(struct net *net, u8 proto, @@ -2726,7 +2924,7 @@ static int xfrm_send_report(struct net *net, u8 proto,  	if (build_report(skb, proto, sel, addr) < 0)  		BUG(); -	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); +	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT);  }  static inline size_t xfrm_mapping_msgsize(void) @@ -2778,7 +2976,12 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,  	if (build_mapping(skb, x, ipaddr, sport) < 0)  		BUG(); -	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC); +	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING); +} + +static bool xfrm_is_alive(const struct km_event *c) +{ +	return (bool)xfrm_acquire_is_on(c->net);  }  static struct xfrm_mgr netlink_mgr = { @@ -2790,14 +2993,18 @@ static struct xfrm_mgr netlink_mgr = {  	.report		= xfrm_send_report,  	.migrate	= xfrm_send_migrate,  	.new_mapping	= xfrm_send_mapping, +	.is_alive	= xfrm_is_alive,  };  static int __net_init xfrm_user_net_init(struct net *net)  {  	struct sock *nlsk; +	struct netlink_kernel_cfg cfg = { +		.groups	= XFRMNLGRP_MAX, +		.input	= xfrm_netlink_rcv, +	}; -	nlsk = netlink_kernel_create(net, NETLINK_XFRM, XFRMNLGRP_MAX, -				     xfrm_netlink_rcv, NULL, THIS_MODULE); +	nlsk = netlink_kernel_create(net, NETLINK_XFRM, &cfg);  	if (nlsk == NULL)  		return -ENOMEM;  	net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */ @@ -2809,7 +3016,7 @@ static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list)  {  	struct net *net;  	list_for_each_entry(net, net_exit_list, exit_list) -		rcu_assign_pointer(net->xfrm.nlsk, NULL); +		RCU_INIT_POINTER(net->xfrm.nlsk, NULL);  	synchronize_net();  	list_for_each_entry(net, net_exit_list, exit_list)  		netlink_kernel_release(net->xfrm.nlsk_stash);  | 
