diff options
Diffstat (limited to 'net/xfrm')
| -rw-r--r-- | net/xfrm/Kconfig | 33 | ||||
| -rw-r--r-- | net/xfrm/Makefile | 6 | ||||
| -rw-r--r-- | net/xfrm/xfrm_algo.c | 252 | ||||
| -rw-r--r-- | net/xfrm/xfrm_hash.c | 2 | ||||
| -rw-r--r-- | net/xfrm/xfrm_hash.h | 49 | ||||
| -rw-r--r-- | net/xfrm/xfrm_input.c | 156 | ||||
| -rw-r--r-- | net/xfrm/xfrm_ipcomp.c | 386 | ||||
| -rw-r--r-- | net/xfrm/xfrm_output.c | 112 | ||||
| -rw-r--r-- | net/xfrm/xfrm_policy.c | 2330 | ||||
| -rw-r--r-- | net/xfrm/xfrm_proc.c | 53 | ||||
| -rw-r--r-- | net/xfrm/xfrm_replay.c | 603 | ||||
| -rw-r--r-- | net/xfrm/xfrm_state.c | 1415 | ||||
| -rw-r--r-- | net/xfrm/xfrm_sysctl.c | 86 | ||||
| -rw-r--r-- | net/xfrm/xfrm_user.c | 1418 |
14 files changed, 4662 insertions, 2239 deletions
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 9201ef8ad90..bda1a13628a 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -3,12 +3,17 @@ # config XFRM bool - select CRYPTO depends on NET +config XFRM_ALGO + tristate + select XFRM + select CRYPTO + config XFRM_USER tristate "Transformation user configuration interface" - depends on INET && XFRM + depends on INET + select XFRM_ALGO ---help--- Support for Transformation(XFRM) user configuration interface like IPsec used by native Linux tools. @@ -16,8 +21,8 @@ config XFRM_USER If unsure, say Y. config XFRM_SUB_POLICY - bool "Transformation sub policy support (EXPERIMENTAL)" - depends on XFRM && EXPERIMENTAL + bool "Transformation sub policy support" + depends on XFRM ---help--- Support sub policy for developers. By using sub policy with main one, two policies can be applied to the same packet at once. @@ -26,8 +31,8 @@ config XFRM_SUB_POLICY If unsure, say N. config XFRM_MIGRATE - bool "Transformation migrate database (EXPERIMENTAL)" - depends on XFRM && EXPERIMENTAL + bool "Transformation migrate database" + depends on XFRM ---help--- A feature to update locator(s) of a given IPsec security association dynamically. This feature is required, for @@ -37,8 +42,8 @@ config XFRM_MIGRATE If unsure, say N. config XFRM_STATISTICS - bool "Transformation statistics (EXPERIMENTAL)" - depends on INET && XFRM && PROC_FS && EXPERIMENTAL + bool "Transformation statistics" + depends on INET && XFRM && PROC_FS ---help--- This statistics is not a SNMP/MIB specification but shows statistics about transformation error (or almost error) factor @@ -46,9 +51,15 @@ config XFRM_STATISTICS If unsure, say N. +config XFRM_IPCOMP + tristate + select XFRM_ALGO + select CRYPTO + select CRYPTO_DEFLATE + config NET_KEY tristate "PF_KEY sockets" - select XFRM + select XFRM_ALGO ---help--- PF_KEYv2 socket family, compatible to KAME ones. They are required if you are going to use IPsec tools ported @@ -57,8 +68,8 @@ config NET_KEY Say Y unless you know what you are doing. config NET_KEY_MIGRATE - bool "PF_KEY MIGRATE (EXPERIMENTAL)" - depends on NET_KEY && EXPERIMENTAL + bool "PF_KEY MIGRATE" + depends on NET_KEY select XFRM_MIGRATE ---help--- Add a PF_KEY MIGRATE message to PF_KEYv2 socket family. diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 332cfb0ff56..c0e961983f1 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -3,7 +3,9 @@ # obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ - xfrm_input.o xfrm_output.o xfrm_algo.o + xfrm_input.o xfrm_output.o \ + xfrm_sysctl.o xfrm_replay.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o +obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o - +obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 8aa6440d689..debe733386f 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -15,9 +15,6 @@ #include <linux/crypto.h> #include <linux/scatterlist.h> #include <net/xfrm.h> -#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE) -#include <net/ah.h> -#endif #if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) #include <net/esp.h> #endif @@ -38,6 +35,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV8, .sadb_alg_ivlen = 8, @@ -54,6 +53,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV12, .sadb_alg_ivlen = 8, @@ -70,6 +71,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV16, .sadb_alg_ivlen = 8, @@ -86,6 +89,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV8, .sadb_alg_ivlen = 8, @@ -102,6 +107,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV12, .sadb_alg_ivlen = 8, @@ -118,6 +125,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV16, .sadb_alg_ivlen = 8, @@ -125,12 +134,29 @@ static struct xfrm_algo_desc aead_list[] = { .sadb_alg_maxbits = 256 } }, +{ + .name = "rfc4543(gcm(aes))", + + .uinfo = { + .aead = { + .icv_truncbits = 128, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_EALG_NULL_AES_GMAC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, }; static struct xfrm_algo_desc aalg_list[] = { { - .name = "hmac(digest_null)", - .compat = "digest_null", + .name = "digest_null", .uinfo = { .auth = { @@ -139,6 +165,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_NULL, .sadb_alg_ivlen = 0, @@ -157,6 +185,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_AALG_MD5HMAC, .sadb_alg_ivlen = 0, @@ -175,6 +205,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_AALG_SHA1HMAC, .sadb_alg_ivlen = 0, @@ -193,6 +225,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_SHA2_256HMAC, .sadb_alg_ivlen = 0, @@ -201,8 +235,46 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "hmac(ripemd160)", - .compat = "ripemd160", + .name = "hmac(sha384)", + + .uinfo = { + .auth = { + .icv_truncbits = 192, + .icv_fullbits = 384, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_AALG_SHA2_384HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 384, + .sadb_alg_maxbits = 384 + } +}, +{ + .name = "hmac(sha512)", + + .uinfo = { + .auth = { + .icv_truncbits = 256, + .icv_fullbits = 512, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_AALG_SHA2_512HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 512, + .sadb_alg_maxbits = 512 + } +}, +{ + .name = "hmac(rmd160)", + .compat = "rmd160", .uinfo = { .auth = { @@ -211,6 +283,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC, .sadb_alg_ivlen = 0, @@ -228,6 +302,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_AES_XCBC_MAC, .sadb_alg_ivlen = 0, @@ -235,6 +311,19 @@ static struct xfrm_algo_desc aalg_list[] = { .sadb_alg_maxbits = 128 } }, +{ + /* rfc4494 */ + .name = "cmac(aes)", + + .uinfo = { + .auth = { + .icv_truncbits = 96, + .icv_fullbits = 128, + } + }, + + .pfkey_supported = 0, +}, }; static struct xfrm_algo_desc ealg_list[] = { @@ -249,6 +338,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_EALG_NULL, .sadb_alg_ivlen = 0, @@ -267,6 +358,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_EALG_DESCBC, .sadb_alg_ivlen = 8, @@ -285,6 +378,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_EALG_3DESCBC, .sadb_alg_ivlen = 8, @@ -293,8 +388,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "cbc(cast128)", - .compat = "cast128", + .name = "cbc(cast5)", + .compat = "cast5", .uinfo = { .encr = { @@ -303,6 +398,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_CASTCBC, .sadb_alg_ivlen = 8, @@ -321,6 +418,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_BLOWFISHCBC, .sadb_alg_ivlen = 8, @@ -339,6 +438,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AESCBC, .sadb_alg_ivlen = 8, @@ -357,6 +458,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_SERPENTCBC, .sadb_alg_ivlen = 8, @@ -366,6 +469,7 @@ static struct xfrm_algo_desc ealg_list[] = { }, { .name = "cbc(camellia)", + .compat = "camellia", .uinfo = { .encr = { @@ -374,6 +478,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_CAMELLIACBC, .sadb_alg_ivlen = 8, @@ -392,6 +498,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_TWOFISHCBC, .sadb_alg_ivlen = 8, @@ -409,11 +517,13 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AESCTR, .sadb_alg_ivlen = 8, - .sadb_alg_minbits = 128, - .sadb_alg_maxbits = 256 + .sadb_alg_minbits = 160, + .sadb_alg_maxbits = 288 } }, }; @@ -426,6 +536,7 @@ static struct xfrm_algo_desc calg_list[] = { .threshold = 90, } }, + .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE } }, { @@ -435,6 +546,7 @@ static struct xfrm_algo_desc calg_list[] = { .threshold = 90, } }, + .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_LZS } }, { @@ -444,6 +556,7 @@ static struct xfrm_algo_desc calg_list[] = { .threshold = 50, } }, + .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_LZJH } }, }; @@ -568,21 +681,21 @@ static int xfrm_alg_name_match(const struct xfrm_algo_desc *entry, (entry->compat && !strcmp(name, entry->compat))); } -struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe) +struct xfrm_algo_desc *xfrm_aalg_get_byname(const char *name, int probe) { return xfrm_find_algo(&xfrm_aalg_list, xfrm_alg_name_match, name, probe); } EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname); -struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe) +struct xfrm_algo_desc *xfrm_ealg_get_byname(const char *name, int probe) { return xfrm_find_algo(&xfrm_ealg_list, xfrm_alg_name_match, name, probe); } EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname); -struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe) +struct xfrm_algo_desc *xfrm_calg_get_byname(const char *name, int probe) { return xfrm_find_algo(&xfrm_calg_list, xfrm_alg_name_match, name, probe); @@ -604,7 +717,7 @@ static int xfrm_aead_name_match(const struct xfrm_algo_desc *entry, !strcmp(name, entry->name); } -struct xfrm_algo_desc *xfrm_aead_get_byname(char *name, int icv_len, int probe) +struct xfrm_algo_desc *xfrm_aead_get_byname(const char *name, int icv_len, int probe) { struct xfrm_aead_name data = { .name = name, @@ -653,8 +766,7 @@ void xfrm_probe_algs(void) } for (i = 0; i < ealg_entries(); i++) { - status = crypto_has_blkcipher(ealg_list[i].name, 0, - CRYPTO_ALG_ASYNC); + status = crypto_has_ablkcipher(ealg_list[i].name, 0, 0); if (ealg_list[i].available != status) ealg_list[i].available = status; } @@ -668,118 +780,26 @@ void xfrm_probe_algs(void) } EXPORT_SYMBOL_GPL(xfrm_probe_algs); -int xfrm_count_auth_supported(void) +int xfrm_count_pfkey_auth_supported(void) { int i, n; for (i = 0, n = 0; i < aalg_entries(); i++) - if (aalg_list[i].available) + if (aalg_list[i].available && aalg_list[i].pfkey_supported) n++; return n; } -EXPORT_SYMBOL_GPL(xfrm_count_auth_supported); +EXPORT_SYMBOL_GPL(xfrm_count_pfkey_auth_supported); -int xfrm_count_enc_supported(void) +int xfrm_count_pfkey_enc_supported(void) { int i, n; for (i = 0, n = 0; i < ealg_entries(); i++) - if (ealg_list[i].available) + if (ealg_list[i].available && ealg_list[i].pfkey_supported) n++; return n; } -EXPORT_SYMBOL_GPL(xfrm_count_enc_supported); - -/* Move to common area: it is shared with AH. */ +EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported); -int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, - int offset, int len, icv_update_fn_t icv_update) -{ - int start = skb_headlen(skb); - int i, copy = start - offset; - int err; - struct scatterlist sg; - - /* Checksum header. */ - if (copy > 0) { - if (copy > len) - copy = len; - - sg_init_one(&sg, skb->data + offset, copy); - - err = icv_update(desc, &sg, copy); - if (unlikely(err)) - return err; - - if ((len -= copy) == 0) - return 0; - offset += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - - sg_init_table(&sg, 1); - sg_set_page(&sg, frag->page, copy, - frag->page_offset + offset-start); - - err = icv_update(desc, &sg, copy); - if (unlikely(err)) - return err; - - if (!(len -= copy)) - return 0; - offset += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - err = skb_icv_walk(list, desc, offset-start, - copy, icv_update); - if (unlikely(err)) - return err; - if ((len -= copy) == 0) - return 0; - offset += copy; - } - start = end; - } - } - BUG_ON(len); - return 0; -} -EXPORT_SYMBOL_GPL(skb_icv_walk); - -#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) - -void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) -{ - if (tail != skb) { - skb->data_len += len; - skb->len += len; - } - return skb_put(tail, len); -} -EXPORT_SYMBOL_GPL(pskb_put); -#endif +MODULE_LICENSE("GPL"); diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c index a2023ec5232..1e98bc0fe0a 100644 --- a/net/xfrm/xfrm_hash.c +++ b/net/xfrm/xfrm_hash.c @@ -19,7 +19,7 @@ struct hlist_head *xfrm_hash_alloc(unsigned int sz) if (sz <= PAGE_SIZE) n = kzalloc(sz, GFP_KERNEL); else if (hashdist) - n = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); + n = vzalloc(sz); else n = (struct hlist_head *) __get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index d401dc8f05e..0622d319e1f 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -4,28 +4,32 @@ #include <linux/xfrm.h> #include <linux/socket.h> -static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm4_addr_hash(const xfrm_address_t *addr) { return ntohl(addr->a4); } -static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm6_addr_hash(const xfrm_address_t *addr) { return ntohl(addr->a6[2] ^ addr->a6[3]); } -static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr) { - return ntohl(daddr->a4 ^ saddr->a4); + u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4; + return ntohl((__force __be32)sum); } -static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +static inline unsigned int __xfrm6_daddr_saddr_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr) { return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ saddr->a6[2] ^ saddr->a6[3]); } -static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, +static inline unsigned int __xfrm_dst_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr, u32 reqid, unsigned short family, unsigned int hmask) { @@ -41,10 +45,10 @@ static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t return (h ^ (h >> 16)) & hmask; } -static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr, - xfrm_address_t *saddr, - unsigned short family, - unsigned int hmask) +static inline unsigned int __xfrm_src_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + unsigned short family, + unsigned int hmask) { unsigned int h = family; switch (family) { @@ -54,13 +58,13 @@ static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr, case AF_INET6: h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); break; - }; + } return (h ^ (h >> 16)) & hmask; } static inline unsigned int -__xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family, - unsigned int hmask) +__xfrm_spi_hash(const xfrm_address_t *daddr, __be32 spi, u8 proto, + unsigned short family, unsigned int hmask) { unsigned int h = (__force u32)spi ^ proto; switch (family) { @@ -79,10 +83,11 @@ static inline unsigned int __idx_hash(u32 index, unsigned int hmask) return (index ^ (index >> 8)) & hmask; } -static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) +static inline unsigned int __sel_hash(const struct xfrm_selector *sel, + unsigned short family, unsigned int hmask) { - xfrm_address_t *daddr = &sel->daddr; - xfrm_address_t *saddr = &sel->saddr; + const xfrm_address_t *daddr = &sel->daddr; + const xfrm_address_t *saddr = &sel->saddr; unsigned int h = 0; switch (family) { @@ -101,12 +106,14 @@ static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short h = __xfrm6_daddr_saddr_hash(daddr, saddr); break; - }; + } h ^= (h >> 16); return h & hmask; } -static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) +static inline unsigned int __addr_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + unsigned short family, unsigned int hmask) { unsigned int h = 0; @@ -118,12 +125,12 @@ static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *sa case AF_INET6: h = __xfrm6_daddr_saddr_hash(daddr, saddr); break; - }; + } h ^= (h >> 16); return h & hmask; } -extern struct hlist_head *xfrm_hash_alloc(unsigned int sz); -extern void xfrm_hash_free(struct hlist_head *n, unsigned int sz); +struct hlist_head *xfrm_hash_alloc(unsigned int sz); +void xfrm_hash_free(struct hlist_head *n, unsigned int sz); #endif /* _XFRM_HASH_H */ diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 75279402ccf..85d1d476461 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -16,6 +16,81 @@ static struct kmem_cache *secpath_cachep __read_mostly; +static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); +static struct xfrm_input_afinfo __rcu *xfrm_input_afinfo[NPROTO]; + +int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo) +{ + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EINVAL; + if (unlikely(afinfo->family >= NPROTO)) + return -EAFNOSUPPORT; + spin_lock_bh(&xfrm_input_afinfo_lock); + if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL)) + err = -ENOBUFS; + else + rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo); + spin_unlock_bh(&xfrm_input_afinfo_lock); + return err; +} +EXPORT_SYMBOL(xfrm_input_register_afinfo); + +int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo) +{ + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EINVAL; + if (unlikely(afinfo->family >= NPROTO)) + return -EAFNOSUPPORT; + spin_lock_bh(&xfrm_input_afinfo_lock); + if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) { + if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo)) + err = -EINVAL; + else + RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL); + } + spin_unlock_bh(&xfrm_input_afinfo_lock); + synchronize_rcu(); + return err; +} +EXPORT_SYMBOL(xfrm_input_unregister_afinfo); + +static struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family) +{ + struct xfrm_input_afinfo *afinfo; + + if (unlikely(family >= NPROTO)) + return NULL; + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_input_afinfo[family]); + if (unlikely(!afinfo)) + rcu_read_unlock(); + return afinfo; +} + +static void xfrm_input_put_afinfo(struct xfrm_input_afinfo *afinfo) +{ + rcu_read_unlock(); +} + +static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol, + int err) +{ + int ret; + struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family); + + if (!afinfo) + return -EAFNOSUPPORT; + + ret = afinfo->callback(skb, protocol, err); + xfrm_input_put_afinfo(afinfo); + + return ret; +} + void __secpath_destroy(struct sec_path *sp) { int i; @@ -67,7 +142,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) case IPPROTO_COMP: if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr))) return -EINVAL; - *spi = htonl(ntohs(*(__be16*)(skb_transport_header(skb) + 2))); + *spi = htonl(ntohs(*(__be16 *)(skb_transport_header(skb) + 2))); *seq = 0; return 0; default: @@ -77,8 +152,8 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) if (!pskb_may_pull(skb, hlen)) return -EINVAL; - *spi = *(__be32*)(skb_transport_header(skb) + offset); - *seq = *(__be32*)(skb_transport_header(skb) + offset_seq); + *spi = *(__be32 *)(skb_transport_header(skb) + offset); + *seq = *(__be32 *)(skb_transport_header(skb) + offset_seq); return 0; } @@ -104,9 +179,11 @@ EXPORT_SYMBOL(xfrm_prepare_input); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { + struct net *net = dev_net(skb->dev); int err; __be32 seq; - struct xfrm_state *x; + __be32 seq_hi; + struct xfrm_state *x = NULL; xfrm_address_t *daddr; struct xfrm_mode *inner_mode; unsigned int family; @@ -117,17 +194,22 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) if (encap_type < 0) { async = 1; x = xfrm_input_state(skb); - seq = XFRM_SKB_CB(skb)->seq.input; + seq = XFRM_SKB_CB(skb)->seq.input.low; + family = x->outer_mode->afinfo->family; goto resume; } + daddr = (xfrm_address_t *)(skb_network_header(skb) + + XFRM_SPI_SKB_CB(skb)->daddroff); + family = XFRM_SPI_SKB_CB(skb)->family; + /* Allocate new secpath or COW existing one. */ if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { struct sec_path *sp; sp = secpath_dup(skb->sp); if (!sp) { - XFRM_INC_STATS(LINUX_MIB_XFRMINERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); goto drop; } if (skb->sp) @@ -135,61 +217,71 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp = sp; } - daddr = (xfrm_address_t *)(skb_network_header(skb) + - XFRM_SPI_SKB_CB(skb)->daddroff); - family = XFRM_SPI_SKB_CB(skb)->family; - seq = 0; if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } do { if (skb->sp->len == XFRM_MAX_DEPTH) { - XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto drop; } - x = xfrm_state_lookup(daddr, spi, nexthdr, family); + x = xfrm_state_lookup(net, skb->mark, daddr, spi, nexthdr, family); if (x == NULL) { - XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound(skb, family, spi, seq); goto drop; } skb->sp->xvec[skb->sp->len++] = x; + if (xfrm_tunnel_check(skb, x, family)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); + goto drop; + } + spin_lock(&x->lock); + if (unlikely(x->km.state == XFRM_STATE_ACQ)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); + goto drop_unlock; + } + if (unlikely(x->km.state != XFRM_STATE_VALID)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEINVALID); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID); goto drop_unlock; } if ((x->encap ? x->encap->encap_type : 0) != encap_type) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); goto drop_unlock; } - if (x->props.replay_window && xfrm_replay_check(x, skb, seq)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATESEQERROR); + if (x->repl->check(x, skb, seq)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } if (xfrm_state_check_expire(x)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEEXPIRED); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEEXPIRED); goto drop_unlock; } spin_unlock(&x->lock); - XFRM_SKB_CB(skb)->seq.input = seq; + seq_hi = htonl(xfrm_replay_seqhi(x, seq)); + + XFRM_SKB_CB(skb)->seq.input.low = seq; + XFRM_SKB_CB(skb)->seq.input.hi = seq_hi; + + skb_dst_force(skb); nexthdr = x->type->input(x, skb); if (nexthdr == -EINPROGRESS) return 0; - resume: spin_lock(&x->lock); if (nexthdr <= 0) { @@ -198,15 +290,19 @@ resume: x->type->proto); x->stats.integrity_failed++; } - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEPROTOERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR); goto drop_unlock; } /* only the first xfrm gets the encap type */ encap_type = 0; - if (x->props.replay_window) - xfrm_replay_advance(x, seq); + if (async && x->repl->recheck(x, skb, seq)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); + goto drop_unlock; + } + + x->repl->advance(x, seq); x->curlft.bytes += skb->len; x->curlft.packets++; @@ -224,7 +320,7 @@ resume: } if (inner_mode->input(x, skb)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMODEERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } @@ -242,16 +338,19 @@ resume: err = xfrm_parse_spi(skb, nexthdr, &spi, &seq); if (err < 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } } while (!err); + err = xfrm_rcv_cb(skb, family, x->type->proto, 0); + if (err) + goto drop; + nf_reset(skb); if (decaps) { - dst_release(skb->dst); - skb->dst = NULL; + skb_dst_drop(skb); netif_rx(skb); return 0; } else { @@ -261,6 +360,7 @@ resume: drop_unlock: spin_unlock(&x->lock); drop: + xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1); kfree_skb(skb); return 0; } diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c new file mode 100644 index 00000000000..ccfdc7115a8 --- /dev/null +++ b/net/xfrm/xfrm_ipcomp.c @@ -0,0 +1,386 @@ +/* + * IP Payload Compression Protocol (IPComp) - RFC3173. + * + * Copyright (c) 2003 James Morris <jmorris@intercode.com.au> + * Copyright (c) 2003-2008 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Todo: + * - Tunable compression parameters. + * - Compression stats. + * - Adaptive compression. + */ + +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/percpu.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/vmalloc.h> +#include <net/ip.h> +#include <net/ipcomp.h> +#include <net/xfrm.h> + +struct ipcomp_tfms { + struct list_head list; + struct crypto_comp * __percpu *tfms; + int users; +}; + +static DEFINE_MUTEX(ipcomp_resource_mutex); +static void * __percpu *ipcomp_scratches; +static int ipcomp_scratch_users; +static LIST_HEAD(ipcomp_tfms_list); + +static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipcomp_data *ipcd = x->data; + const int plen = skb->len; + int dlen = IPCOMP_SCRATCH_SIZE; + const u8 *start = skb->data; + const int cpu = get_cpu(); + u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); + struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); + int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); + int len; + + if (err) + goto out; + + if (dlen < (plen + sizeof(struct ip_comp_hdr))) { + err = -EINVAL; + goto out; + } + + len = dlen - plen; + if (len > skb_tailroom(skb)) + len = skb_tailroom(skb); + + __skb_put(skb, len); + + len += plen; + skb_copy_to_linear_data(skb, scratch, len); + + while ((scratch += len, dlen -= len) > 0) { + skb_frag_t *frag; + struct page *page; + + err = -EMSGSIZE; + if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) + goto out; + + frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags; + page = alloc_page(GFP_ATOMIC); + + err = -ENOMEM; + if (!page) + goto out; + + __skb_frag_set_page(frag, page); + + len = PAGE_SIZE; + if (dlen < len) + len = dlen; + + frag->page_offset = 0; + skb_frag_size_set(frag, len); + memcpy(skb_frag_address(frag), scratch, len); + + skb->truesize += len; + skb->data_len += len; + skb->len += len; + + skb_shinfo(skb)->nr_frags++; + } + + err = 0; + +out: + put_cpu(); + return err; +} + +int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) +{ + int nexthdr; + int err = -ENOMEM; + struct ip_comp_hdr *ipch; + + if (skb_linearize_cow(skb)) + goto out; + + skb->ip_summed = CHECKSUM_NONE; + + /* Remove ipcomp header and decompress original payload */ + ipch = (void *)skb->data; + nexthdr = ipch->nexthdr; + + skb->transport_header = skb->network_header + sizeof(*ipch); + __skb_pull(skb, sizeof(*ipch)); + err = ipcomp_decompress(x, skb); + if (err) + goto out; + + err = nexthdr; + +out: + return err; +} +EXPORT_SYMBOL_GPL(ipcomp_input); + +static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipcomp_data *ipcd = x->data; + const int plen = skb->len; + int dlen = IPCOMP_SCRATCH_SIZE; + u8 *start = skb->data; + struct crypto_comp *tfm; + u8 *scratch; + int err; + + local_bh_disable(); + scratch = *this_cpu_ptr(ipcomp_scratches); + tfm = *this_cpu_ptr(ipcd->tfms); + err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); + if (err) + goto out; + + if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) { + err = -EMSGSIZE; + goto out; + } + + memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); + local_bh_enable(); + + pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); + return 0; + +out: + local_bh_enable(); + return err; +} + +int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + struct ip_comp_hdr *ipch; + struct ipcomp_data *ipcd = x->data; + + if (skb->len < ipcd->threshold) { + /* Don't bother compressing */ + goto out_ok; + } + + if (skb_linearize_cow(skb)) + goto out_ok; + + err = ipcomp_compress(x, skb); + + if (err) { + goto out_ok; + } + + /* Install ipcomp header, convert into ipcomp datagram. */ + ipch = ip_comp_hdr(skb); + ipch->nexthdr = *skb_mac_header(skb); + ipch->flags = 0; + ipch->cpi = htons((u16 )ntohl(x->id.spi)); + *skb_mac_header(skb) = IPPROTO_COMP; +out_ok: + skb_push(skb, -skb_network_offset(skb)); + return 0; +} +EXPORT_SYMBOL_GPL(ipcomp_output); + +static void ipcomp_free_scratches(void) +{ + int i; + void * __percpu *scratches; + + if (--ipcomp_scratch_users) + return; + + scratches = ipcomp_scratches; + if (!scratches) + return; + + for_each_possible_cpu(i) + vfree(*per_cpu_ptr(scratches, i)); + + free_percpu(scratches); +} + +static void * __percpu *ipcomp_alloc_scratches(void) +{ + void * __percpu *scratches; + int i; + + if (ipcomp_scratch_users++) + return ipcomp_scratches; + + scratches = alloc_percpu(void *); + if (!scratches) + return NULL; + + ipcomp_scratches = scratches; + + for_each_possible_cpu(i) { + void *scratch; + + scratch = vmalloc_node(IPCOMP_SCRATCH_SIZE, cpu_to_node(i)); + if (!scratch) + return NULL; + *per_cpu_ptr(scratches, i) = scratch; + } + + return scratches; +} + +static void ipcomp_free_tfms(struct crypto_comp * __percpu *tfms) +{ + struct ipcomp_tfms *pos; + int cpu; + + list_for_each_entry(pos, &ipcomp_tfms_list, list) { + if (pos->tfms == tfms) + break; + } + + WARN_ON(!pos); + + if (--pos->users) + return; + + list_del(&pos->list); + kfree(pos); + + if (!tfms) + return; + + for_each_possible_cpu(cpu) { + struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); + crypto_free_comp(tfm); + } + free_percpu(tfms); +} + +static struct crypto_comp * __percpu *ipcomp_alloc_tfms(const char *alg_name) +{ + struct ipcomp_tfms *pos; + struct crypto_comp * __percpu *tfms; + int cpu; + + + list_for_each_entry(pos, &ipcomp_tfms_list, list) { + struct crypto_comp *tfm; + + /* This can be any valid CPU ID so we don't need locking. */ + tfm = __this_cpu_read(*pos->tfms); + + if (!strcmp(crypto_comp_name(tfm), alg_name)) { + pos->users++; + return pos->tfms; + } + } + + pos = kmalloc(sizeof(*pos), GFP_KERNEL); + if (!pos) + return NULL; + + pos->users = 1; + INIT_LIST_HEAD(&pos->list); + list_add(&pos->list, &ipcomp_tfms_list); + + pos->tfms = tfms = alloc_percpu(struct crypto_comp *); + if (!tfms) + goto error; + + for_each_possible_cpu(cpu) { + struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto error; + *per_cpu_ptr(tfms, cpu) = tfm; + } + + return tfms; + +error: + ipcomp_free_tfms(tfms); + return NULL; +} + +static void ipcomp_free_data(struct ipcomp_data *ipcd) +{ + if (ipcd->tfms) + ipcomp_free_tfms(ipcd->tfms); + ipcomp_free_scratches(); +} + +void ipcomp_destroy(struct xfrm_state *x) +{ + struct ipcomp_data *ipcd = x->data; + if (!ipcd) + return; + xfrm_state_delete_tunnel(x); + mutex_lock(&ipcomp_resource_mutex); + ipcomp_free_data(ipcd); + mutex_unlock(&ipcomp_resource_mutex); + kfree(ipcd); +} +EXPORT_SYMBOL_GPL(ipcomp_destroy); + +int ipcomp_init_state(struct xfrm_state *x) +{ + int err; + struct ipcomp_data *ipcd; + struct xfrm_algo_desc *calg_desc; + + err = -EINVAL; + if (!x->calg) + goto out; + + if (x->encap) + goto out; + + err = -ENOMEM; + ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); + if (!ipcd) + goto out; + + mutex_lock(&ipcomp_resource_mutex); + if (!ipcomp_alloc_scratches()) + goto error; + + ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name); + if (!ipcd->tfms) + goto error; + mutex_unlock(&ipcomp_resource_mutex); + + calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0); + BUG_ON(!calg_desc); + ipcd->threshold = calg_desc->uinfo.comp.threshold; + x->data = ipcd; + err = 0; +out: + return err; + +error: + ipcomp_free_data(ipcd); + mutex_unlock(&ipcomp_resource_mutex); + kfree(ipcd); + goto out; +} +EXPORT_SYMBOL_GPL(ipcomp_init_state); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173"); +MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 2519129c6d2..c51e8f7b865 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -14,64 +14,70 @@ #include <linux/netdevice.h> #include <linux/netfilter.h> #include <linux/skbuff.h> +#include <linux/slab.h> #include <linux/spinlock.h> #include <net/dst.h> #include <net/xfrm.h> static int xfrm_output2(struct sk_buff *skb); -static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) +static int xfrm_skb_check_space(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev) - skb_headroom(skb); + int ntail = dst->dev->needed_tailroom - skb_tailroom(skb); - if (nhead > 0) - return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); + if (nhead <= 0) { + if (ntail <= 0) + return 0; + nhead = 0; + } else if (ntail < 0) + ntail = 0; - /* Check tail too... */ - return 0; + return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); } static int xfrm_output_one(struct sk_buff *skb, int err) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; + struct net *net = xs_net(x); if (err <= 0) goto resume; do { - err = xfrm_state_check_space(x, skb); + err = xfrm_skb_check_space(skb); if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); goto error_nolock; } err = x->outer_mode->output(x, skb); if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEMODEERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR); goto error_nolock; } spin_lock_bh(&x->lock); + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID); + err = -EINVAL; + goto error; + } + err = xfrm_state_check_expire(x); if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEEXPIRED); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED); goto error; } - if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { - XFRM_SKB_CB(skb)->seq.output = ++x->replay.oseq; - if (unlikely(x->replay.oseq == 0)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATESEQERROR); - x->replay.oseq--; - xfrm_audit_state_replay_overflow(x, skb); - err = -EOVERFLOW; - goto error; - } - if (xfrm_aevent_is_on()) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); + err = x->repl->overflow(x, skb); + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); + goto error; } x->curlft.bytes += skb->len; @@ -79,54 +85,53 @@ static int xfrm_output_one(struct sk_buff *skb, int err) spin_unlock_bh(&x->lock); + skb_dst_force(skb); + err = x->type->output(x, skb); if (err == -EINPROGRESS) - goto out_exit; + goto out; resume: if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEPROTOERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR); goto error_nolock; } - if (!(skb->dst = dst_pop(dst))) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR); + dst = skb_dst_pop(skb); + if (!dst) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); err = -EHOSTUNREACH; goto error_nolock; } - dst = skb->dst; + skb_dst_set(skb, dst); x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); - err = 0; + return 0; -out_exit: - return err; error: spin_unlock_bh(&x->lock); error_nolock: kfree_skb(skb); - goto out_exit; +out: + return err; } int xfrm_output_resume(struct sk_buff *skb, int err) { while (likely((err = xfrm_output_one(skb, err)) == 0)) { - struct xfrm_state *x; - nf_reset(skb); - err = skb->dst->ops->local_out(skb); + err = skb_dst(skb)->ops->local_out(skb); if (unlikely(err != 1)) goto out; - x = skb->dst->xfrm; - if (!x) + if (!skb_dst(skb)->xfrm) return dst_output(skb); - err = nf_hook(skb->dst->ops->family, + err = nf_hook(skb_dst(skb)->ops->family, NF_INET_POST_ROUTING, skb, - NULL, skb->dst->dev, xfrm_output2); + NULL, skb_dst(skb)->dev, xfrm_output2); if (unlikely(err != 1)) goto out; } @@ -150,7 +155,7 @@ static int xfrm_output_gso(struct sk_buff *skb) segs = skb_gso_segment(skb, 0); kfree_skb(skb); - if (unlikely(IS_ERR(segs))) + if (IS_ERR(segs)) return PTR_ERR(segs); do { @@ -177,6 +182,7 @@ static int xfrm_output_gso(struct sk_buff *skb) int xfrm_output(struct sk_buff *skb) { + struct net *net = dev_net(skb_dst(skb)->dev); int err; if (skb_is_gso(skb)) @@ -185,7 +191,7 @@ int xfrm_output(struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_PARTIAL) { err = skb_checksum_help(skb); if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); kfree_skb(skb); return err; } @@ -193,13 +199,14 @@ int xfrm_output(struct sk_buff *skb) return xfrm_output2(skb); } +EXPORT_SYMBOL_GPL(xfrm_output); int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) { struct xfrm_mode *inner_mode; if (x->sel.family == AF_UNSPEC) inner_mode = xfrm_ip2inner_mode(x, - xfrm_af2proto(skb->dst->ops->family)); + xfrm_af2proto(skb_dst(skb)->ops->family)); else inner_mode = x->inner_mode; @@ -207,6 +214,25 @@ int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) return -EAFNOSUPPORT; return inner_mode->afinfo->extract_output(x, skb); } - -EXPORT_SYMBOL_GPL(xfrm_output); EXPORT_SYMBOL_GPL(xfrm_inner_extract_output); + +void xfrm_local_error(struct sk_buff *skb, int mtu) +{ + unsigned int proto; + struct xfrm_state_afinfo *afinfo; + + if (skb->protocol == htons(ETH_P_IP)) + proto = AF_INET; + else if (skb->protocol == htons(ETH_P_IPV6)) + proto = AF_INET6; + else + return; + + afinfo = xfrm_state_get_afinfo(proto); + if (!afinfo) + return; + + afinfo->local_error(skb, mtu); + xfrm_state_put_afinfo(afinfo); +} +EXPORT_SYMBOL_GPL(xfrm_local_error); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9fc4c315f6c..0525d78ba32 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -26,6 +26,7 @@ #include <linux/cache.h> #include <linux/audit.h> #include <net/dst.h> +#include <net/flow.h> #include <net/xfrm.h> #include <net/ip.h> #ifdef CONFIG_XFRM_STATISTICS @@ -34,58 +35,52 @@ #include "xfrm_hash.h" -int sysctl_xfrm_larval_drop __read_mostly; +#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10)) +#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) +#define XFRM_MAX_QUEUE_LEN 100 -#ifdef CONFIG_XFRM_STATISTICS -DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly; -EXPORT_SYMBOL(xfrm_statistics); -#endif - -DEFINE_MUTEX(xfrm_cfg_mutex); -EXPORT_SYMBOL(xfrm_cfg_mutex); - -static DEFINE_RWLOCK(xfrm_policy_lock); - -unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; -EXPORT_SYMBOL(xfrm_policy_count); - -static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); -static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; +static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); +static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] + __read_mostly; static struct kmem_cache *xfrm_dst_cache __read_mostly; -static struct work_struct xfrm_policy_gc_work; -static HLIST_HEAD(xfrm_policy_gc_list); -static DEFINE_SPINLOCK(xfrm_policy_gc_lock); - -static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); -static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); +static int stale_bundle(struct dst_entry *dst); +static int xfrm_bundle_ok(struct xfrm_dst *xdst); +static void xfrm_policy_queue_process(unsigned long arg); -static inline int -__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) +static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, + int dir); + +static inline bool +__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { - return addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) && - addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) && - !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && - !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && - (fl->proto == sel->proto || !sel->proto) && - (fl->oif == sel->ifindex || !sel->ifindex); + const struct flowi4 *fl4 = &fl->u.ip4; + + return addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) && + addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) && + !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) && + (fl4->flowi4_proto == sel->proto || !sel->proto) && + (fl4->flowi4_oif == sel->ifindex || !sel->ifindex); } -static inline int -__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl) +static inline bool +__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { - return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && - addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && - !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && - !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && - (fl->proto == sel->proto || !sel->proto) && - (fl->oif == sel->ifindex || !sel->ifindex); + const struct flowi6 *fl6 = &fl->u.ip6; + + return addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) && + addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) && + !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) && + (fl6->flowi6_proto == sel->proto || !sel->proto) && + (fl6->flowi6_oif == sel->ifindex || !sel->ifindex); } -int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, - unsigned short family) +bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, + unsigned short family) { switch (family) { case AF_INET: @@ -93,28 +88,74 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, case AF_INET6: return __xfrm6_selector_match(sel, fl); } - return 0; + return false; +} + +static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) +{ + struct xfrm_policy_afinfo *afinfo; + + if (unlikely(family >= NPROTO)) + return NULL; + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_policy_afinfo[family]); + if (unlikely(!afinfo)) + rcu_read_unlock(); + return afinfo; +} + +static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) +{ + rcu_read_unlock(); +} + +static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, + const xfrm_address_t *saddr, + const xfrm_address_t *daddr, + int family) +{ + struct xfrm_policy_afinfo *afinfo; + struct dst_entry *dst; + + afinfo = xfrm_policy_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return ERR_PTR(-EAFNOSUPPORT); + + dst = afinfo->dst_lookup(net, tos, saddr, daddr); + + xfrm_policy_put_afinfo(afinfo); + + return dst; } static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, + xfrm_address_t *prev_saddr, + xfrm_address_t *prev_daddr, int family) { + struct net *net = xs_net(x); xfrm_address_t *saddr = &x->props.saddr; xfrm_address_t *daddr = &x->id.daddr; - struct xfrm_policy_afinfo *afinfo; struct dst_entry *dst; - if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) + if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) { saddr = x->coaddr; - if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) + daddr = prev_daddr; + } + if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) { + saddr = prev_saddr; daddr = x->coaddr; + } - afinfo = xfrm_policy_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return ERR_PTR(-EAFNOSUPPORT); + dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family); + + if (!IS_ERR(dst)) { + if (prev_saddr != saddr) + memcpy(prev_saddr, saddr, sizeof(*prev_saddr)); + if (prev_daddr != daddr) + memcpy(prev_daddr, daddr, sizeof(*prev_daddr)); + } - dst = afinfo->dst_lookup(tos, saddr, daddr); - xfrm_policy_put_afinfo(afinfo); return dst; } @@ -128,7 +169,7 @@ static inline unsigned long make_jiffies(long secs) static void xfrm_policy_timer(unsigned long data) { - struct xfrm_policy *xp = (struct xfrm_policy*)data; + struct xfrm_policy *xp = (struct xfrm_policy *)data; unsigned long now = get_seconds(); long next = LONG_MAX; int warn = 0; @@ -136,7 +177,7 @@ static void xfrm_policy_timer(unsigned long data) read_lock(&xp->lock); - if (xp->dead) + if (unlikely(xp->walk.dead)) goto out; dir = xfrm_policy_id2dir(xp->index); @@ -196,24 +237,59 @@ expired: xfrm_pol_put(xp); } +static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo) +{ + struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); + + if (unlikely(pol->walk.dead)) + flo = NULL; + else + xfrm_pol_hold(pol); + + return flo; +} + +static int xfrm_policy_flo_check(struct flow_cache_object *flo) +{ + struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); + + return !pol->walk.dead; +} + +static void xfrm_policy_flo_delete(struct flow_cache_object *flo) +{ + xfrm_pol_put(container_of(flo, struct xfrm_policy, flo)); +} + +static const struct flow_cache_ops xfrm_policy_fc_ops = { + .get = xfrm_policy_flo_get, + .check = xfrm_policy_flo_check, + .delete = xfrm_policy_flo_delete, +}; /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 * SPD calls. */ -struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) +struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) { struct xfrm_policy *policy; policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { + write_pnet(&policy->xp_net, net); + INIT_LIST_HEAD(&policy->walk.all); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); atomic_set(&policy->refcnt, 1); + skb_queue_head_init(&policy->polq.hold_queue); setup_timer(&policy->timer, xfrm_policy_timer, (unsigned long)policy); + setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, + (unsigned long)policy); + policy->flo.ops = &xfrm_policy_fc_ops; } return policy; } @@ -223,49 +299,22 @@ EXPORT_SYMBOL(xfrm_policy_alloc); void xfrm_policy_destroy(struct xfrm_policy *policy) { - BUG_ON(!policy->dead); + BUG_ON(!policy->walk.dead); - BUG_ON(policy->bundles); - - if (del_timer(&policy->timer)) + if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer)) BUG(); - security_xfrm_policy_free(policy); + security_xfrm_policy_free(policy->security); kfree(policy); } EXPORT_SYMBOL(xfrm_policy_destroy); -static void xfrm_policy_gc_kill(struct xfrm_policy *policy) -{ - struct dst_entry *dst; - - while ((dst = policy->bundles) != NULL) { - policy->bundles = dst->next; - dst_free(dst); - } - - if (del_timer(&policy->timer)) - atomic_dec(&policy->refcnt); - - if (atomic_read(&policy->refcnt) > 1) - flow_cache_flush(); - - xfrm_pol_put(policy); -} - -static void xfrm_policy_gc_task(struct work_struct *work) +static void xfrm_queue_purge(struct sk_buff_head *list) { - struct xfrm_policy *policy; - struct hlist_node *entry, *tmp; - struct hlist_head gc_list; - - spin_lock_bh(&xfrm_policy_gc_lock); - gc_list.first = xfrm_policy_gc_list.first; - INIT_HLIST_HEAD(&xfrm_policy_gc_list); - spin_unlock_bh(&xfrm_policy_gc_lock); + struct sk_buff *skb; - hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst) - xfrm_policy_gc_kill(policy); + while ((skb = skb_dequeue(list)) != NULL) + kfree_skb(skb); } /* Rule must be locked. Release descentant resources, announce @@ -274,84 +323,75 @@ static void xfrm_policy_gc_task(struct work_struct *work) static void xfrm_policy_kill(struct xfrm_policy *policy) { - int dead; + policy->walk.dead = 1; - write_lock_bh(&policy->lock); - dead = policy->dead; - policy->dead = 1; - write_unlock_bh(&policy->lock); + atomic_inc(&policy->genid); - if (unlikely(dead)) { - WARN_ON(1); - return; - } + if (del_timer(&policy->polq.hold_timer)) + xfrm_pol_put(policy); + xfrm_queue_purge(&policy->polq.hold_queue); - spin_lock(&xfrm_policy_gc_lock); - hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); - spin_unlock(&xfrm_policy_gc_lock); + if (del_timer(&policy->timer)) + xfrm_pol_put(policy); - schedule_work(&xfrm_policy_gc_work); + xfrm_pol_put(policy); } -struct xfrm_policy_hash { - struct hlist_head *table; - unsigned int hmask; -}; - -static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2]; -static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly; -static struct hlist_head *xfrm_policy_byidx __read_mostly; -static unsigned int xfrm_idx_hmask __read_mostly; static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; -static inline unsigned int idx_hash(u32 index) +static inline unsigned int idx_hash(struct net *net, u32 index) { - return __idx_hash(index, xfrm_idx_hmask); + return __idx_hash(index, net->xfrm.policy_idx_hmask); } -static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir) +static struct hlist_head *policy_hash_bysel(struct net *net, + const struct xfrm_selector *sel, + unsigned short family, int dir) { - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash = __sel_hash(sel, family, hmask); return (hash == hmask + 1 ? - &xfrm_policy_inexact[dir] : - xfrm_policy_bydst[dir].table + hash); + &net->xfrm.policy_inexact[dir] : + net->xfrm.policy_bydst[dir].table + hash); } -static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) +static struct hlist_head *policy_hash_direct(struct net *net, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + unsigned short family, int dir) { - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash = __addr_hash(daddr, saddr, family, hmask); - return xfrm_policy_bydst[dir].table + hash; + return net->xfrm.policy_bydst[dir].table + hash; } static void xfrm_dst_hash_transfer(struct hlist_head *list, struct hlist_head *ndsttable, unsigned int nhashmask) { - struct hlist_node *entry, *tmp, *entry0 = NULL; + struct hlist_node *tmp, *entry0 = NULL; struct xfrm_policy *pol; unsigned int h0 = 0; redo: - hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) { + hlist_for_each_entry_safe(pol, tmp, list, bydst) { unsigned int h; h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, pol->family, nhashmask); if (!entry0) { - hlist_del(entry); + hlist_del(&pol->bydst); hlist_add_head(&pol->bydst, ndsttable+h); h0 = h; } else { if (h != h0) continue; - hlist_del(entry); + hlist_del(&pol->bydst); hlist_add_after(entry0, &pol->bydst); } - entry0 = entry; + entry0 = &pol->bydst; } if (!hlist_empty(list)) { entry0 = NULL; @@ -363,10 +403,10 @@ static void xfrm_idx_hash_transfer(struct hlist_head *list, struct hlist_head *nidxtable, unsigned int nhashmask) { - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; struct xfrm_policy *pol; - hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) { + hlist_for_each_entry_safe(pol, tmp, list, byidx) { unsigned int h; h = __idx_hash(pol->index, nhashmask); @@ -379,60 +419,60 @@ static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) return ((old_hmask + 1) << 1) - 1; } -static void xfrm_bydst_resize(int dir) +static void xfrm_bydst_resize(struct net *net, int dir) { - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *odst = xfrm_policy_bydst[dir].table; + struct hlist_head *odst = net->xfrm.policy_bydst[dir].table; struct hlist_head *ndst = xfrm_hash_alloc(nsize); int i; if (!ndst) return; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); for (i = hmask; i >= 0; i--) xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); - xfrm_policy_bydst[dir].table = ndst; - xfrm_policy_bydst[dir].hmask = nhashmask; + net->xfrm.policy_bydst[dir].table = ndst; + net->xfrm.policy_bydst[dir].hmask = nhashmask; - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } -static void xfrm_byidx_resize(int total) +static void xfrm_byidx_resize(struct net *net, int total) { - unsigned int hmask = xfrm_idx_hmask; + unsigned int hmask = net->xfrm.policy_idx_hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *oidx = xfrm_policy_byidx; + struct hlist_head *oidx = net->xfrm.policy_byidx; struct hlist_head *nidx = xfrm_hash_alloc(nsize); int i; if (!nidx) return; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); for (i = hmask; i >= 0; i--) xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); - xfrm_policy_byidx = nidx; - xfrm_idx_hmask = nhashmask; + net->xfrm.policy_byidx = nidx; + net->xfrm.policy_idx_hmask = nhashmask; - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } -static inline int xfrm_bydst_should_resize(int dir, int *total) +static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total) { - unsigned int cnt = xfrm_policy_count[dir]; - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int cnt = net->xfrm.policy_count[dir]; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; if (total) *total += cnt; @@ -444,9 +484,9 @@ static inline int xfrm_bydst_should_resize(int dir, int *total) return 0; } -static inline int xfrm_byidx_should_resize(int total) +static inline int xfrm_byidx_should_resize(struct net *net, int total) { - unsigned int hmask = xfrm_idx_hmask; + unsigned int hmask = net->xfrm.policy_idx_hmask; if ((hmask + 1) < xfrm_policy_hashmax && total > hmask) @@ -455,61 +495,65 @@ static inline int xfrm_byidx_should_resize(int total) return 0; } -void xfrm_spd_getinfo(struct xfrmk_spdinfo *si) +void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) { - read_lock_bh(&xfrm_policy_lock); - si->incnt = xfrm_policy_count[XFRM_POLICY_IN]; - si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT]; - si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD]; - si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; - si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; - si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; - si->spdhcnt = xfrm_idx_hmask; + read_lock_bh(&net->xfrm.xfrm_policy_lock); + si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; + si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; + si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; + si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; + si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; + si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; + si->spdhcnt = net->xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; - read_unlock_bh(&xfrm_policy_lock); + read_unlock_bh(&net->xfrm.xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_spd_getinfo); static DEFINE_MUTEX(hash_resize_mutex); -static void xfrm_hash_resize(struct work_struct *__unused) +static void xfrm_hash_resize(struct work_struct *work) { + struct net *net = container_of(work, struct net, xfrm.policy_hash_work); int dir, total; mutex_lock(&hash_resize_mutex); total = 0; for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { - if (xfrm_bydst_should_resize(dir, &total)) - xfrm_bydst_resize(dir); + if (xfrm_bydst_should_resize(net, dir, &total)) + xfrm_bydst_resize(net, dir); } - if (xfrm_byidx_should_resize(total)) - xfrm_byidx_resize(total); + if (xfrm_byidx_should_resize(net, total)) + xfrm_byidx_resize(net, total); mutex_unlock(&hash_resize_mutex); } -static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize); - /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ -static u32 xfrm_gen_index(u8 type, int dir) +static u32 xfrm_gen_index(struct net *net, int dir, u32 index) { static u32 idx_generator; for (;;) { - struct hlist_node *entry; struct hlist_head *list; struct xfrm_policy *p; u32 idx; int found; - idx = (idx_generator | dir); - idx_generator += 8; + if (!index) { + idx = (idx_generator | dir); + idx_generator += 8; + } else { + idx = index; + index = 0; + } + if (idx == 0) idx = 8; - list = xfrm_policy_byidx + idx_hash(idx); + list = net->xfrm.policy_byidx + idx_hash(net, idx); found = 0; - hlist_for_each_entry(p, entry, list, byidx) { + hlist_for_each_entry(p, list, byidx) { if (p->index == idx) { found = 1; break; @@ -535,25 +579,68 @@ static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s return 0; } +static void xfrm_policy_requeue(struct xfrm_policy *old, + struct xfrm_policy *new) +{ + struct xfrm_policy_queue *pq = &old->polq; + struct sk_buff_head list; + + __skb_queue_head_init(&list); + + spin_lock_bh(&pq->hold_queue.lock); + skb_queue_splice_init(&pq->hold_queue, &list); + if (del_timer(&pq->hold_timer)) + xfrm_pol_put(old); + spin_unlock_bh(&pq->hold_queue.lock); + + if (skb_queue_empty(&list)) + return; + + pq = &new->polq; + + spin_lock_bh(&pq->hold_queue.lock); + skb_queue_splice(&list, &pq->hold_queue); + pq->timeout = XFRM_QUEUE_TMO_MIN; + if (!mod_timer(&pq->hold_timer, jiffies)) + xfrm_pol_hold(new); + spin_unlock_bh(&pq->hold_queue.lock); +} + +static bool xfrm_policy_mark_match(struct xfrm_policy *policy, + struct xfrm_policy *pol) +{ + u32 mark = policy->mark.v & policy->mark.m; + + if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m) + return true; + + if ((mark & pol->mark.m) == pol->mark.v && + policy->priority == pol->priority) + return true; + + return false; +} + int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { + struct net *net = xp_net(policy); struct xfrm_policy *pol; struct xfrm_policy *delpol; struct hlist_head *chain; - struct hlist_node *entry, *newpos; - struct dst_entry *gc_list; + struct hlist_node *newpos; - write_lock_bh(&xfrm_policy_lock); - chain = policy_hash_bysel(&policy->selector, policy->family, dir); + write_lock_bh(&net->xfrm.xfrm_policy_lock); + chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (pol->type == policy->type && !selector_cmp(&pol->selector, &policy->selector) && + xfrm_policy_mark_match(policy, pol) && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) { - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return -EEXIST; } delpol = pol; @@ -571,176 +658,146 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) else hlist_add_head(&policy->bydst, chain); xfrm_pol_hold(policy); - xfrm_policy_count[dir]++; - atomic_inc(&flow_cache_genid); + net->xfrm.policy_count[dir]++; + atomic_inc(&net->xfrm.flow_cache_genid); + + /* After previous checking, family can either be AF_INET or AF_INET6 */ + if (policy->family == AF_INET) + rt_genid_bump_ipv4(net); + else + rt_genid_bump_ipv6(net); + if (delpol) { - hlist_del(&delpol->bydst); - hlist_del(&delpol->byidx); - xfrm_policy_count[dir]--; + xfrm_policy_requeue(delpol, policy); + __xfrm_policy_unlink(delpol, dir); } - policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); - hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index)); + policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index); + hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); policy->curlft.add_time = get_seconds(); policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); - write_unlock_bh(&xfrm_policy_lock); + list_add(&policy->walk.all, &net->xfrm.policy_all); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (delpol) xfrm_policy_kill(delpol); - else if (xfrm_bydst_should_resize(dir, NULL)) - schedule_work(&xfrm_hash_work); - - read_lock_bh(&xfrm_policy_lock); - gc_list = NULL; - entry = &policy->bydst; - hlist_for_each_entry_continue(policy, entry, bydst) { - struct dst_entry *dst; - - write_lock(&policy->lock); - dst = policy->bundles; - if (dst) { - struct dst_entry *tail = dst; - while (tail->next) - tail = tail->next; - tail->next = gc_list; - gc_list = dst; - - policy->bundles = NULL; - } - write_unlock(&policy->lock); - } - read_unlock_bh(&xfrm_policy_lock); - - while (gc_list) { - struct dst_entry *dst = gc_list; - - gc_list = dst->next; - dst_free(dst); - } + else if (xfrm_bydst_should_resize(net, dir, NULL)) + schedule_work(&net->xfrm.policy_hash_work); return 0; } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, - struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, + int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err) { struct xfrm_policy *pol, *ret; struct hlist_head *chain; - struct hlist_node *entry; *err = 0; - write_lock_bh(&xfrm_policy_lock); - chain = policy_hash_bysel(sel, sel->family, dir); + write_lock_bh(&net->xfrm.xfrm_policy_lock); + chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (pol->type == type && + (mark & pol->mark.m) == pol->mark.v && !selector_cmp(sel, &pol->selector) && xfrm_sec_ctx_match(ctx, pol->security)) { xfrm_pol_hold(pol); if (delete) { - *err = security_xfrm_policy_delete(pol); + *err = security_xfrm_policy_delete( + pol->security); if (*err) { - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - xfrm_policy_count[dir]--; + __xfrm_policy_unlink(pol, dir); } ret = pol; break; } } - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); - if (ret && delete) { - atomic_inc(&flow_cache_genid); + if (ret && delete) xfrm_policy_kill(ret); - } return ret; } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, - int *err) +struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, + int dir, u32 id, int delete, int *err) { struct xfrm_policy *pol, *ret; struct hlist_head *chain; - struct hlist_node *entry; *err = -ENOENT; if (xfrm_policy_id2dir(id) != dir) return NULL; *err = 0; - write_lock_bh(&xfrm_policy_lock); - chain = xfrm_policy_byidx + idx_hash(id); + write_lock_bh(&net->xfrm.xfrm_policy_lock); + chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; - hlist_for_each_entry(pol, entry, chain, byidx) { - if (pol->type == type && pol->index == id) { + hlist_for_each_entry(pol, chain, byidx) { + if (pol->type == type && pol->index == id && + (mark & pol->mark.m) == pol->mark.v) { xfrm_pol_hold(pol); if (delete) { - *err = security_xfrm_policy_delete(pol); + *err = security_xfrm_policy_delete( + pol->security); if (*err) { - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - xfrm_policy_count[dir]--; + __xfrm_policy_unlink(pol, dir); } ret = pol; break; } } - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); - if (ret && delete) { - atomic_inc(&flow_cache_genid); + if (ret && delete) xfrm_policy_kill(ret); - } return ret; } EXPORT_SYMBOL(xfrm_policy_byid); #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int -xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) { int dir, err = 0; for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; - struct hlist_node *entry; int i; - hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { + hlist_for_each_entry(pol, + &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; - err = security_xfrm_policy_delete(pol); + err = security_xfrm_policy_delete(pol->security); if (err) { - xfrm_audit_policy_delete(pol, 0, - audit_info->loginuid, - audit_info->secid); + xfrm_audit_policy_delete(pol, 0, task_valid); return err; } } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { - hlist_for_each_entry(pol, entry, - xfrm_policy_bydst[dir].table + i, + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, + net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) continue; - err = security_xfrm_policy_delete(pol); + err = security_xfrm_policy_delete( + pol->security); if (err) { xfrm_audit_policy_delete(pol, 0, - audit_info->loginuid, - audit_info->secid); + task_valid); return err; } } @@ -750,162 +807,170 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) } #else static inline int -xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) { return 0; } #endif -int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) +int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) { - int dir, err = 0; + int dir, err = 0, cnt = 0; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); - err = xfrm_policy_flush_secctx_check(type, audit_info); + err = xfrm_policy_flush_secctx_check(net, type, task_valid); if (err) goto out; for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; - struct hlist_node *entry; - int i, killed; + int i; - killed = 0; again1: - hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { + hlist_for_each_entry(pol, + &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - write_unlock_bh(&xfrm_policy_lock); + __xfrm_policy_unlink(pol, dir); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); + cnt++; - xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, - audit_info->secid); + xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); - killed++; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); goto again1; } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { again2: - hlist_for_each_entry(pol, entry, - xfrm_policy_bydst[dir].table + i, + hlist_for_each_entry(pol, + net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) continue; - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - write_unlock_bh(&xfrm_policy_lock); + __xfrm_policy_unlink(pol, dir); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); + cnt++; - xfrm_audit_policy_delete(pol, 1, - audit_info->loginuid, - audit_info->secid); + xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); - killed++; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); goto again2; } } - xfrm_policy_count[dir] -= killed; } - atomic_inc(&flow_cache_genid); + if (!cnt) + err = -ESRCH; out: - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; } EXPORT_SYMBOL(xfrm_policy_flush); -int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), +int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, + int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *pol, *last = NULL; - struct hlist_node *entry; - int dir, last_dir = 0, count, error; + struct xfrm_policy *pol; + struct xfrm_policy_walk_entry *x; + int error = 0; - read_lock_bh(&xfrm_policy_lock); - count = 0; + if (walk->type >= XFRM_POLICY_TYPE_MAX && + walk->type != XFRM_POLICY_TYPE_ANY) + return -EINVAL; - for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - struct hlist_head *table = xfrm_policy_bydst[dir].table; - int i; + if (list_empty(&walk->walk.all) && walk->seq != 0) + return 0; - hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { - if (pol->type != type) - continue; - if (last) { - error = func(last, last_dir % XFRM_POLICY_MAX, - count, data); - if (error) - goto out; - } - last = pol; - last_dir = dir; - count++; - } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { - hlist_for_each_entry(pol, entry, table + i, bydst) { - if (pol->type != type) - continue; - if (last) { - error = func(last, last_dir % XFRM_POLICY_MAX, - count, data); - if (error) - goto out; - } - last = pol; - last_dir = dir; - count++; - } + write_lock_bh(&net->xfrm.xfrm_policy_lock); + if (list_empty(&walk->walk.all)) + x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); + else + x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); + list_for_each_entry_from(x, &net->xfrm.policy_all, all) { + if (x->dead) + continue; + pol = container_of(x, struct xfrm_policy, walk); + if (walk->type != XFRM_POLICY_TYPE_ANY && + walk->type != pol->type) + continue; + error = func(pol, xfrm_policy_id2dir(pol->index), + walk->seq, data); + if (error) { + list_move_tail(&walk->walk.all, &x->all); + goto out; } + walk->seq++; } - if (count == 0) { + if (walk->seq == 0) { error = -ENOENT; goto out; } - error = func(last, last_dir % XFRM_POLICY_MAX, 0, data); + list_del_init(&walk->walk.all); out: - read_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); +void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) +{ + INIT_LIST_HEAD(&walk->walk.all); + walk->walk.dead = 1; + walk->type = type; + walk->seq = 0; +} +EXPORT_SYMBOL(xfrm_policy_walk_init); + +void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net) +{ + if (list_empty(&walk->walk.all)) + return; + + write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ + list_del(&walk->walk.all); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); +} +EXPORT_SYMBOL(xfrm_policy_walk_done); + /* * Find policy to apply to this flow. * * Returns 0 if policy found, else an -errno. */ -static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, +static int xfrm_policy_match(const struct xfrm_policy *pol, + const struct flowi *fl, u8 type, u16 family, int dir) { - struct xfrm_selector *sel = &pol->selector; - int match, ret = -ESRCH; + const struct xfrm_selector *sel = &pol->selector; + int ret = -ESRCH; + bool match; if (pol->family != family || + (fl->flowi_mark & pol->mark.m) != pol->mark.v || pol->type != type) return ret; match = xfrm_selector_match(sel, fl, family); if (match) - ret = security_xfrm_policy_lookup(pol, fl->secid, dir); + ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, + dir); return ret; } -static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, +static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, + const struct flowi *fl, u16 family, u8 dir) { int err; struct xfrm_policy *pol, *ret; - xfrm_address_t *daddr, *saddr; - struct hlist_node *entry; + const xfrm_address_t *daddr, *saddr; struct hlist_head *chain; u32 priority = ~0U; @@ -914,10 +979,10 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, if (unlikely(!daddr || !saddr)) return NULL; - read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(daddr, saddr, family, dir); + read_lock_bh(&net->xfrm.xfrm_policy_lock); + chain = policy_hash_direct(net, daddr, saddr, family, dir); ret = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { if (err == -ESRCH) @@ -932,8 +997,8 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, break; } } - chain = &xfrm_policy_inexact[dir]; - hlist_for_each_entry(pol, entry, chain, bydst) { + chain = &net->xfrm.policy_inexact[dir]; + hlist_for_each_entry(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { if (err == -ESRCH) @@ -950,37 +1015,60 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, if (ret) xfrm_pol_hold(ret); fail: - read_unlock_bh(&xfrm_policy_lock); + read_unlock_bh(&net->xfrm.xfrm_policy_lock); return ret; } -static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) +static struct xfrm_policy * +__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) { +#ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_policy *pol; - int err = 0; -#ifdef CONFIG_XFRM_SUB_POLICY - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); - if (IS_ERR(pol)) { - err = PTR_ERR(pol); - pol = NULL; - } - if (pol || err) - goto end; + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); + if (pol != NULL) + return pol; #endif - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); - if (IS_ERR(pol)) { - err = PTR_ERR(pol); - pol = NULL; + return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); +} + +static int flow_to_policy_dir(int dir) +{ + if (XFRM_POLICY_IN == FLOW_DIR_IN && + XFRM_POLICY_OUT == FLOW_DIR_OUT && + XFRM_POLICY_FWD == FLOW_DIR_FWD) + return dir; + + switch (dir) { + default: + case FLOW_DIR_IN: + return XFRM_POLICY_IN; + case FLOW_DIR_OUT: + return XFRM_POLICY_OUT; + case FLOW_DIR_FWD: + return XFRM_POLICY_FWD; } -#ifdef CONFIG_XFRM_SUB_POLICY -end: -#endif - if ((*objp = (void *) pol) != NULL) - *obj_refp = &pol->refcnt; - return err; +} + +static struct flow_cache_object * +xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, + u8 dir, struct flow_cache_object *old_obj, void *ctx) +{ + struct xfrm_policy *pol; + + if (old_obj) + xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); + + pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir)); + if (IS_ERR_OR_NULL(pol)) + return ERR_CAST(pol); + + /* Resolver returns two references: + * one for cache and one for caller of flow_cache_lookup() */ + xfrm_pol_hold(pol); + + return &pol->flo; } static inline int policy_to_flow_dir(int dir) @@ -1000,19 +1088,26 @@ static inline int policy_to_flow_dir(int dir) } } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, + const struct flowi *fl) { struct xfrm_policy *pol; + struct net *net = sock_net(sk); - read_lock_bh(&xfrm_policy_lock); + read_lock_bh(&net->xfrm.xfrm_policy_lock); if ((pol = sk->sk_policy[dir]) != NULL) { - int match = xfrm_selector_match(&pol->selector, fl, - sk->sk_family); + bool match = xfrm_selector_match(&pol->selector, fl, + sk->sk_family); int err = 0; if (match) { - err = security_xfrm_policy_lookup(pol, fl->secid, - policy_to_flow_dir(dir)); + if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { + pol = NULL; + goto out; + } + err = security_xfrm_policy_lookup(pol->security, + fl->flowi_secid, + policy_to_flow_dir(dir)); if (!err) xfrm_pol_hold(pol); else if (err == -ESRCH) @@ -1022,45 +1117,51 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc } else pol = NULL; } - read_unlock_bh(&xfrm_policy_lock); +out: + read_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { - struct hlist_head *chain = policy_hash_bysel(&pol->selector, + struct net *net = xp_net(pol); + struct hlist_head *chain = policy_hash_bysel(net, &pol->selector, pol->family, dir); + list_add(&pol->walk.all, &net->xfrm.policy_all); hlist_add_head(&pol->bydst, chain); - hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); - xfrm_policy_count[dir]++; + hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index)); + net->xfrm.policy_count[dir]++; xfrm_pol_hold(pol); - if (xfrm_bydst_should_resize(dir, NULL)) - schedule_work(&xfrm_hash_work); + if (xfrm_bydst_should_resize(net, dir, NULL)) + schedule_work(&net->xfrm.policy_hash_work); } static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir) { + struct net *net = xp_net(pol); + if (hlist_unhashed(&pol->bydst)) return NULL; - hlist_del(&pol->bydst); + hlist_del_init(&pol->bydst); hlist_del(&pol->byidx); - xfrm_policy_count[dir]--; + list_del(&pol->walk.all); + net->xfrm.policy_count[dir]--; return pol; } int xfrm_policy_delete(struct xfrm_policy *pol, int dir) { - write_lock_bh(&xfrm_policy_lock); + struct net *net = xp_net(pol); + + write_lock_bh(&net->xfrm.xfrm_policy_lock); pol = __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (pol) { - if (dir < XFRM_POLICY_MAX) - atomic_inc(&flow_cache_genid); xfrm_policy_kill(pol); return 0; } @@ -1070,6 +1171,7 @@ EXPORT_SYMBOL(xfrm_policy_delete); int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { + struct net *net = xp_net(pol); struct xfrm_policy *old_pol; #ifdef CONFIG_XFRM_SUB_POLICY @@ -1077,17 +1179,24 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) return -EINVAL; #endif - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); old_pol = sk->sk_policy[dir]; sk->sk_policy[dir] = pol; if (pol) { pol->curlft.add_time = get_seconds(); - pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir); + pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } - if (old_pol) + if (old_pol) { + if (pol) + xfrm_policy_requeue(old_pol, pol); + + /* Unlinking succeeds always. This is the only function + * allowed to delete or replace socket policy. + */ __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); - write_unlock_bh(&xfrm_policy_lock); + } + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (old_pol) { xfrm_policy_kill(old_pol); @@ -1095,18 +1204,21 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) return 0; } -static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) +static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) { - struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC); + struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); + struct net *net = xp_net(old); if (newp) { newp->selector = old->selector; - if (security_xfrm_policy_clone(old, newp)) { + if (security_xfrm_policy_clone(old->security, + &newp->security)) { kfree(newp); return NULL; /* ENOMEM */ } newp->lft = old->lft; newp->curlft = old->curlft; + newp->mark = old->mark; newp->action = old->action; newp->flags = old->flags; newp->xfrm_nr = old->xfrm_nr; @@ -1114,9 +1226,9 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) newp->type = old->type; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_pol_put(newp); } return newp; @@ -1136,7 +1248,7 @@ int __xfrm_sk_clone_policy(struct sock *sk) } static int -xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, +xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote, unsigned short family) { int err; @@ -1144,7 +1256,7 @@ xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, if (unlikely(afinfo == NULL)) return -EINVAL; - err = afinfo->get_saddr(local, remote); + err = afinfo->get_saddr(net, local, remote); xfrm_policy_put_afinfo(afinfo); return err; } @@ -1152,17 +1264,17 @@ xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, /* Resolve list of templates for the flow, given policy. */ static int -xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, - struct xfrm_state **xfrm, - unsigned short family) +xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, + struct xfrm_state **xfrm, unsigned short family) { + struct net *net = xp_net(policy); int nx; int i, error; xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); xfrm_address_t tmp; - for (nx=0, i = 0; i < policy->xfrm_nr; i++) { + for (nx = 0, i = 0; i < policy->xfrm_nr; i++) { struct xfrm_state *x; xfrm_address_t *remote = daddr; xfrm_address_t *local = saddr; @@ -1172,9 +1284,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, tmpl->mode == XFRM_MODE_BEET) { remote = &tmpl->id.daddr; local = &tmpl->saddr; - family = tmpl->encap_family; - if (xfrm_addr_any(local, family)) { - error = xfrm_get_saddr(&tmp, remote, family); + if (xfrm_addr_any(local, tmpl->encap_family)) { + error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family); if (error) goto fail; local = &tmp; @@ -1193,6 +1304,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, error = (x->km.state == XFRM_STATE_ERROR ? -EINVAL : -EAGAIN); xfrm_state_put(x); + } else if (error == -ESRCH) { + error = -EAGAIN; } if (!tmpl->optional) @@ -1201,15 +1314,14 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, return nx; fail: - for (nx--; nx>=0; nx--) + for (nx--; nx >= 0; nx--) xfrm_state_put(xfrm[nx]); return error; } static int -xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, - struct xfrm_state **xfrm, - unsigned short family) +xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, + struct xfrm_state **xfrm, unsigned short family) { struct xfrm_state *tp[XFRM_MAX_DEPTH]; struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; @@ -1239,7 +1351,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, return cnx; fail: - for (cnx--; cnx>=0; cnx--) + for (cnx--; cnx >= 0; cnx--) xfrm_state_put(tpp[cnx]); return error; @@ -1249,19 +1361,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, * still valid. */ -static struct dst_entry * -xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family) -{ - struct dst_entry *x; - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return ERR_PTR(-EINVAL); - x = afinfo->find_bundle(fl, policy); - xfrm_policy_put_afinfo(afinfo); - return x; -} - -static inline int xfrm_get_tos(struct flowi *fl, int family) +static inline int xfrm_get_tos(const struct flowi *fl, int family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); int tos; @@ -1276,15 +1376,88 @@ static inline int xfrm_get_tos(struct flowi *fl, int family) return tos; } -static inline struct xfrm_dst *xfrm_alloc_dst(int family) +static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo) +{ + struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); + struct dst_entry *dst = &xdst->u.dst; + + if (xdst->route == NULL) { + /* Dummy bundle - if it has xfrms we were not + * able to build bundle as template resolution failed. + * It means we need to try again resolving. */ + if (xdst->num_xfrms > 0) + return NULL; + } else if (dst->flags & DST_XFRM_QUEUE) { + return NULL; + } else { + /* Real bundle */ + if (stale_bundle(dst)) + return NULL; + } + + dst_hold(dst); + return flo; +} + +static int xfrm_bundle_flo_check(struct flow_cache_object *flo) +{ + struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); + struct dst_entry *dst = &xdst->u.dst; + + if (!xdst->route) + return 0; + if (stale_bundle(dst)) + return 0; + + return 1; +} + +static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) +{ + struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); + struct dst_entry *dst = &xdst->u.dst; + + dst_free(dst); +} + +static const struct flow_cache_ops xfrm_bundle_fc_ops = { + .get = xfrm_bundle_flo_get, + .check = xfrm_bundle_flo_check, + .delete = xfrm_bundle_flo_delete, +}; + +static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + struct dst_ops *dst_ops; struct xfrm_dst *xdst; if (!afinfo) return ERR_PTR(-EINVAL); - xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS); + switch (family) { + case AF_INET: + dst_ops = &net->xfrm.xfrm4_dst_ops; + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + dst_ops = &net->xfrm.xfrm6_dst_ops; + break; +#endif + default: + BUG(); + } + xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0); + + if (likely(xdst)) { + struct dst_entry *dst = &xdst->u.dst; + + memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); + xdst->flo.ops = &xfrm_bundle_fc_ops; + if (afinfo->init_dst) + afinfo->init_dst(net, xdst); + } else + xdst = ERR_PTR(-ENOBUFS); xfrm_policy_put_afinfo(afinfo); @@ -1308,7 +1481,8 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, return err; } -static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + const struct flowi *fl) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); @@ -1317,24 +1491,27 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) if (!afinfo) return -EINVAL; - err = afinfo->fill_dst(xdst, dev); + err = afinfo->fill_dst(xdst, dev, fl); xfrm_policy_put_afinfo(afinfo); return err; } + /* Allocate chain of dst_entry's, attach known xfrm's, calculate * all the metrics... Shortly, bundle a bundle. */ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, - struct flowi *fl, + const struct flowi *fl, struct dst_entry *dst) { + struct net *net = xp_net(policy); unsigned long now = jiffies; struct net_device *dev; + struct xfrm_mode *inner_mode; struct dst_entry *dst_prev = NULL; struct dst_entry *dst0 = NULL; int i = 0; @@ -1344,6 +1521,9 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, int trailer_len = 0; int tos; int family = policy->selector.family; + xfrm_address_t saddr, daddr; + + xfrm_flowi_addr_get(fl, &saddr, &daddr, family); tos = xfrm_get_tos(fl, family); err = tos; @@ -1353,7 +1533,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst_hold(dst); for (; i < nx; i++) { - struct xfrm_dst *xdst = xfrm_alloc_dst(family); + struct xfrm_dst *xdst = xfrm_alloc_dst(net, family); struct dst_entry *dst1 = &xdst->u.dst; err = PTR_ERR(xdst); @@ -1362,6 +1542,17 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto put_states; } + if (xfrm[i]->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(xfrm[i], + xfrm_af2proto(family)); + if (!inner_mode) { + err = -EAFNOSUPPORT; + dst_release(dst); + goto put_states; + } + } else + inner_mode = xfrm[i]->inner_mode; + if (!dst_prev) dst0 = dst1; else { @@ -1370,11 +1561,12 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, } xdst->route = dst; - memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics)); + dst_copy_metrics(dst1, dst); if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { family = xfrm[i]->props.family; - dst = xfrm_dst_lookup(xfrm[i], tos, family); + dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr, + family); err = PTR_ERR(dst); if (IS_ERR(dst)) goto put_states; @@ -1382,14 +1574,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst_hold(dst); dst1->xfrm = xfrm[i]; - xdst->genid = xfrm[i]->genid; + xdst->xfrm_genid = xfrm[i]->genid; - dst1->obsolete = -1; + dst1->obsolete = DST_OBSOLETE_FORCE_CHK; dst1->flags |= DST_HOST; dst1->lastuse = now; dst1->input = dst_discard; - dst1->output = xfrm[i]->outer_mode->afinfo->output; + dst1->output = inner_mode->afinfo->output; dst1->next = dst_prev; dst_prev = dst1; @@ -1408,16 +1600,13 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, if (!dev) goto free_dst; - /* Copy neighbout for reachability confirmation */ - dst0->neighbour = neigh_clone(dst->neighbour); - xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); xfrm_init_pmtu(dst_prev); for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev; - err = xfrm_fill_dst(xdst, dev); + err = xfrm_fill_dst(xdst, dev, fl); if (err) goto free_dst; @@ -1440,20 +1629,22 @@ free_dst: goto out; } -static int inline -xfrm_dst_alloc_copy(void **target, void *src, int size) +#ifdef CONFIG_XFRM_SUB_POLICY +static int xfrm_dst_alloc_copy(void **target, const void *src, int size) { if (!*target) { *target = kmalloc(size, GFP_ATOMIC); if (!*target) return -ENOMEM; } + memcpy(*target, src, size); return 0; } +#endif -static int inline -xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel) +static int xfrm_dst_update_parent(struct dst_entry *dst, + const struct xfrm_selector *sel) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -1464,8 +1655,8 @@ xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel) #endif } -static int inline -xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl) +static int xfrm_dst_update_origin(struct dst_entry *dst, + const struct flowi *fl) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -1475,276 +1666,537 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl) #endif } -static int stale_bundle(struct dst_entry *dst); +static int xfrm_expand_policies(const struct flowi *fl, u16 family, + struct xfrm_policy **pols, + int *num_pols, int *num_xfrms) +{ + int i; -/* Main function: finds/creates a bundle for given flow. - * - * At the moment we eat a raw IP route. Mostly to speed up lookups - * on interfaces with disabled IPsec. - */ -int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags) + if (*num_pols == 0 || !pols[0]) { + *num_pols = 0; + *num_xfrms = 0; + return 0; + } + if (IS_ERR(pols[0])) + return PTR_ERR(pols[0]); + + *num_xfrms = pols[0]->xfrm_nr; + +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW && + pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]), + XFRM_POLICY_TYPE_MAIN, + fl, family, + XFRM_POLICY_OUT); + if (pols[1]) { + if (IS_ERR(pols[1])) { + xfrm_pols_put(pols, *num_pols); + return PTR_ERR(pols[1]); + } + (*num_pols)++; + (*num_xfrms) += pols[1]->xfrm_nr; + } + } +#endif + for (i = 0; i < *num_pols; i++) { + if (pols[i]->action != XFRM_POLICY_ALLOW) { + *num_xfrms = -1; + break; + } + } + + return 0; + +} + +static struct xfrm_dst * +xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, + const struct flowi *fl, u16 family, + struct dst_entry *dst_orig) { - struct xfrm_policy *policy; - struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; - int npols; - int pol_dead; - int xfrm_nr; - int pi; + struct net *net = xp_net(pols[0]); struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; - struct dst_entry *dst, *dst_orig = *dst_p; - int nx = 0; + struct dst_entry *dst; + struct xfrm_dst *xdst; int err; - u32 genid; - u16 family; - u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); -restart: - genid = atomic_read(&flow_cache_genid); - policy = NULL; - for (pi = 0; pi < ARRAY_SIZE(pols); pi++) - pols[pi] = NULL; - npols = 0; - pol_dead = 0; - xfrm_nr = 0; + /* Try to instantiate a bundle */ + err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); + if (err <= 0) { + if (err != 0 && err != -EAGAIN) + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); + return ERR_PTR(err); + } - if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { - policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); - err = PTR_ERR(policy); - if (IS_ERR(policy)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); - goto dropdst; - } + dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); + if (IS_ERR(dst)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); + return ERR_CAST(dst); } - if (!policy) { - /* To accelerate a bit... */ - if ((dst_orig->flags & DST_NOXFRM) || - !xfrm_policy_count[XFRM_POLICY_OUT]) - goto nopol; + xdst = (struct xfrm_dst *)dst; + xdst->num_xfrms = err; + if (num_pols > 1) + err = xfrm_dst_update_parent(dst, &pols[1]->selector); + else + err = xfrm_dst_update_origin(dst, fl); + if (unlikely(err)) { + dst_free(dst); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); + return ERR_PTR(err); + } - policy = flow_cache_lookup(fl, dst_orig->ops->family, - dir, xfrm_policy_lookup); - err = PTR_ERR(policy); - if (IS_ERR(policy)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); - goto dropdst; - } + xdst->num_pols = num_pols; + memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); + xdst->policy_genid = atomic_read(&pols[0]->genid); + + return xdst; +} + +static void xfrm_policy_queue_process(unsigned long arg) +{ + int err = 0; + struct sk_buff *skb; + struct sock *sk; + struct dst_entry *dst; + struct xfrm_policy *pol = (struct xfrm_policy *)arg; + struct xfrm_policy_queue *pq = &pol->polq; + struct flowi fl; + struct sk_buff_head list; + + spin_lock(&pq->hold_queue.lock); + skb = skb_peek(&pq->hold_queue); + if (!skb) { + spin_unlock(&pq->hold_queue.lock); + goto out; } + dst = skb_dst(skb); + sk = skb->sk; + xfrm_decode_session(skb, &fl, dst->ops->family); + spin_unlock(&pq->hold_queue.lock); - if (!policy) - goto nopol; + dst_hold(dst->path); + dst = xfrm_lookup(xp_net(pol), dst->path, &fl, + sk, 0); + if (IS_ERR(dst)) + goto purge_queue; - family = dst_orig->ops->family; - pols[0] = policy; - npols ++; - xfrm_nr += pols[0]->xfrm_nr; + if (dst->flags & DST_XFRM_QUEUE) { + dst_release(dst); - err = -ENOENT; - if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP)) - goto error; + if (pq->timeout >= XFRM_QUEUE_TMO_MAX) + goto purge_queue; - policy->curlft.use_time = get_seconds(); + pq->timeout = pq->timeout << 1; + if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout)) + xfrm_pol_hold(pol); + goto out; + } - switch (policy->action) { - default: - case XFRM_POLICY_BLOCK: - /* Prohibit the flow */ - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK); - err = -EPERM; - goto error; + dst_release(dst); - case XFRM_POLICY_ALLOW: -#ifndef CONFIG_XFRM_SUB_POLICY - if (policy->xfrm_nr == 0) { - /* Flow passes not transformed. */ - xfrm_pol_put(policy); - return 0; - } -#endif + __skb_queue_head_init(&list); - /* Try to find matching bundle. - * - * LATER: help from flow cache. It is optional, this - * is required only for output policy. - */ - dst = xfrm_find_bundle(fl, policy, family); + spin_lock(&pq->hold_queue.lock); + pq->timeout = 0; + skb_queue_splice_init(&pq->hold_queue, &list); + spin_unlock(&pq->hold_queue.lock); + + while (!skb_queue_empty(&list)) { + skb = __skb_dequeue(&list); + + xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); + dst_hold(skb_dst(skb)->path); + dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path, + &fl, skb->sk, 0); if (IS_ERR(dst)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); - err = PTR_ERR(dst); - goto error; + kfree_skb(skb); + continue; } - if (dst) - break; + nf_reset(skb); + skb_dst_drop(skb); + skb_dst_set(skb, dst); -#ifdef CONFIG_XFRM_SUB_POLICY - if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, - fl, family, - XFRM_POLICY_OUT); - if (pols[1]) { - if (IS_ERR(pols[1])) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); - err = PTR_ERR(pols[1]); - goto error; - } - if (pols[1]->action == XFRM_POLICY_BLOCK) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK); - err = -EPERM; - goto error; - } - npols ++; - xfrm_nr += pols[1]->xfrm_nr; - } - } + err = dst_output(skb); + } - /* - * Because neither flowi nor bundle information knows about - * transformation template size. On more than one policy usage - * we can realize whether all of them is bypass or not after - * they are searched. See above not-transformed bypass - * is surrounded by non-sub policy configuration, too. - */ - if (xfrm_nr == 0) { - /* Flow passes not transformed. */ - xfrm_pols_put(pols, npols); - return 0; - } +out: + xfrm_pol_put(pol); + return; -#endif - nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); - - if (unlikely(nx<0)) { - err = nx; - if (err == -EAGAIN && sysctl_xfrm_larval_drop) { - /* EREMOTE tells the caller to generate - * a one-shot blackhole route. - */ - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); - xfrm_pol_put(policy); - return -EREMOTE; - } - if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) { - DECLARE_WAITQUEUE(wait, current); +purge_queue: + pq->timeout = 0; + xfrm_queue_purge(&pq->hold_queue); + xfrm_pol_put(pol); +} - add_wait_queue(&km_waitq, &wait); - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - set_current_state(TASK_RUNNING); - remove_wait_queue(&km_waitq, &wait); +static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) +{ + unsigned long sched_next; + struct dst_entry *dst = skb_dst(skb); + struct xfrm_dst *xdst = (struct xfrm_dst *) dst; + struct xfrm_policy *pol = xdst->pols[0]; + struct xfrm_policy_queue *pq = &pol->polq; + const struct sk_buff *fclone = skb + 1; - nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); + if (unlikely(skb->fclone == SKB_FCLONE_ORIG && + fclone->fclone == SKB_FCLONE_CLONE)) { + kfree_skb(skb); + return 0; + } - if (nx == -EAGAIN && signal_pending(current)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); - err = -ERESTART; - goto error; - } - if (nx == -EAGAIN || - genid != atomic_read(&flow_cache_genid)) { - xfrm_pols_put(pols, npols); - goto restart; - } - err = nx; - } - if (err < 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); - goto error; - } - } - if (nx == 0) { - /* Flow passes not transformed. */ - xfrm_pols_put(pols, npols); - return 0; - } + if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) { + kfree_skb(skb); + return -EAGAIN; + } - dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig); - err = PTR_ERR(dst); - if (IS_ERR(dst)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR); - goto error; - } + skb_dst_force(skb); - for (pi = 0; pi < npols; pi++) { - read_lock_bh(&pols[pi]->lock); - pol_dead |= pols[pi]->dead; - read_unlock_bh(&pols[pi]->lock); - } + spin_lock_bh(&pq->hold_queue.lock); - write_lock_bh(&policy->lock); - if (unlikely(pol_dead || stale_bundle(dst))) { - /* Wow! While we worked on resolving, this - * policy has gone. Retry. It is not paranoia, - * we just cannot enlist new bundle to dead object. - * We can't enlist stable bundles either. - */ - write_unlock_bh(&policy->lock); - if (dst) - dst_free(dst); - - if (pol_dead) - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD); - else - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); - err = -EHOSTUNREACH; - goto error; - } + if (!pq->timeout) + pq->timeout = XFRM_QUEUE_TMO_MIN; - if (npols > 1) - err = xfrm_dst_update_parent(dst, &pols[1]->selector); - else - err = xfrm_dst_update_origin(dst, fl); - if (unlikely(err)) { - write_unlock_bh(&policy->lock); - if (dst) - dst_free(dst); - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); - goto error; - } + sched_next = jiffies + pq->timeout; - dst->next = policy->bundles; - policy->bundles = dst; - dst_hold(dst); - write_unlock_bh(&policy->lock); + if (del_timer(&pq->hold_timer)) { + if (time_before(pq->hold_timer.expires, sched_next)) + sched_next = pq->hold_timer.expires; + xfrm_pol_put(pol); } - *dst_p = dst; - dst_release(dst_orig); - xfrm_pols_put(pols, npols); + + __skb_queue_tail(&pq->hold_queue, skb); + if (!mod_timer(&pq->hold_timer, sched_next)) + xfrm_pol_hold(pol); + + spin_unlock_bh(&pq->hold_queue.lock); + return 0; +} + +static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, + struct dst_entry *dst, + const struct flowi *fl, + int num_xfrms, + u16 family) +{ + int err; + struct net_device *dev; + struct dst_entry *dst1; + struct xfrm_dst *xdst; + + xdst = xfrm_alloc_dst(net, family); + if (IS_ERR(xdst)) + return xdst; + + if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0) + return xdst; + + dst1 = &xdst->u.dst; + dst_hold(dst); + xdst->route = dst; + + dst_copy_metrics(dst1, dst); + + dst1->obsolete = DST_OBSOLETE_FORCE_CHK; + dst1->flags |= DST_HOST | DST_XFRM_QUEUE; + dst1->lastuse = jiffies; + + dst1->input = dst_discard; + dst1->output = xdst_queue_output; + + dst_hold(dst); + dst1->child = dst; + dst1->path = dst; + + xfrm_init_path((struct xfrm_dst *)dst1, dst, 0); + + err = -ENODEV; + dev = dst->dev; + if (!dev) + goto free_dst; + + err = xfrm_fill_dst(xdst, dev, fl); + if (err) + goto free_dst; + +out: + return xdst; + +free_dst: + dst_release(dst1); + xdst = ERR_PTR(err); + goto out; +} + +static struct flow_cache_object * +xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, + struct flow_cache_object *oldflo, void *ctx) +{ + struct dst_entry *dst_orig = (struct dst_entry *)ctx; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + struct xfrm_dst *xdst, *new_xdst; + int num_pols = 0, num_xfrms = 0, i, err, pol_dead; + + /* Check if the policies from old bundle are usable */ + xdst = NULL; + if (oldflo) { + xdst = container_of(oldflo, struct xfrm_dst, flo); + num_pols = xdst->num_pols; + num_xfrms = xdst->num_xfrms; + pol_dead = 0; + for (i = 0; i < num_pols; i++) { + pols[i] = xdst->pols[i]; + pol_dead |= pols[i]->walk.dead; + } + if (pol_dead) { + dst_free(&xdst->u.dst); + xdst = NULL; + num_pols = 0; + num_xfrms = 0; + oldflo = NULL; + } + } + /* Resolve policies to use if we couldn't get them from + * previous cache entry */ + if (xdst == NULL) { + num_pols = 1; + pols[0] = __xfrm_policy_lookup(net, fl, family, + flow_to_policy_dir(dir)); + err = xfrm_expand_policies(fl, family, pols, + &num_pols, &num_xfrms); + if (err < 0) + goto inc_error; + if (num_pols == 0) + return NULL; + if (num_xfrms <= 0) + goto make_dummy_bundle; + } + + new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig); + if (IS_ERR(new_xdst)) { + err = PTR_ERR(new_xdst); + if (err != -EAGAIN) + goto error; + if (oldflo == NULL) + goto make_dummy_bundle; + dst_hold(&xdst->u.dst); + return oldflo; + } else if (new_xdst == NULL) { + num_xfrms = 0; + if (oldflo == NULL) + goto make_dummy_bundle; + xdst->num_xfrms = 0; + dst_hold(&xdst->u.dst); + return oldflo; + } + + /* Kill the previous bundle */ + if (xdst) { + /* The policies were stolen for newly generated bundle */ + xdst->num_pols = 0; + dst_free(&xdst->u.dst); + } + + /* Flow cache does not have reference, it dst_free()'s, + * but we do need to return one reference for original caller */ + dst_hold(&new_xdst->u.dst); + return &new_xdst->flo; + +make_dummy_bundle: + /* We found policies, but there's no bundles to instantiate: + * either because the policy blocks, has no transformations or + * we could not build template (no xfrm_states).*/ + xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family); + if (IS_ERR(xdst)) { + xfrm_pols_put(pols, num_pols); + return ERR_CAST(xdst); + } + xdst->num_pols = num_pols; + xdst->num_xfrms = num_xfrms; + memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); + + dst_hold(&xdst->u.dst); + return &xdst->flo; + +inc_error: + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); error: - xfrm_pols_put(pols, npols); -dropdst: - dst_release(dst_orig); - *dst_p = NULL; - return err; + if (xdst != NULL) + dst_free(&xdst->u.dst); + else + xfrm_pols_put(pols, num_pols); + return ERR_PTR(err); +} -nopol: - err = -ENOENT; - if (flags & XFRM_LOOKUP_ICMP) - goto dropdst; - return 0; +static struct dst_entry *make_blackhole(struct net *net, u16 family, + struct dst_entry *dst_orig) +{ + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + struct dst_entry *ret; + + if (!afinfo) { + dst_release(dst_orig); + return ERR_PTR(-EINVAL); + } else { + ret = afinfo->blackhole_route(net, dst_orig); + } + xfrm_policy_put_afinfo(afinfo); + + return ret; } -EXPORT_SYMBOL(__xfrm_lookup); -int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags) +/* Main function: finds/creates a bundle for given flow. + * + * At the moment we eat a raw IP route. Mostly to speed up lookups + * on interfaces with disabled IPsec. + */ +struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, + struct sock *sk, int flags) { - int err = __xfrm_lookup(dst_p, fl, sk, flags); + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + struct flow_cache_object *flo; + struct xfrm_dst *xdst; + struct dst_entry *dst, *route; + u16 family = dst_orig->ops->family; + u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); + int i, err, num_pols, num_xfrms = 0, drop_pols = 0; + + dst = NULL; + xdst = NULL; + route = NULL; + + if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { + num_pols = 1; + pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + err = xfrm_expand_policies(fl, family, pols, + &num_pols, &num_xfrms); + if (err < 0) + goto dropdst; + + if (num_pols) { + if (num_xfrms <= 0) { + drop_pols = num_pols; + goto no_transform; + } + + xdst = xfrm_resolve_and_create_bundle( + pols, num_pols, fl, + family, dst_orig); + if (IS_ERR(xdst)) { + xfrm_pols_put(pols, num_pols); + err = PTR_ERR(xdst); + goto dropdst; + } else if (xdst == NULL) { + num_xfrms = 0; + drop_pols = num_pols; + goto no_transform; + } + + dst_hold(&xdst->u.dst); + xdst->u.dst.flags |= DST_NOCACHE; + route = xdst->route; + } + } + + if (xdst == NULL) { + /* To accelerate a bit... */ + if ((dst_orig->flags & DST_NOXFRM) || + !net->xfrm.policy_count[XFRM_POLICY_OUT]) + goto nopol; + + flo = flow_cache_lookup(net, fl, family, dir, + xfrm_bundle_lookup, dst_orig); + if (flo == NULL) + goto nopol; + if (IS_ERR(flo)) { + err = PTR_ERR(flo); + goto dropdst; + } + xdst = container_of(flo, struct xfrm_dst, flo); + + num_pols = xdst->num_pols; + num_xfrms = xdst->num_xfrms; + memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols); + route = xdst->route; + } + + dst = &xdst->u.dst; + if (route == NULL && num_xfrms > 0) { + /* The only case when xfrm_bundle_lookup() returns a + * bundle with null route, is when the template could + * not be resolved. It means policies are there, but + * bundle could not be created, since we don't yet + * have the xfrm_state's. We need to wait for KM to + * negotiate new SA's or bail out with error.*/ + if (net->xfrm.sysctl_larval_drop) { + dst_release(dst); + xfrm_pols_put(pols, drop_pols); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); + + return make_blackhole(net, family, dst_orig); + } - if (err == -EREMOTE) { - dst_release(*dst_p); - *dst_p = NULL; err = -EAGAIN; + + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); + goto error; } - return err; +no_transform: + if (num_pols == 0) + goto nopol; + + if ((flags & XFRM_LOOKUP_ICMP) && + !(pols[0]->flags & XFRM_POLICY_ICMP)) { + err = -ENOENT; + goto error; + } + + for (i = 0; i < num_pols; i++) + pols[i]->curlft.use_time = get_seconds(); + + if (num_xfrms < 0) { + /* Prohibit the flow */ + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); + err = -EPERM; + goto error; + } else if (num_xfrms > 0) { + /* Flow transformed */ + dst_release(dst_orig); + } else { + /* Flow passes untransformed */ + dst_release(dst); + dst = dst_orig; + } +ok: + xfrm_pols_put(pols, drop_pols); + if (dst && dst->xfrm && + dst->xfrm->props.mode == XFRM_MODE_TUNNEL) + dst->flags |= DST_XFRM_TUNNEL; + return dst; + +nopol: + if (!(flags & XFRM_LOOKUP_ICMP)) { + dst = dst_orig; + goto ok; + } + err = -ENOENT; +error: + dst_release(dst); +dropdst: + dst_release(dst_orig); + xfrm_pols_put(pols, drop_pols); + return ERR_PTR(err); } EXPORT_SYMBOL(xfrm_lookup); static inline int -xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) +xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) { struct xfrm_state *x; @@ -1763,7 +2215,7 @@ xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) */ static inline int -xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, +xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, unsigned short family) { if (xfrm_state_kern(x)) @@ -1772,7 +2224,7 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && (x->props.reqid == tmpl->reqid || !tmpl->reqid) && x->props.mode == tmpl->mode && - ((tmpl->aalgos & (1<<x->props.aalgo)) || + (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) || !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) && !(x->props.mode != XFRM_MODE_TRANSPORT && xfrm_state_addr_cmp(tmpl, x, family)); @@ -1786,7 +2238,7 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, * Otherwise "-2 - errored_index" is returned. */ static inline int -xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, +xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start, unsigned short family) { int idx = start; @@ -1818,13 +2270,13 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, return -EAFNOSUPPORT; afinfo->decode_session(skb, fl, reverse); - err = security_xfrm_decode_session(skb, &fl->secid); + err = security_xfrm_decode_session(skb, &fl->flowi_secid); xfrm_policy_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(__xfrm_decode_session); -static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp) +static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp) { for (; k < sp->len; k++) { if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { @@ -1839,6 +2291,7 @@ static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { + struct net *net = dev_net(skb->dev); struct xfrm_policy *pol; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int npols = 0; @@ -1854,7 +2307,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, fl_dir = policy_to_flow_dir(dir); if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); return 0; } @@ -1864,10 +2317,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (skb->sp) { int i; - for (i=skb->sp->len-1; i>=0; i--) { + for (i = skb->sp->len-1; i >= 0; i--) { struct xfrm_state *x = skb->sp->xvec[i]; if (!xfrm_selector_match(&x->sel, &fl, family)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); return 0; } } @@ -1877,24 +2330,31 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (sk && sk->sk_policy[dir]) { pol = xfrm_sk_policy_lookup(sk, dir, &fl); if (IS_ERR(pol)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } } - if (!pol) - pol = flow_cache_lookup(&fl, family, fl_dir, - xfrm_policy_lookup); + if (!pol) { + struct flow_cache_object *flo; + + flo = flow_cache_lookup(net, &fl, family, fl_dir, + xfrm_policy_lookup, NULL); + if (IS_ERR_OR_NULL(flo)) + pol = ERR_CAST(flo); + else + pol = container_of(flo, struct xfrm_policy, flo); + } if (IS_ERR(pol)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } if (!pol) { if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { xfrm_secpath_reject(xerr_idx, skb, &fl); - XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; } return 1; @@ -1903,19 +2363,19 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol->curlft.use_time = get_seconds(); pols[0] = pol; - npols ++; + npols++; #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, &fl, family, XFRM_POLICY_IN); if (pols[1]) { if (IS_ERR(pols[1])) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } pols[1]->curlft.use_time = get_seconds(); - npols ++; + npols++; } } #endif @@ -1935,11 +2395,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, for (pi = 0; pi < npols; pi++) { if (pols[pi] != pol && pols[pi]->action != XFRM_POLICY_ALLOW) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); goto reject; } if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) { - XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto reject_error; } for (i = 0; i < pols[pi]->xfrm_nr; i++) @@ -1947,7 +2407,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } xfrm_nr = ti; if (npols > 1) { - xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); + xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net); tpp = stp; } @@ -1963,20 +2423,20 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (k < -1) /* "-2 - errored_index" returned */ xerr_idx = -(2+k); - XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; } } if (secpath_has_nontransport(sp, k, &xerr_idx)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; } xfrm_pols_put(pols, npols); return 1; } - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); reject: xfrm_secpath_reject(xerr_idx, skb, &fl); @@ -1988,15 +2448,25 @@ EXPORT_SYMBOL(__xfrm_policy_check); int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) { + struct net *net = dev_net(skb->dev); struct flowi fl; + struct dst_entry *dst; + int res = 1; if (xfrm_decode_session(skb, &fl, family) < 0) { - /* XXX: we should have something like FWDHDRERROR here. */ - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); return 0; } - return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; + skb_dst_force(skb); + + dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0); + if (IS_ERR(dst)) { + res = 0; + dst = NULL; + } + skb_dst_set(skb, dst); + return res; } EXPORT_SYMBOL(__xfrm_route_forward); @@ -2005,12 +2475,13 @@ EXPORT_SYMBOL(__xfrm_route_forward); static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) { /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete - * to "-1" to force all XFRM destinations to get validated by - * dst_ops->check on every use. We do this because when a - * normal route referenced by an XFRM dst is obsoleted we do - * not go looking around for all parent referencing XFRM dsts - * so that we can invalidate them. It is just too much work. - * Instead we make the checks here on every use. For example: + * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to + * get validated by dst_ops->check on every use. We do this + * because when a normal route referenced by an XFRM dst is + * obsoleted we do not go looking around for all parent + * referencing XFRM dsts so that we can invalidate them. It + * is just too much work. Instead we make the checks here on + * every use. For example: * * XFRM dst A --> IPv4 dst X * @@ -2020,9 +2491,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) * stale_bundle() check. * * When a policy's bundle is pruned, we dst_free() the XFRM - * dst which causes it's ->obsolete field to be set to a - * positive non-zero integer. If an XFRM dst has been pruned - * like this, we want to force a new route lookup. + * dst which causes it's ->obsolete field to be set to + * DST_OBSOLETE_DEAD. If an XFRM dst has been pruned like + * this, we want to force a new route lookup. */ if (dst->obsolete < 0 && !stale_bundle(dst)) return dst; @@ -2032,13 +2503,13 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); + return !xfrm_bundle_ok((struct xfrm_dst *)dst); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { - dst->dev = dev->nd_net->loopback_dev; + dst->dev = dev_net(dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); } @@ -2048,7 +2519,6 @@ EXPORT_SYMBOL(xfrm_dst_ifdown); static void xfrm_link_failure(struct sk_buff *skb) { /* Impossible. Such dst must be popped before reaches point of failure. */ - return; } static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) @@ -2062,69 +2532,15 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } -static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p) +void xfrm_garbage_collect(struct net *net) { - struct dst_entry *dst, **dstp; - - write_lock(&pol->lock); - dstp = &pol->bundles; - while ((dst=*dstp) != NULL) { - if (func(dst)) { - *dstp = dst->next; - dst->next = *gc_list_p; - *gc_list_p = dst; - } else { - dstp = &dst->next; - } - } - write_unlock(&pol->lock); + flow_cache_flush(net); } +EXPORT_SYMBOL(xfrm_garbage_collect); -static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) +static void xfrm_garbage_collect_deferred(struct net *net) { - struct dst_entry *gc_list = NULL; - int dir; - - read_lock_bh(&xfrm_policy_lock); - for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { - struct xfrm_policy *pol; - struct hlist_node *entry; - struct hlist_head *table; - int i; - - hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) - prune_one_bundle(pol, func, &gc_list); - - table = xfrm_policy_bydst[dir].table; - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { - hlist_for_each_entry(pol, entry, table + i, bydst) - prune_one_bundle(pol, func, &gc_list); - } - } - read_unlock_bh(&xfrm_policy_lock); - - while (gc_list) { - struct dst_entry *dst = gc_list; - gc_list = dst->next; - dst_free(dst); - } -} - -static int unused_bundle(struct dst_entry *dst) -{ - return !atomic_read(&dst->__refcnt); -} - -static void __xfrm_garbage_collect(void) -{ - xfrm_prune_bundles(unused_bundle); -} - -static int xfrm_flush_bundles(void) -{ - xfrm_prune_bundles(stale_bundle); - return 0; + flow_cache_flush_deferred(net); } static void xfrm_init_pmtu(struct dst_entry *dst) @@ -2144,7 +2560,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst) if (pmtu > route_mtu_cached) pmtu = route_mtu_cached; - dst->metrics[RTAX_MTU-1] = pmtu; + dst_metric_set(dst, RTAX_MTU, pmtu); } while ((dst = dst->next)); } @@ -2152,8 +2568,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst) * still valid. */ -int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, - struct flowi *fl, int family, int strict) +static int xfrm_bundle_ok(struct xfrm_dst *first) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; @@ -2162,34 +2577,21 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) || (dst->dev && !netif_running(dst->dev))) return 0; -#ifdef CONFIG_XFRM_SUB_POLICY - if (fl) { - if (first->origin && !flow_cache_uli_match(first->origin, fl)) - return 0; - if (first->partner && - !xfrm_selector_match(first->partner, fl, family)) - return 0; - } -#endif + + if (dst->flags & DST_XFRM_QUEUE) + return 1; last = NULL; do { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) - return 0; - if (fl && pol && - !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl)) - return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; - if (xdst->genid != dst->xfrm->genid) + if (xdst->xfrm_genid != dst->xfrm->genid) return 0; - - if (strict && fl && - !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) && - !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) + if (xdst->num_pols > 0 && + xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) return 0; mtu = dst_mtu(dst->child); @@ -2219,7 +2621,7 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, mtu = xfrm_state_mtu(dst->xfrm, mtu); if (mtu > last->route_mtu_cached) mtu = last->route_mtu_cached; - dst->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(dst, RTAX_MTU, mtu); if (last == first) break; @@ -2231,16 +2633,34 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, return 1; } -EXPORT_SYMBOL(xfrm_bundle_ok); +static unsigned int xfrm_default_advmss(const struct dst_entry *dst) +{ + return dst_metric_advmss(dst->path); +} + +static unsigned int xfrm_mtu(const struct dst_entry *dst) +{ + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst_mtu(dst->path); +} + +static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) +{ + return dst->path->ops->neigh_lookup(dst, skb, daddr); +} int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { + struct net *net; int err = 0; if (unlikely(afinfo == NULL)) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock_bh(&xfrm_policy_afinfo_lock); + spin_lock(&xfrm_policy_afinfo_lock); if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; else { @@ -2249,15 +2669,42 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->kmem_cachep = xfrm_dst_cache; if (likely(dst_ops->check == NULL)) dst_ops->check = xfrm_dst_check; + if (likely(dst_ops->default_advmss == NULL)) + dst_ops->default_advmss = xfrm_default_advmss; + if (likely(dst_ops->mtu == NULL)) + dst_ops->mtu = xfrm_mtu; if (likely(dst_ops->negative_advice == NULL)) dst_ops->negative_advice = xfrm_negative_advice; if (likely(dst_ops->link_failure == NULL)) dst_ops->link_failure = xfrm_link_failure; + if (likely(dst_ops->neigh_lookup == NULL)) + dst_ops->neigh_lookup = xfrm_neigh_lookup; if (likely(afinfo->garbage_collect == NULL)) - afinfo->garbage_collect = __xfrm_garbage_collect; - xfrm_policy_afinfo[afinfo->family] = afinfo; + afinfo->garbage_collect = xfrm_garbage_collect_deferred; + rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo); } - write_unlock_bh(&xfrm_policy_afinfo_lock); + spin_unlock(&xfrm_policy_afinfo_lock); + + rtnl_lock(); + for_each_net(net) { + struct dst_ops *xfrm_dst_ops; + + switch (afinfo->family) { + case AF_INET: + xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops; + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops; + break; +#endif + default: + BUG(); + } + *xfrm_dst_ops = *afinfo->dst_ops; + } + rtnl_unlock(); + return err; } EXPORT_SYMBOL(xfrm_policy_register_afinfo); @@ -2269,78 +2716,97 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock_bh(&xfrm_policy_afinfo_lock); + spin_lock(&xfrm_policy_afinfo_lock); if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo)) err = -EINVAL; - else { - struct dst_ops *dst_ops = afinfo->dst_ops; - xfrm_policy_afinfo[afinfo->family] = NULL; - dst_ops->kmem_cachep = NULL; - dst_ops->check = NULL; - dst_ops->negative_advice = NULL; - dst_ops->link_failure = NULL; - afinfo->garbage_collect = NULL; - } + else + RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family], + NULL); + } + spin_unlock(&xfrm_policy_afinfo_lock); + if (!err) { + struct dst_ops *dst_ops = afinfo->dst_ops; + + synchronize_rcu(); + + dst_ops->kmem_cachep = NULL; + dst_ops->check = NULL; + dst_ops->negative_advice = NULL; + dst_ops->link_failure = NULL; + afinfo->garbage_collect = NULL; } - write_unlock_bh(&xfrm_policy_afinfo_lock); return err; } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); -static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) +static void __net_init xfrm_dst_ops_init(struct net *net) { struct xfrm_policy_afinfo *afinfo; - if (unlikely(family >= NPROTO)) - return NULL; - read_lock(&xfrm_policy_afinfo_lock); - afinfo = xfrm_policy_afinfo[family]; - if (unlikely(!afinfo)) - read_unlock(&xfrm_policy_afinfo_lock); - return afinfo; -} -static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) -{ - read_unlock(&xfrm_policy_afinfo_lock); + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]); + if (afinfo) + net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; +#if IS_ENABLED(CONFIG_IPV6) + afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]); + if (afinfo) + net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; +#endif + rcu_read_unlock(); } static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; - - if (dev->nd_net != &init_net) - return NOTIFY_DONE; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_DOWN: - xfrm_flush_bundles(); + xfrm_garbage_collect(dev_net(dev)); } return NOTIFY_DONE; } static struct notifier_block xfrm_dev_notifier = { - xfrm_dev_event, - NULL, - 0 + .notifier_call = xfrm_dev_event, }; #ifdef CONFIG_XFRM_STATISTICS -static int __init xfrm_statistics_init(void) +static int __net_init xfrm_statistics_init(struct net *net) { - if (snmp_mib_init((void **)xfrm_statistics, - sizeof(struct linux_xfrm_mib)) < 0) + int rv; + net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib); + if (!net->mib.xfrm_statistics) return -ENOMEM; + rv = xfrm_proc_init(net); + if (rv < 0) + free_percpu(net->mib.xfrm_statistics); + return rv; +} + +static void xfrm_statistics_fini(struct net *net) +{ + xfrm_proc_fini(net); + free_percpu(net->mib.xfrm_statistics); +} +#else +static int __net_init xfrm_statistics_init(struct net *net) +{ return 0; } + +static void xfrm_statistics_fini(struct net *net) +{ +} #endif -static void __init xfrm_policy_init(void) +static int __net_init xfrm_policy_init(struct net *net) { unsigned int hmask, sz; int dir; - xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", + if (net_eq(net, &init_net)) + xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct xfrm_dst), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); @@ -2348,38 +2814,129 @@ static void __init xfrm_policy_init(void) hmask = 8 - 1; sz = (hmask+1) * sizeof(struct hlist_head); - xfrm_policy_byidx = xfrm_hash_alloc(sz); - xfrm_idx_hmask = hmask; - if (!xfrm_policy_byidx) - panic("XFRM: failed to allocate byidx hash\n"); + net->xfrm.policy_byidx = xfrm_hash_alloc(sz); + if (!net->xfrm.policy_byidx) + goto out_byidx; + net->xfrm.policy_idx_hmask = hmask; for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { struct xfrm_policy_hash *htab; - INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]); + net->xfrm.policy_count[dir] = 0; + INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); - htab = &xfrm_policy_bydst[dir]; + htab = &net->xfrm.policy_bydst[dir]; htab->table = xfrm_hash_alloc(sz); - htab->hmask = hmask; if (!htab->table) - panic("XFRM: failed to allocate bydst hash\n"); + goto out_bydst; + htab->hmask = hmask; } - INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task); - register_netdevice_notifier(&xfrm_dev_notifier); + INIT_LIST_HEAD(&net->xfrm.policy_all); + INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); + if (net_eq(net, &init_net)) + register_netdevice_notifier(&xfrm_dev_notifier); + return 0; + +out_bydst: + for (dir--; dir >= 0; dir--) { + struct xfrm_policy_hash *htab; + + htab = &net->xfrm.policy_bydst[dir]; + xfrm_hash_free(htab->table, sz); + } + xfrm_hash_free(net->xfrm.policy_byidx, sz); +out_byidx: + return -ENOMEM; } -void __init xfrm_init(void) +static void xfrm_policy_fini(struct net *net) { -#ifdef CONFIG_XFRM_STATISTICS - xfrm_statistics_init(); + unsigned int sz; + int dir; + + flush_work(&net->xfrm.policy_hash_work); +#ifdef CONFIG_XFRM_SUB_POLICY + xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false); #endif - xfrm_state_init(); - xfrm_policy_init(); + xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false); + + WARN_ON(!list_empty(&net->xfrm.policy_all)); + + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy_hash *htab; + + WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); + + htab = &net->xfrm.policy_bydst[dir]; + sz = (htab->hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(htab->table)); + xfrm_hash_free(htab->table, sz); + } + + sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(net->xfrm.policy_byidx)); + xfrm_hash_free(net->xfrm.policy_byidx, sz); +} + +static int __net_init xfrm_net_init(struct net *net) +{ + int rv; + + rv = xfrm_statistics_init(net); + if (rv < 0) + goto out_statistics; + rv = xfrm_state_init(net); + if (rv < 0) + goto out_state; + rv = xfrm_policy_init(net); + if (rv < 0) + goto out_policy; + xfrm_dst_ops_init(net); + rv = xfrm_sysctl_init(net); + if (rv < 0) + goto out_sysctl; + rv = flow_cache_init(net); + if (rv < 0) + goto out; + + /* Initialize the per-net locks here */ + spin_lock_init(&net->xfrm.xfrm_state_lock); + rwlock_init(&net->xfrm.xfrm_policy_lock); + mutex_init(&net->xfrm.xfrm_cfg_mutex); + + return 0; + +out: + xfrm_sysctl_fini(net); +out_sysctl: + xfrm_policy_fini(net); +out_policy: + xfrm_state_fini(net); +out_state: + xfrm_statistics_fini(net); +out_statistics: + return rv; +} + +static void __net_exit xfrm_net_exit(struct net *net) +{ + flow_cache_fini(net); + xfrm_sysctl_fini(net); + xfrm_policy_fini(net); + xfrm_state_fini(net); + xfrm_statistics_fini(net); +} + +static struct pernet_operations __net_initdata xfrm_net_ops = { + .init = xfrm_net_init, + .exit = xfrm_net_exit, +}; + +void __init xfrm_init(void) +{ + register_pernet_subsys(&xfrm_net_ops); xfrm_input_init(); -#ifdef CONFIG_XFRM_STATISTICS - xfrm_proc_init(); -#endif } #ifdef CONFIG_AUDITSYSCALL @@ -2393,27 +2950,23 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str); - switch(sel->family) { + switch (sel->family) { case AF_INET: - audit_log_format(audit_buf, " src=" NIPQUAD_FMT, - NIPQUAD(sel->saddr.a4)); + audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4); if (sel->prefixlen_s != 32) audit_log_format(audit_buf, " src_prefixlen=%d", sel->prefixlen_s); - audit_log_format(audit_buf, " dst=" NIPQUAD_FMT, - NIPQUAD(sel->daddr.a4)); + audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4); if (sel->prefixlen_d != 32) audit_log_format(audit_buf, " dst_prefixlen=%d", sel->prefixlen_d); break; case AF_INET6: - audit_log_format(audit_buf, " src=" NIP6_FMT, - NIP6(*(struct in6_addr *)sel->saddr.a6)); + audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6); if (sel->prefixlen_s != 128) audit_log_format(audit_buf, " src_prefixlen=%d", sel->prefixlen_s); - audit_log_format(audit_buf, " dst=" NIP6_FMT, - NIP6(*(struct in6_addr *)sel->daddr.a6)); + audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6); if (sel->prefixlen_d != 128) audit_log_format(audit_buf, " dst_prefixlen=%d", sel->prefixlen_d); @@ -2421,15 +2974,14 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, } } -void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - u32 auid, u32 secid) +void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-add"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, secid, audit_buf); + xfrm_audit_helper_usrinfo(task_valid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); @@ -2437,14 +2989,14 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - u32 auid, u32 secid) + bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-delete"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, secid, audit_buf); + xfrm_audit_helper_usrinfo(task_valid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); @@ -2453,38 +3005,37 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); #endif #ifdef CONFIG_XFRM_MIGRATE -static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp, - struct xfrm_selector *sel_tgt) +static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, + const struct xfrm_selector *sel_tgt) { if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { if (sel_tgt->family == sel_cmp->family && - xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr, - sel_cmp->family) == 0 && - xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr, - sel_cmp->family) == 0 && + xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, + sel_cmp->family) && + xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, + sel_cmp->family) && sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { - return 1; + return true; } } else { if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) { - return 1; + return true; } } - return 0; + return false; } -static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, - u8 dir, u8 type) +static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, + u8 dir, u8 type, struct net *net) { struct xfrm_policy *pol, *ret = NULL; - struct hlist_node *entry; struct hlist_head *chain; u32 priority = ~0U; - read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir); - hlist_for_each_entry(pol, entry, chain, bydst) { + read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/ + chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); + hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { ret = pol; @@ -2492,8 +3043,8 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, break; } } - chain = &xfrm_policy_inexact[dir]; - hlist_for_each_entry(pol, entry, chain, bydst) { + chain = &net->xfrm.policy_inexact[dir]; + hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type && pol->priority < priority) { @@ -2505,12 +3056,12 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, if (ret) xfrm_pol_hold(ret); - read_unlock_bh(&xfrm_policy_lock); + read_unlock_bh(&net->xfrm.xfrm_policy_lock); return ret; } -static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t) +static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t) { int match = 0; @@ -2519,10 +3070,10 @@ static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t) switch (t->mode) { case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: - if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr, - m->old_family) == 0 && - xfrm_addr_cmp(&t->saddr, &m->old_saddr, - m->old_family) == 0) { + if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr, + m->old_family) && + xfrm_addr_equal(&t->saddr, &m->old_saddr, + m->old_family)) { match = 1; } break; @@ -2544,11 +3095,10 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, struct xfrm_migrate *m, int num_migrate) { struct xfrm_migrate *mp; - struct dst_entry *dst; int i, j, n = 0; write_lock_bh(&pol->lock); - if (unlikely(pol->dead)) { + if (unlikely(pol->walk.dead)) { /* target policy has been deleted */ write_unlock_bh(&pol->lock); return -ENOENT; @@ -2569,10 +3119,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, sizeof(pol->xfrm_vec[i].saddr)); pol->xfrm_vec[i].encap_family = mp->new_family; /* flush bundles */ - while ((dst = pol->bundles) != NULL) { - pol->bundles = dst->next; - dst_free(dst); - } + atomic_inc(&pol->genid); } } @@ -2584,7 +3131,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, return 0; } -static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) +static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) { int i, j; @@ -2592,10 +3139,10 @@ static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) return -EINVAL; for (i = 0; i < num_migrate; i++) { - if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr, - m[i].old_family) == 0) && - (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr, - m[i].old_family) == 0)) + if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr, + m[i].old_family) && + xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr, + m[i].old_family)) return -EINVAL; if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) || xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) @@ -2618,8 +3165,9 @@ static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) return 0; } -int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) +int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k, struct net *net) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; @@ -2632,14 +3180,14 @@ int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, goto out; /* Stage 1 - find policy */ - if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) { + if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) { err = -ENOENT; goto out; } /* Stage 2 - find and update state(s) */ for (i = 0, mp = m; i < num_migrate; i++, mp++) { - if ((x = xfrm_migrate_state_find(mp))) { + if ((x = xfrm_migrate_state_find(mp, net))) { x_cur[nx_cur] = x; nx_cur++; if ((xc = xfrm_state_migrate(x, mp))) { @@ -2663,7 +3211,7 @@ int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, } /* Stage 5 - announce */ - km_migrate(sel, dir, type, m, num_migrate); + km_migrate(sel, dir, type, m, num_migrate, k); xfrm_pol_put(pol); diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 2b0db13f0cd..9c4fbd8935f 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -12,10 +12,11 @@ */ #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/export.h> #include <net/snmp.h> #include <net/xfrm.h> -static struct snmp_mib xfrm_mib_list[] = { +static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmInError", LINUX_MIB_XFRMINERROR), SNMP_MIB_ITEM("XfrmInBufferError", LINUX_MIB_XFRMINBUFFERERROR), SNMP_MIB_ITEM("XfrmInHdrError", LINUX_MIB_XFRMINHDRERROR), @@ -41,57 +42,45 @@ static struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmOutPolBlock", LINUX_MIB_XFRMOUTPOLBLOCK), SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD), SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR), + SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), + SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), + SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR), SNMP_MIB_SENTINEL }; -static unsigned long -fold_field(void *mib[], int offt) -{ - unsigned long res = 0; - int i; - - for_each_possible_cpu(i) { - res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt); - res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt); - } - return res; -} - static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) { + struct net *net = seq->private; int i; - for (i=0; xfrm_mib_list[i].name; i++) + for (i = 0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - fold_field((void **)xfrm_statistics, - xfrm_mib_list[i].entry)); + snmp_fold_field(net->mib.xfrm_statistics, + xfrm_mib_list[i].entry)); return 0; } static int xfrm_statistics_seq_open(struct inode *inode, struct file *file) { - return single_open(file, xfrm_statistics_seq_show, NULL); + return single_open_net(inode, file, xfrm_statistics_seq_show); } -static struct file_operations xfrm_statistics_seq_fops = { +static const struct file_operations xfrm_statistics_seq_fops = { .owner = THIS_MODULE, .open = xfrm_statistics_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = single_release_net, }; -int __init xfrm_proc_init(void) +int __net_init xfrm_proc_init(struct net *net) { - int rc = 0; - - if (!proc_net_fops_create(&init_net, "xfrm_stat", S_IRUGO, - &xfrm_statistics_seq_fops)) - goto stat_fail; - - out: - return rc; + if (!proc_create("xfrm_stat", S_IRUGO, net->proc_net, + &xfrm_statistics_seq_fops)) + return -ENOMEM; + return 0; +} - stat_fail: - rc = -ENOMEM; - goto out; +void xfrm_proc_fini(struct net *net) +{ + remove_proc_entry("xfrm_stat", net->proc_net); } diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c new file mode 100644 index 00000000000..dab57daae40 --- /dev/null +++ b/net/xfrm/xfrm_replay.c @@ -0,0 +1,603 @@ +/* + * xfrm_replay.c - xfrm replay detection, derived from xfrm_state.c. + * + * Copyright (C) 2010 secunet Security Networks AG + * Copyright (C) 2010 Steffen Klassert <steffen.klassert@secunet.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/export.h> +#include <net/xfrm.h> + +u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq) +{ + u32 seq, seq_hi, bottom; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + + if (!(x->props.flags & XFRM_STATE_ESN)) + return 0; + + seq = ntohl(net_seq); + seq_hi = replay_esn->seq_hi; + bottom = replay_esn->seq - replay_esn->replay_window + 1; + + if (likely(replay_esn->seq >= replay_esn->replay_window - 1)) { + /* A. same subspace */ + if (unlikely(seq < bottom)) + seq_hi++; + } else { + /* B. window spans two subspaces */ + if (unlikely(seq >= bottom)) + seq_hi--; + } + + return seq_hi; +} + +static void xfrm_replay_notify(struct xfrm_state *x, int event) +{ + struct km_event c; + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! + */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (!x->replay_maxdiff || + ((x->replay.seq - x->preplay.seq < x->replay_maxdiff) && + (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff))) { + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + } + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(&x->replay, &x->preplay, + sizeof(struct xfrm_replay_state)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + +static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct net *net = xs_net(x); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq; + if (unlikely(x->replay.oseq == 0)) { + x->replay.oseq--; + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_check(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + u32 diff; + u32 seq = ntohl(net_seq); + + if (!x->props.replay_window) + return 0; + + if (unlikely(seq == 0)) + goto err; + + if (likely(seq > x->replay.seq)) + return 0; + + diff = x->replay.seq - seq; + if (diff >= x->props.replay_window) { + x->stats.replay_window++; + goto err; + } + + if (x->replay.bitmap & (1U << diff)) { + x->stats.replay++; + goto err; + } + return 0; + +err: + xfrm_audit_state_replay(x, skb, net_seq); + return -EINVAL; +} + +static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) +{ + u32 diff; + u32 seq = ntohl(net_seq); + + if (!x->props.replay_window) + return; + + if (seq > x->replay.seq) { + diff = seq - x->replay.seq; + if (diff < x->props.replay_window) + x->replay.bitmap = ((x->replay.bitmap) << diff) | 1; + else + x->replay.bitmap = 1; + x->replay.seq = seq; + } else { + diff = x->replay.seq - seq; + x->replay.bitmap |= (1U << diff); + } + + if (xfrm_aevent_is_on(xs_net(x))) + x->repl->notify(x, XFRM_REPLAY_UPDATE); +} + +static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct net *net = xs_net(x); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; + if (unlikely(replay_esn->oseq == 0)) { + replay_esn->oseq--; + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_check_bmp(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + unsigned int bitnr, nr; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 pos; + u32 seq = ntohl(net_seq); + u32 diff = replay_esn->seq - seq; + + if (!replay_esn->replay_window) + return 0; + + if (unlikely(seq == 0)) + goto err; + + if (likely(seq > replay_esn->seq)) + return 0; + + if (diff >= replay_esn->replay_window) { + x->stats.replay_window++; + goto err; + } + + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (pos >= diff) + bitnr = (pos - diff) % replay_esn->replay_window; + else + bitnr = replay_esn->replay_window - (diff - pos); + + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + + return 0; + +err_replay: + x->stats.replay++; +err: + xfrm_audit_state_replay(x, skb, net_seq); + return -EINVAL; +} + +static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) +{ + unsigned int bitnr, nr, i; + u32 diff; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 seq = ntohl(net_seq); + u32 pos; + + if (!replay_esn->replay_window) + return; + + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (seq > replay_esn->seq) { + diff = seq - replay_esn->seq; + + if (diff < replay_esn->replay_window) { + for (i = 1; i < diff; i++) { + bitnr = (pos + i) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] &= ~(1U << bitnr); + } + } else { + nr = (replay_esn->replay_window - 1) >> 5; + for (i = 0; i <= nr; i++) + replay_esn->bmp[i] = 0; + } + + bitnr = (pos + diff) % replay_esn->replay_window; + replay_esn->seq = seq; + } else { + diff = replay_esn->seq - seq; + + if (pos >= diff) + bitnr = (pos - diff) % replay_esn->replay_window; + else + bitnr = replay_esn->replay_window - (diff - pos); + } + + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + + if (xfrm_aevent_is_on(xs_net(x))) + x->repl->notify(x, XFRM_REPLAY_UPDATE); +} + +static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) +{ + struct km_event c; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; + + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! + */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (!x->replay_maxdiff || + ((replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) && + (replay_esn->oseq - preplay_esn->oseq + < x->replay_maxdiff))) { + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + } + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(x->replay_esn, x->preplay_esn, + xfrm_replay_state_esn_len(replay_esn)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(x->preplay_esn, x->replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + +static void xfrm_replay_notify_esn(struct xfrm_state *x, int event) +{ + u32 seq_diff, oseq_diff; + struct km_event c; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; + + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! + */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (x->replay_maxdiff) { + if (replay_esn->seq_hi == preplay_esn->seq_hi) + seq_diff = replay_esn->seq - preplay_esn->seq; + else + seq_diff = ~preplay_esn->seq + replay_esn->seq + + 1; + + if (replay_esn->oseq_hi == preplay_esn->oseq_hi) + oseq_diff = replay_esn->oseq + - preplay_esn->oseq; + else + oseq_diff = ~preplay_esn->oseq + + replay_esn->oseq + 1; + + if (seq_diff >= x->replay_maxdiff || + oseq_diff >= x->replay_maxdiff) + break; + } + + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(x->replay_esn, x->preplay_esn, + xfrm_replay_state_esn_len(replay_esn)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(x->preplay_esn, x->replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + +static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct net *net = xs_net(x); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; + XFRM_SKB_CB(skb)->seq.output.hi = replay_esn->oseq_hi; + + if (unlikely(replay_esn->oseq == 0)) { + XFRM_SKB_CB(skb)->seq.output.hi = ++replay_esn->oseq_hi; + + if (replay_esn->oseq_hi == 0) { + replay_esn->oseq--; + replay_esn->oseq_hi--; + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + } + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_check_esn(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + unsigned int bitnr, nr; + u32 diff; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 pos; + u32 seq = ntohl(net_seq); + u32 wsize = replay_esn->replay_window; + u32 top = replay_esn->seq; + u32 bottom = top - wsize + 1; + + if (!wsize) + return 0; + + if (unlikely(seq == 0 && replay_esn->seq_hi == 0 && + (replay_esn->seq < replay_esn->replay_window - 1))) + goto err; + + diff = top - seq; + + if (likely(top >= wsize - 1)) { + /* A. same subspace */ + if (likely(seq > top) || seq < bottom) + return 0; + } else { + /* B. window spans two subspaces */ + if (likely(seq > top && seq < bottom)) + return 0; + if (seq >= bottom) + diff = ~seq + top + 1; + } + + if (diff >= replay_esn->replay_window) { + x->stats.replay_window++; + goto err; + } + + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (pos >= diff) + bitnr = (pos - diff) % replay_esn->replay_window; + else + bitnr = replay_esn->replay_window - (diff - pos); + + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + + return 0; + +err_replay: + x->stats.replay++; +err: + xfrm_audit_state_replay(x, skb, net_seq); + return -EINVAL; +} + +static int xfrm_replay_recheck_esn(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + if (unlikely(XFRM_SKB_CB(skb)->seq.input.hi != + htonl(xfrm_replay_seqhi(x, net_seq)))) { + x->stats.replay_window++; + return -EINVAL; + } + + return xfrm_replay_check_esn(x, skb, net_seq); +} + +static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) +{ + unsigned int bitnr, nr, i; + int wrap; + u32 diff, pos, seq, seq_hi; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + + if (!replay_esn->replay_window) + return; + + seq = ntohl(net_seq); + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + seq_hi = xfrm_replay_seqhi(x, net_seq); + wrap = seq_hi - replay_esn->seq_hi; + + if ((!wrap && seq > replay_esn->seq) || wrap > 0) { + if (likely(!wrap)) + diff = seq - replay_esn->seq; + else + diff = ~replay_esn->seq + seq + 1; + + if (diff < replay_esn->replay_window) { + for (i = 1; i < diff; i++) { + bitnr = (pos + i) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] &= ~(1U << bitnr); + } + } else { + nr = (replay_esn->replay_window - 1) >> 5; + for (i = 0; i <= nr; i++) + replay_esn->bmp[i] = 0; + } + + bitnr = (pos + diff) % replay_esn->replay_window; + replay_esn->seq = seq; + + if (unlikely(wrap > 0)) + replay_esn->seq_hi++; + } else { + diff = replay_esn->seq - seq; + + if (pos >= diff) + bitnr = (pos - diff) % replay_esn->replay_window; + else + bitnr = replay_esn->replay_window - (diff - pos); + } + + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + + if (xfrm_aevent_is_on(xs_net(x))) + x->repl->notify(x, XFRM_REPLAY_UPDATE); +} + +static struct xfrm_replay xfrm_replay_legacy = { + .advance = xfrm_replay_advance, + .check = xfrm_replay_check, + .recheck = xfrm_replay_check, + .notify = xfrm_replay_notify, + .overflow = xfrm_replay_overflow, +}; + +static struct xfrm_replay xfrm_replay_bmp = { + .advance = xfrm_replay_advance_bmp, + .check = xfrm_replay_check_bmp, + .recheck = xfrm_replay_check_bmp, + .notify = xfrm_replay_notify_bmp, + .overflow = xfrm_replay_overflow_bmp, +}; + +static struct xfrm_replay xfrm_replay_esn = { + .advance = xfrm_replay_advance_esn, + .check = xfrm_replay_check_esn, + .recheck = xfrm_replay_recheck_esn, + .notify = xfrm_replay_notify_esn, + .overflow = xfrm_replay_overflow_esn, +}; + +int xfrm_init_replay(struct xfrm_state *x) +{ + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + + if (replay_esn) { + if (replay_esn->replay_window > + replay_esn->bmp_len * sizeof(__u32) * 8) + return -EINVAL; + + if (x->props.flags & XFRM_STATE_ESN) { + if (replay_esn->replay_window == 0) + return -EINVAL; + x->repl = &xfrm_replay_esn; + } else + x->repl = &xfrm_replay_bmp; + } else + x->repl = &xfrm_replay_legacy; + + return 0; +} +EXPORT_SYMBOL(xfrm_init_replay); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 58f1f9347b5..0ab54134bb4 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -21,20 +21,13 @@ #include <linux/cache.h> #include <linux/audit.h> #include <asm/uaccess.h> +#include <linux/ktime.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> #include "xfrm_hash.h" -struct sock *xfrm_nl; -EXPORT_SYMBOL(xfrm_nl); - -u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME; -EXPORT_SYMBOL(sysctl_xfrm_aevent_etime); - -u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE; -EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); - -u32 sysctl_xfrm_acq_expires __read_mostly = 30; - /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) @@ -42,51 +35,30 @@ u32 sysctl_xfrm_acq_expires __read_mostly = 30; destination/tunnel endpoint. (output) */ -static DEFINE_SPINLOCK(xfrm_state_lock); - -/* Hash table to find appropriate SA towards given target (endpoint - * of tunnel or destination of transport mode) allowed by selector. - * - * Main use is finding SA after policy selected tunnel or transport mode. - * Also, it can be used by ah/esp icmp error handler to find offending SA. - */ -static struct hlist_head *xfrm_state_bydst __read_mostly; -static struct hlist_head *xfrm_state_bysrc __read_mostly; -static struct hlist_head *xfrm_state_byspi __read_mostly; -static unsigned int xfrm_state_hmask __read_mostly; static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; -static unsigned int xfrm_state_num; -static unsigned int xfrm_state_genid; -static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); -static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); - -#ifdef CONFIG_AUDITSYSCALL -static void xfrm_audit_state_replay(struct xfrm_state *x, - struct sk_buff *skb, __be32 net_seq); -#else -#define xfrm_audit_state_replay(x, s, sq) do { ; } while (0) -#endif /* CONFIG_AUDITSYSCALL */ - -static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, - xfrm_address_t *saddr, +static inline unsigned int xfrm_dst_hash(struct net *net, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, u32 reqid, unsigned short family) { - return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); + return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask); } -static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr, - xfrm_address_t *saddr, +static inline unsigned int xfrm_src_hash(struct net *net, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, unsigned short family) { - return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask); + return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask); } static inline unsigned int -xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr, + __be32 spi, u8 proto, unsigned short family) { - return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask); + return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask); } static void xfrm_hash_transfer(struct hlist_head *list, @@ -95,10 +67,10 @@ static void xfrm_hash_transfer(struct hlist_head *list, struct hlist_head *nspitable, unsigned int nhashmask) { - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; struct xfrm_state *x; - hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { + hlist_for_each_entry_safe(x, tmp, list, bydst) { unsigned int h; h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, @@ -120,16 +92,16 @@ static void xfrm_hash_transfer(struct hlist_head *list, } } -static unsigned long xfrm_hash_new_size(void) +static unsigned long xfrm_hash_new_size(unsigned int state_hmask) { - return ((xfrm_state_hmask + 1) << 1) * - sizeof(struct hlist_head); + return ((state_hmask + 1) << 1) * sizeof(struct hlist_head); } static DEFINE_MUTEX(hash_resize_mutex); -static void xfrm_hash_resize(struct work_struct *__unused) +static void xfrm_hash_resize(struct work_struct *work) { + struct net *net = container_of(work, struct net, xfrm.state_hash_work); struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; unsigned long nsize, osize; unsigned int nhashmask, ohashmask; @@ -137,7 +109,7 @@ static void xfrm_hash_resize(struct work_struct *__unused) mutex_lock(&hash_resize_mutex); - nsize = xfrm_hash_new_size(); + nsize = xfrm_hash_new_size(net->xfrm.state_hmask); ndst = xfrm_hash_alloc(nsize); if (!ndst) goto out_unlock; @@ -153,24 +125,24 @@ static void xfrm_hash_resize(struct work_struct *__unused) goto out_unlock; } - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; - for (i = xfrm_state_hmask; i >= 0; i--) - xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi, + for (i = net->xfrm.state_hmask; i >= 0; i--) + xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi, nhashmask); - odst = xfrm_state_bydst; - osrc = xfrm_state_bysrc; - ospi = xfrm_state_byspi; - ohashmask = xfrm_state_hmask; + odst = net->xfrm.state_bydst; + osrc = net->xfrm.state_bysrc; + ospi = net->xfrm.state_byspi; + ohashmask = net->xfrm.state_hmask; - xfrm_state_bydst = ndst; - xfrm_state_bysrc = nsrc; - xfrm_state_byspi = nspi; - xfrm_state_hmask = nhashmask; + net->xfrm.state_bydst = ndst; + net->xfrm.state_bysrc = nsrc; + net->xfrm.state_byspi = nspi; + net->xfrm.state_hmask = nhashmask; - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); osize = (ohashmask + 1) * sizeof(struct hlist_head); xfrm_hash_free(odst, osize); @@ -181,75 +153,56 @@ out_unlock: mutex_unlock(&hash_resize_mutex); } -static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize); - -DECLARE_WAIT_QUEUE_HEAD(km_waitq); -EXPORT_SYMBOL(km_waitq); +static DEFINE_SPINLOCK(xfrm_state_afinfo_lock); +static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO]; -static DEFINE_RWLOCK(xfrm_state_afinfo_lock); -static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; - -static struct work_struct xfrm_state_gc_work; -static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); -void km_state_expired(struct xfrm_state *x, int hard, u32 pid); - -static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family) -{ - struct xfrm_state_afinfo *afinfo; - if (unlikely(family >= NPROTO)) - return NULL; - write_lock_bh(&xfrm_state_afinfo_lock); - afinfo = xfrm_state_afinfo[family]; - if (unlikely(!afinfo)) - write_unlock_bh(&xfrm_state_afinfo_lock); - return afinfo; -} - -static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo) - __releases(xfrm_state_afinfo_lock) -{ - write_unlock_bh(&xfrm_state_afinfo_lock); -} +bool km_is_alive(const struct km_event *c); +void km_state_expired(struct xfrm_state *x, int hard, u32 portid); +static DEFINE_SPINLOCK(xfrm_type_lock); int xfrm_register_type(const struct xfrm_type *type, unsigned short family) { - struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); const struct xfrm_type **typemap; int err = 0; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; typemap = afinfo->type_map; + spin_lock_bh(&xfrm_type_lock); if (likely(typemap[type->proto] == NULL)) typemap[type->proto] = type; else err = -EEXIST; - xfrm_state_unlock_afinfo(afinfo); + spin_unlock_bh(&xfrm_type_lock); + xfrm_state_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_register_type); int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family) { - struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); const struct xfrm_type **typemap; int err = 0; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; typemap = afinfo->type_map; + spin_lock_bh(&xfrm_type_lock); if (unlikely(typemap[type->proto] != type)) err = -ENOENT; else typemap[type->proto] = NULL; - xfrm_state_unlock_afinfo(afinfo); + spin_unlock_bh(&xfrm_type_lock); + xfrm_state_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_unregister_type); @@ -286,6 +239,7 @@ static void xfrm_put_type(const struct xfrm_type *type) module_put(type->owner); } +static DEFINE_SPINLOCK(xfrm_mode_lock); int xfrm_register_mode(struct xfrm_mode *mode, int family) { struct xfrm_state_afinfo *afinfo; @@ -295,12 +249,13 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family) if (unlikely(mode->encap >= XFRM_MODE_MAX)) return -EINVAL; - afinfo = xfrm_state_lock_afinfo(family); + afinfo = xfrm_state_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; err = -EEXIST; modemap = afinfo->mode_map; + spin_lock_bh(&xfrm_mode_lock); if (modemap[mode->encap]) goto out; @@ -313,7 +268,8 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family) err = 0; out: - xfrm_state_unlock_afinfo(afinfo); + spin_unlock_bh(&xfrm_mode_lock); + xfrm_state_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_register_mode); @@ -327,19 +283,21 @@ int xfrm_unregister_mode(struct xfrm_mode *mode, int family) if (unlikely(mode->encap >= XFRM_MODE_MAX)) return -EINVAL; - afinfo = xfrm_state_lock_afinfo(family); + afinfo = xfrm_state_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; err = -ENOENT; modemap = afinfo->mode_map; + spin_lock_bh(&xfrm_mode_lock); if (likely(modemap[mode->encap] == mode)) { modemap[mode->encap] = NULL; module_put(mode->afinfo->owner); err = 0; } - xfrm_state_unlock_afinfo(afinfo); + spin_unlock_bh(&xfrm_mode_lock); + xfrm_state_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_unregister_mode); @@ -379,13 +337,15 @@ static void xfrm_put_mode(struct xfrm_mode *mode) static void xfrm_state_gc_destroy(struct xfrm_state *x) { - del_timer_sync(&x->timer); + tasklet_hrtimer_cancel(&x->mtimer); del_timer_sync(&x->rtimer); kfree(x->aalg); kfree(x->ealg); kfree(x->calg); kfree(x->encap); kfree(x->coaddr); + kfree(x->replay_esn); + kfree(x->preplay_esn); if (x->inner_mode) xfrm_put_mode(x->inner_mode); if (x->inner_mode_iaf) @@ -400,21 +360,19 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x); } -static void xfrm_state_gc_task(struct work_struct *data) +static void xfrm_state_gc_task(struct work_struct *work) { + struct net *net = container_of(work, struct net, xfrm.state_gc_work); struct xfrm_state *x; - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); - gc_list.first = xfrm_state_gc_list.first; - INIT_HLIST_HEAD(&xfrm_state_gc_list); + hlist_move_list(&net->xfrm.state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst) + hlist_for_each_entry_safe(x, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); - - wake_up(&km_waitq); } static inline unsigned long make_jiffies(long secs) @@ -425,9 +383,10 @@ static inline unsigned long make_jiffies(long secs) return secs*HZ; } -static void xfrm_timer_handler(unsigned long data) +static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) { - struct xfrm_state *x = (struct xfrm_state*)data; + struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer); + struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer); unsigned long now = get_seconds(); long next = LONG_MAX; int warn = 0; @@ -441,8 +400,17 @@ static void xfrm_timer_handler(unsigned long data) if (x->lft.hard_add_expires_seconds) { long tmo = x->lft.hard_add_expires_seconds + x->curlft.add_time - now; - if (tmo <= 0) - goto expired; + if (tmo <= 0) { + if (x->xflags & XFRM_SOFT_EXPIRE) { + /* enter hard expire without soft expire first?! + * setting a new date could trigger this. + * workarbound: fix x->curflt.add_time by below: + */ + x->curlft.add_time = now - x->saved_tmo - 1; + tmo = x->lft.hard_add_expires_seconds - x->saved_tmo; + } else + goto expired; + } if (tmo < next) next = tmo; } @@ -459,10 +427,14 @@ static void xfrm_timer_handler(unsigned long data) if (x->lft.soft_add_expires_seconds) { long tmo = x->lft.soft_add_expires_seconds + x->curlft.add_time - now; - if (tmo <= 0) + if (tmo <= 0) { warn = 1; - else if (tmo < next) + x->xflags &= ~XFRM_SOFT_EXPIRE; + } else if (tmo < next) { next = tmo; + x->xflags |= XFRM_SOFT_EXPIRE; + x->saved_tmo = tmo; + } } if (x->lft.soft_use_expires_seconds) { long tmo = x->lft.soft_use_expires_seconds + @@ -477,45 +449,45 @@ static void xfrm_timer_handler(unsigned long data) if (warn) km_state_expired(x, 0, 0); resched: - if (next != LONG_MAX) - mod_timer(&x->timer, jiffies + make_jiffies(next)); + if (next != LONG_MAX) { + tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL); + } goto out; expired: - if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) { + if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) x->km.state = XFRM_STATE_EXPIRED; - wake_up(&km_waitq); - next = 2; - goto resched; - } err = __xfrm_state_delete(x); - if (!err && x->id.spi) + if (!err) km_state_expired(x, 1, 0); - xfrm_audit_state_delete(x, err ? 0 : 1, - audit_get_loginuid(current), 0); + xfrm_audit_state_delete(x, err ? 0 : 1, true); out: spin_unlock(&x->lock); + return HRTIMER_NORESTART; } static void xfrm_replay_timer_handler(unsigned long data); -struct xfrm_state *xfrm_state_alloc(void) +struct xfrm_state *xfrm_state_alloc(struct net *net) { struct xfrm_state *x; x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC); if (x) { + write_pnet(&x->xs_net, net); atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); + INIT_LIST_HEAD(&x->km.all); INIT_HLIST_NODE(&x->bydst); INIT_HLIST_NODE(&x->bysrc); INIT_HLIST_NODE(&x->byspi); - setup_timer(&x->timer, xfrm_timer_handler, (unsigned long)x); + tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, + CLOCK_BOOTTIME, HRTIMER_MODE_ABS); setup_timer(&x->rtimer, xfrm_replay_timer_handler, (unsigned long)x); x->curlft.add_time = get_seconds(); @@ -535,28 +507,32 @@ EXPORT_SYMBOL(xfrm_state_alloc); void __xfrm_state_destroy(struct xfrm_state *x) { - BUG_TRAP(x->km.state == XFRM_STATE_DEAD); + struct net *net = xs_net(x); + + WARN_ON(x->km.state != XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->bydst, &xfrm_state_gc_list); + hlist_add_head(&x->gclist, &net->xfrm.state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - schedule_work(&xfrm_state_gc_work); + schedule_work(&net->xfrm.state_gc_work); } EXPORT_SYMBOL(__xfrm_state_destroy); int __xfrm_state_delete(struct xfrm_state *x) { + struct net *net = xs_net(x); int err = -ESRCH; if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; - spin_lock(&xfrm_state_lock); + spin_lock(&net->xfrm.xfrm_state_lock); + list_del(&x->km.all); hlist_del(&x->bydst); hlist_del(&x->bysrc); if (x->id.spi) hlist_del(&x->byspi); - xfrm_state_num--; - spin_unlock(&xfrm_state_lock); + net->xfrm.state_num--; + spin_unlock(&net->xfrm.xfrm_state_lock); /* All xfrm_state objects are created by xfrm_state_alloc. * The xfrm_state_alloc call gives a reference, and that @@ -584,20 +560,17 @@ EXPORT_SYMBOL(xfrm_state_delete); #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int -xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) +xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) { int i, err = 0; - for (i = 0; i <= xfrm_state_hmask; i++) { - struct hlist_node *entry; + for (i = 0; i <= net->xfrm.state_hmask; i++) { struct xfrm_state *x; - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { - xfrm_audit_state_delete(x, 0, - audit_info->loginuid, - audit_info->secid); + xfrm_audit_state_delete(x, 0, task_valid); return err; } } @@ -607,100 +580,101 @@ xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) } #else static inline int -xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) +xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) { return 0; } #endif -int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info) +int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) { - int i, err = 0; + int i, err = 0, cnt = 0; - spin_lock_bh(&xfrm_state_lock); - err = xfrm_state_flush_secctx_check(proto, audit_info); + spin_lock_bh(&net->xfrm.xfrm_state_lock); + err = xfrm_state_flush_secctx_check(net, proto, task_valid); if (err) goto out; - for (i = 0; i <= xfrm_state_hmask; i++) { - struct hlist_node *entry; + err = -ESRCH; + for (i = 0; i <= net->xfrm.state_hmask; i++) { struct xfrm_state *x; restart: - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = xfrm_state_delete(x); xfrm_audit_state_delete(x, err ? 0 : 1, - audit_info->loginuid, - audit_info->secid); + task_valid); xfrm_state_put(x); + if (!err) + cnt++; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); goto restart; } } } - err = 0; + if (cnt) + err = 0; out: - spin_unlock_bh(&xfrm_state_lock); - wake_up(&km_waitq); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return err; } EXPORT_SYMBOL(xfrm_state_flush); -void xfrm_sad_getinfo(struct xfrmk_sadinfo *si) +void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) { - spin_lock_bh(&xfrm_state_lock); - si->sadcnt = xfrm_state_num; - si->sadhcnt = xfrm_state_hmask; + spin_lock_bh(&net->xfrm.xfrm_state_lock); + si->sadcnt = net->xfrm.state_num; + si->sadhcnt = net->xfrm.state_hmask; si->sadhmcnt = xfrm_state_hashmax; - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); } EXPORT_SYMBOL(xfrm_sad_getinfo); static int -xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr, - unsigned short family) +xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl, + const struct xfrm_tmpl *tmpl, + const xfrm_address_t *daddr, const xfrm_address_t *saddr, + unsigned short family) { struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); if (!afinfo) return -1; - afinfo->init_tempsel(x, fl, tmpl, daddr, saddr); + afinfo->init_tempsel(&x->sel, fl); + + if (family != tmpl->encap_family) { + xfrm_state_put_afinfo(afinfo); + afinfo = xfrm_state_get_afinfo(tmpl->encap_family); + if (!afinfo) + return -1; + } + afinfo->init_temprop(x, tmpl, daddr, saddr); xfrm_state_put_afinfo(afinfo); return 0; } -static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, + const xfrm_address_t *daddr, + __be32 spi, u8 proto, + unsigned short family) { - unsigned int h = xfrm_spi_hash(daddr, spi, proto, family); + unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; - struct hlist_node *entry; - hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) { + hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || - x->id.proto != proto) + x->id.proto != proto || + !xfrm_addr_equal(&x->id.daddr, daddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)daddr, - (struct in6_addr *) - x->id.daddr.a6)) - continue; - break; - } - + if ((mark & x->mark.m) != x->mark.v) + continue; xfrm_state_hold(x); return x; } @@ -708,34 +682,23 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, return NULL; } -static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + u8 proto, unsigned short family) { - unsigned int h = xfrm_src_hash(daddr, saddr, family); + unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; - struct hlist_node *entry; - hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { + hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || - x->id.proto != proto) + x->id.proto != proto || + !xfrm_addr_equal(&x->id.daddr, daddr, family) || + !xfrm_addr_equal(&x->props.saddr, saddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4 || - x->props.saddr.a4 != saddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)daddr, - (struct in6_addr *) - x->id.daddr.a6) || - !ipv6_addr_equal((struct in6_addr *)saddr, - (struct in6_addr *) - x->props.saddr.a6)) - continue; - break; - } - + if ((mark & x->mark.m) != x->mark.v) + continue; xfrm_state_hold(x); return x; } @@ -746,122 +709,170 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm static inline struct xfrm_state * __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) { + struct net *net = xs_net(x); + u32 mark = x->mark.v & x->mark.m; + if (use_spi) - return __xfrm_state_lookup(&x->id.daddr, x->id.spi, - x->id.proto, family); + return __xfrm_state_lookup(net, mark, &x->id.daddr, + x->id.spi, x->id.proto, family); else - return __xfrm_state_lookup_byaddr(&x->id.daddr, + return __xfrm_state_lookup_byaddr(net, mark, + &x->id.daddr, &x->props.saddr, x->id.proto, family); } -static void xfrm_hash_grow_check(int have_hash_collision) +static void xfrm_hash_grow_check(struct net *net, int have_hash_collision) { if (have_hash_collision && - (xfrm_state_hmask + 1) < xfrm_state_hashmax && - xfrm_state_num > xfrm_state_hmask) - schedule_work(&xfrm_hash_work); + (net->xfrm.state_hmask + 1) < xfrm_state_hashmax && + net->xfrm.state_num > net->xfrm.state_hmask) + schedule_work(&net->xfrm.state_hash_work); +} + +static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, + const struct flowi *fl, unsigned short family, + struct xfrm_state **best, int *acq_in_progress, + int *error) +{ + /* Resolution logic: + * 1. There is a valid state with matching selector. Done. + * 2. Valid state with inappropriate selector. Skip. + * + * Entering area of "sysdeps". + * + * 3. If state is not valid, selector is temporary, it selects + * only session which triggered previous resolution. Key + * manager will do something to install a state with proper + * selector. + */ + if (x->km.state == XFRM_STATE_VALID) { + if ((x->sel.family && + !xfrm_selector_match(&x->sel, fl, x->sel.family)) || + !security_xfrm_state_pol_flow_match(x, pol, fl)) + return; + + if (!*best || + (*best)->km.dying > x->km.dying || + ((*best)->km.dying == x->km.dying && + (*best)->curlft.add_time < x->curlft.add_time)) + *best = x; + } else if (x->km.state == XFRM_STATE_ACQ) { + *acq_in_progress = 1; + } else if (x->km.state == XFRM_STATE_ERROR || + x->km.state == XFRM_STATE_EXPIRED) { + if (xfrm_selector_match(&x->sel, fl, x->sel.family) && + security_xfrm_state_pol_flow_match(x, pol, fl)) + *error = -ESRCH; + } } struct xfrm_state * -xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, - struct flowi *fl, struct xfrm_tmpl *tmpl, +xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, + const struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsigned short family) { - unsigned int h; - struct hlist_node *entry; - struct xfrm_state *x, *x0; + static xfrm_address_t saddr_wildcard = { }; + struct net *net = xp_net(pol); + unsigned int h, h_wildcard; + struct xfrm_state *x, *x0, *to_put; int acquire_in_progress = 0; int error = 0; struct xfrm_state *best = NULL; + u32 mark = pol->mark.v & pol->mark.m; + unsigned short encap_family = tmpl->encap_family; + struct km_event c; - spin_lock_bh(&xfrm_state_lock); - h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { - if (x->props.family == family && + to_put = NULL; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && + (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && - xfrm_state_addr_check(x, daddr, saddr, family) && + xfrm_state_addr_check(x, daddr, saddr, encap_family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && - (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) { - /* Resolution logic: - 1. There is a valid state with matching selector. - Done. - 2. Valid state with inappropriate selector. Skip. - - Entering area of "sysdeps". - - 3. If state is not valid, selector is temporary, - it selects only session which triggered - previous resolution. Key manager will do - something to install a state with proper - selector. - */ - if (x->km.state == XFRM_STATE_VALID) { - if ((x->sel.family && !xfrm_selector_match(&x->sel, fl, x->sel.family)) || - !security_xfrm_state_pol_flow_match(x, pol, fl)) - continue; - if (!best || - best->km.dying > x->km.dying || - (best->km.dying == x->km.dying && - best->curlft.add_time < x->curlft.add_time)) - best = x; - } else if (x->km.state == XFRM_STATE_ACQ) { - acquire_in_progress = 1; - } else if (x->km.state == XFRM_STATE_ERROR || - x->km.state == XFRM_STATE_EXPIRED) { - if (xfrm_selector_match(&x->sel, fl, x->sel.family) && - security_xfrm_state_pol_flow_match(x, pol, fl)) - error = -ESRCH; - } - } + (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) + xfrm_state_look_at(pol, x, fl, encap_family, + &best, &acquire_in_progress, &error); + } + if (best || acquire_in_progress) + goto found; + + h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); + hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) { + if (x->props.family == encap_family && + x->props.reqid == tmpl->reqid && + (mark & x->mark.m) == x->mark.v && + !(x->props.flags & XFRM_STATE_WILDRECV) && + xfrm_addr_equal(&x->id.daddr, daddr, encap_family) && + tmpl->mode == x->props.mode && + tmpl->id.proto == x->id.proto && + (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) + xfrm_state_look_at(pol, x, fl, encap_family, + &best, &acquire_in_progress, &error); } +found: x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi, - tmpl->id.proto, family)) != NULL) { - xfrm_state_put(x0); + (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi, + tmpl->id.proto, encap_family)) != NULL) { + to_put = x0; error = -EEXIST; goto out; } - x = xfrm_state_alloc(); + + c.net = net; + /* If the KMs have no listeners (yet...), avoid allocating an SA + * for each and every packet - garbage collection might not + * handle the flood. + */ + if (!km_is_alive(&c)) { + error = -ESRCH; + goto out; + } + + x = xfrm_state_alloc(net); if (x == NULL) { error = -ENOMEM; goto out; } - /* Initialize temporary selector matching only + /* Initialize temporary state matching only * to current session. */ - xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family); + memcpy(&x->mark, &pol->mark, sizeof(x->mark)); - error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); + error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid); if (error) { x->km.state = XFRM_STATE_DEAD; - xfrm_state_put(x); + to_put = x; x = NULL; goto out; } if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - hlist_add_head(&x->bydst, xfrm_state_bydst+h); - h = xfrm_src_hash(daddr, saddr, family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); + list_add(&x->km.all, &net->xfrm.state_all); + hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + h = xfrm_src_hash(net, daddr, saddr, encap_family); + hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); if (x->id.spi) { - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); - hlist_add_head(&x->byspi, xfrm_state_byspi+h); + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); + hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } - x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; - x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; - add_timer(&x->timer); - xfrm_state_num++; - xfrm_hash_grow_check(x->bydst.next != NULL); + x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; + tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); + net->xfrm.state_num++; + xfrm_hash_grow_check(net, x->bydst.next != NULL); } else { x->km.state = XFRM_STATE_DEAD; - xfrm_state_put(x); + to_put = x; x = NULL; error = -ESRCH; } @@ -871,23 +882,26 @@ out: xfrm_state_hold(x); else *err = acquire_in_progress ? -EAGAIN : error; - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + if (to_put) + xfrm_state_put(to_put); return x; } struct xfrm_state * -xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr, +xfrm_stateonly_find(struct net *net, u32 mark, + xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, u8 mode, u8 proto, u32 reqid) { unsigned int h; struct xfrm_state *rx = NULL, *x = NULL; - struct hlist_node *entry; - spin_lock(&xfrm_state_lock); - h = xfrm_dst_hash(daddr, saddr, reqid, family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + spin_lock_bh(&net->xfrm.xfrm_state_lock); + h = xfrm_dst_hash(net, daddr, saddr, reqid, family); + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && + (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_state_addr_check(x, daddr, saddr, family) && mode == x->props.mode && @@ -900,104 +914,122 @@ xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (rx) xfrm_state_hold(rx); - spin_unlock(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return rx; } EXPORT_SYMBOL(xfrm_stateonly_find); +struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi, + unsigned short family) +{ + struct xfrm_state *x; + struct xfrm_state_walk *w; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + list_for_each_entry(w, &net->xfrm.state_all, all) { + x = container_of(w, struct xfrm_state, km); + if (x->props.family != family || + x->id.spi != spi) + continue; + + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + xfrm_state_hold(x); + return x; + } + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + return NULL; +} +EXPORT_SYMBOL(xfrm_state_lookup_byspi); + static void __xfrm_state_insert(struct xfrm_state *x) { + struct net *net = xs_net(x); unsigned int h; - x->genid = ++xfrm_state_genid; + list_add(&x->km.all, &net->xfrm.state_all); - h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, + h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); - hlist_add_head(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); - h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); + h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family); + hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); if (x->id.spi) { - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } - mod_timer(&x->timer, jiffies + HZ); + tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); if (x->replay_maxage) mod_timer(&x->rtimer, jiffies + x->replay_maxage); - wake_up(&km_waitq); - - xfrm_state_num++; + net->xfrm.state_num++; - xfrm_hash_grow_check(x->bydst.next != NULL); + xfrm_hash_grow_check(net, x->bydst.next != NULL); } -/* xfrm_state_lock is held */ +/* net->xfrm.xfrm_state_lock is held */ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) { + struct net *net = xs_net(xnew); unsigned short family = xnew->props.family; u32 reqid = xnew->props.reqid; struct xfrm_state *x; - struct hlist_node *entry; unsigned int h; + u32 mark = xnew->mark.v & xnew->mark.m; - h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && - !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && - !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family)) - x->genid = xfrm_state_genid; + (mark & x->mark.m) == x->mark.v && + xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) && + xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family)) + x->genid++; } } void xfrm_state_insert(struct xfrm_state *x) { - spin_lock_bh(&xfrm_state_lock); + struct net *net = xs_net(x); + + spin_lock_bh(&net->xfrm.xfrm_state_lock); __xfrm_state_bump_genids(x); __xfrm_state_insert(x); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); } EXPORT_SYMBOL(xfrm_state_insert); -/* xfrm_state_lock is held */ -static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) +/* net->xfrm.xfrm_state_lock is held */ +static struct xfrm_state *__find_acq_core(struct net *net, + const struct xfrm_mark *m, + unsigned short family, u8 mode, + u32 reqid, u8 proto, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + int create) { - unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family); - struct hlist_node *entry; + unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); struct xfrm_state *x; + u32 mark = m->v & m->m; - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || x->km.state != XFRM_STATE_ACQ || x->id.spi != 0 || - x->id.proto != proto) + x->id.proto != proto || + (mark & x->mark.m) != x->mark.v || + !xfrm_addr_equal(&x->id.daddr, daddr, family) || + !xfrm_addr_equal(&x->props.saddr, saddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4 || - x->props.saddr.a4 != saddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6, - (struct in6_addr *)daddr) || - !ipv6_addr_equal((struct in6_addr *) - x->props.saddr.a6, - (struct in6_addr *)saddr)) - continue; - break; - } - xfrm_state_hold(x); return x; } @@ -1005,7 +1037,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re if (!create) return NULL; - x = xfrm_state_alloc(); + x = xfrm_state_alloc(net); if (likely(x)) { switch (family) { case AF_INET: @@ -1018,16 +1050,12 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re break; case AF_INET6: - ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6, - (struct in6_addr *)daddr); - ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6, - (struct in6_addr *)saddr); + *(struct in6_addr *)x->sel.daddr.a6 = *(struct in6_addr *)daddr; + *(struct in6_addr *)x->sel.saddr.a6 = *(struct in6_addr *)saddr; x->sel.prefixlen_d = 128; x->sel.prefixlen_s = 128; - ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6, - (struct in6_addr *)saddr); - ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6, - (struct in6_addr *)daddr); + *(struct in6_addr *)x->props.saddr.a6 = *(struct in6_addr *)saddr; + *(struct in6_addr *)x->id.daddr.a6 = *(struct in6_addr *)daddr; break; } @@ -1036,55 +1064,61 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re x->props.family = family; x->props.mode = mode; x->props.reqid = reqid; - x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; + x->mark.v = m->v; + x->mark.m = m->m; + x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; xfrm_state_hold(x); - x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; - add_timer(&x->timer); - hlist_add_head(&x->bydst, xfrm_state_bydst+h); - h = xfrm_src_hash(daddr, saddr, family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); + tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); + list_add(&x->km.all, &net->xfrm.state_all); + hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + h = xfrm_src_hash(net, daddr, saddr, family); + hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); - xfrm_state_num++; + net->xfrm.state_num++; - xfrm_hash_grow_check(x->bydst.next != NULL); + xfrm_hash_grow_check(net, x->bydst.next != NULL); } return x; } -static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); int xfrm_state_add(struct xfrm_state *x) { - struct xfrm_state *x1; + struct net *net = xs_net(x); + struct xfrm_state *x1, *to_put; int family; int err; + u32 mark = x->mark.v & x->mark.m; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; - spin_lock_bh(&xfrm_state_lock); + to_put = NULL; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); x1 = __xfrm_state_locate(x, use_spi, family); if (x1) { - xfrm_state_put(x1); + to_put = x1; x1 = NULL; err = -EEXIST; goto out; } if (use_spi && x->km.seq) { - x1 = __xfrm_find_acq_byseq(x->km.seq); + x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq); if (x1 && ((x1->id.proto != x->id.proto) || - xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) { - xfrm_state_put(x1); + !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) { + to_put = x1; x1 = NULL; } } if (use_spi && !x1) - x1 = __find_acq_core(family, x->props.mode, x->props.reqid, - x->id.proto, + x1 = __find_acq_core(net, &x->mark, family, x->props.mode, + x->props.reqid, x->id.proto, &x->id.daddr, &x->props.saddr, 0); __xfrm_state_bump_genids(x); @@ -1092,24 +1126,27 @@ int xfrm_state_add(struct xfrm_state *x) err = 0; out: - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (x1) { xfrm_state_delete(x1); xfrm_state_put(x1); } + if (to_put) + xfrm_state_put(to_put); + return err; } EXPORT_SYMBOL(xfrm_state_add); #ifdef CONFIG_XFRM_MIGRATE -static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) +static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig) { - int err = -ENOMEM; - struct xfrm_state *x = xfrm_state_alloc(); + struct net *net = xs_net(orig); + struct xfrm_state *x = xfrm_state_alloc(net); if (!x) - goto error; + goto out; memcpy(&x->id, &orig->id, sizeof(x->id)); memcpy(&x->sel, &orig->sel, sizeof(x->sel)); @@ -1121,12 +1158,17 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) x->props.saddr = orig->props.saddr; if (orig->aalg) { - x->aalg = xfrm_algo_clone(orig->aalg); + x->aalg = xfrm_algo_auth_clone(orig->aalg); if (!x->aalg) goto error; } x->props.aalgo = orig->props.aalgo; + if (orig->aead) { + x->aead = xfrm_algo_aead_clone(orig->aead); + if (!x->aead) + goto error; + } if (orig->ealg) { x->ealg = xfrm_algo_clone(orig->ealg); if (!x->ealg) @@ -1154,12 +1196,22 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) goto error; } - err = xfrm_init_state(x); - if (err) + if (orig->replay_esn) { + if (xfrm_replay_clone(x, orig)) + goto error; + } + + memcpy(&x->mark, &orig->mark, sizeof(x->mark)); + + if (xfrm_init_state(x) < 0) goto error; x->props.flags = orig->props.flags; + x->props.extra_flags = orig->props.extra_flags; + x->tfcpad = orig->tfcpad; + x->replay_maxdiff = orig->replay_maxdiff; + x->replay_maxage = orig->replay_maxage; x->curlft.add_time = orig->curlft.add_time; x->km.state = orig->km.state; x->km.seq = orig->km.seq; @@ -1167,71 +1219,64 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) return x; error: - if (errp) - *errp = err; - if (x) { - kfree(x->aalg); - kfree(x->ealg); - kfree(x->calg); - kfree(x->encap); - kfree(x->coaddr); - } - kfree(x); + xfrm_state_put(x); +out: return NULL; } -/* xfrm_state_lock is held */ -struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) +struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net) { unsigned int h; - struct xfrm_state *x; - struct hlist_node *entry; + struct xfrm_state *x = NULL; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); if (m->reqid) { - h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr, + h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr, m->reqid, m->old_family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; if (m->reqid && x->props.reqid != m->reqid) continue; - if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr, - m->old_family) || - xfrm_addr_cmp(&x->props.saddr, &m->old_saddr, - m->old_family)) + if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, + m->old_family) || + !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, + m->old_family)) continue; xfrm_state_hold(x); - return x; + break; } } else { - h = xfrm_src_hash(&m->old_daddr, &m->old_saddr, + h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr, m->old_family); - hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { + hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; - if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr, - m->old_family) || - xfrm_addr_cmp(&x->props.saddr, &m->old_saddr, - m->old_family)) + if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, + m->old_family) || + !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, + m->old_family)) continue; xfrm_state_hold(x); - return x; + break; } } - return NULL; + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + + return x; } EXPORT_SYMBOL(xfrm_migrate_state_find); -struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x, - struct xfrm_migrate *m) +struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, + struct xfrm_migrate *m) { struct xfrm_state *xc; - int err; - xc = xfrm_state_clone(x, &err); + xc = xfrm_state_clone(x); if (!xc) return NULL; @@ -1239,18 +1284,18 @@ struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x, memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr)); /* add state */ - if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) { + if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) { /* a care is needed when the destination address of the state is to be updated as it is a part of triplet */ xfrm_state_insert(xc); } else { - if ((err = xfrm_state_add(xc)) < 0) + if (xfrm_state_add(xc) < 0) goto error; } return xc; error: - kfree(xc); + xfrm_state_put(xc); return NULL; } EXPORT_SYMBOL(xfrm_state_migrate); @@ -1258,11 +1303,14 @@ EXPORT_SYMBOL(xfrm_state_migrate); int xfrm_state_update(struct xfrm_state *x) { - struct xfrm_state *x1; + struct xfrm_state *x1, *to_put; int err; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); + struct net *net = xs_net(x); + + to_put = NULL; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); x1 = __xfrm_state_locate(x, use_spi, x->props.family); err = -ESRCH; @@ -1270,7 +1318,7 @@ int xfrm_state_update(struct xfrm_state *x) goto out; if (xfrm_state_kern(x1)) { - xfrm_state_put(x1); + to_put = x1; err = -EEXIST; goto out; } @@ -1282,7 +1330,10 @@ int xfrm_state_update(struct xfrm_state *x) err = 0; out: - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + + if (to_put) + xfrm_state_put(to_put); if (err) return err; @@ -1306,11 +1357,13 @@ out: memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); x1->km.dying = 0; - mod_timer(&x1->timer, jiffies + HZ); + tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); if (x1->curlft.use_time) xfrm_state_check_expire(x1); err = 0; + x->km.state = XFRM_STATE_DEAD; + __xfrm_state_put(x); } spin_unlock_bh(&x1->lock); @@ -1325,13 +1378,10 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (!x->curlft.use_time) x->curlft.use_time = get_seconds(); - if (x->km.state != XFRM_STATE_VALID) - return -EINVAL; - if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; - mod_timer(&x->timer, jiffies); + tasklet_hrtimer_start(&x->mtimer, ktime_set(0, 0), HRTIMER_MODE_REL); return -EINVAL; } @@ -1346,41 +1396,42 @@ int xfrm_state_check_expire(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_check_expire); struct xfrm_state * -xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, - unsigned short family) +xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi, + u8 proto, unsigned short family) { struct xfrm_state *x; - spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup(daddr, spi, proto, family); - spin_unlock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); + x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup); struct xfrm_state * -xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, +xfrm_state_lookup_byaddr(struct net *net, u32 mark, + const xfrm_address_t *daddr, const xfrm_address_t *saddr, u8 proto, unsigned short family) { struct xfrm_state *x; - spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family); - spin_unlock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); + x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup_byaddr); struct xfrm_state * -xfrm_find_acq(u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, - int create, unsigned short family) +xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid, + u8 proto, const xfrm_address_t *daddr, + const xfrm_address_t *saddr, int create, unsigned short family) { struct xfrm_state *x; - spin_lock_bh(&xfrm_state_lock); - x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create); - spin_unlock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); + x = __find_acq_core(net, mark, family, mode, reqid, proto, daddr, saddr, create); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } @@ -1389,17 +1440,17 @@ EXPORT_SYMBOL(xfrm_find_acq); #ifdef CONFIG_XFRM_SUB_POLICY int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, - unsigned short family) + unsigned short family, struct net *net) { int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); if (!afinfo) return -EAFNOSUPPORT; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/ if (afinfo->tmpl_sort) err = afinfo->tmpl_sort(dst, src, n); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); xfrm_state_put_afinfo(afinfo); return err; } @@ -1411,13 +1462,15 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, { int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + struct net *net = xs_net(*src); + if (!afinfo) return -EAFNOSUPPORT; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); if (afinfo->state_sort) err = afinfo->state_sort(dst, src, n); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); xfrm_state_put_afinfo(afinfo); return err; } @@ -1426,16 +1479,16 @@ EXPORT_SYMBOL(xfrm_state_sort); /* Silly enough, but I'm lazy to build resolution list */ -static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq) { int i; - for (i = 0; i <= xfrm_state_hmask; i++) { - struct hlist_node *entry; + for (i = 0; i <= net->xfrm.state_hmask; i++) { struct xfrm_state *x; - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (x->km.seq == seq && + (mark & x->mark.m) == x->mark.v && x->km.state == XFRM_STATE_ACQ) { xfrm_state_hold(x); return x; @@ -1445,13 +1498,13 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) return NULL; } -struct xfrm_state *xfrm_find_acq_byseq(u32 seq) +struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq) { struct xfrm_state *x; - spin_lock_bh(&xfrm_state_lock); - x = __xfrm_find_acq_byseq(seq); - spin_unlock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); + x = __xfrm_find_acq_byseq(net, mark, seq); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_find_acq_byseq); @@ -1459,23 +1512,49 @@ EXPORT_SYMBOL(xfrm_find_acq_byseq); u32 xfrm_get_acqseq(void) { u32 res; - static u32 acqseq; - static DEFINE_SPINLOCK(acqseq_lock); + static atomic_t acqseq; + + do { + res = atomic_inc_return(&acqseq); + } while (!res); - spin_lock_bh(&acqseq_lock); - res = (++acqseq ? : ++acqseq); - spin_unlock_bh(&acqseq_lock); return res; } EXPORT_SYMBOL(xfrm_get_acqseq); +int verify_spi_info(u8 proto, u32 min, u32 max) +{ + switch (proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + break; + + case IPPROTO_COMP: + /* IPCOMP spi is 16-bits. */ + if (max >= 0x10000) + return -EINVAL; + break; + + default: + return -EINVAL; + } + + if (min > max) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(verify_spi_info); + int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) { + struct net *net = xs_net(x); unsigned int h; struct xfrm_state *x0; int err = -ENOENT; __be32 minspi = htonl(low); __be32 maxspi = htonl(high); + u32 mark = x->mark.v & x->mark.m; spin_lock_bh(&x->lock); if (x->km.state == XFRM_STATE_DEAD) @@ -1488,7 +1567,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) err = -ENOENT; if (minspi == maxspi) { - x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family); + x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family); if (x0) { xfrm_state_put(x0); goto unlock; @@ -1496,9 +1575,9 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) x->id.spi = minspi; } else { u32 spi = 0; - for (h=0; h<high-low+1; h++) { - spi = low + net_random()%(high-low+1); - x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family); + for (h = 0; h < high-low+1; h++) { + spi = low + prandom_u32()%(high-low+1); + x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family); if (x0 == NULL) { x->id.spi = htonl(spi); break; @@ -1507,10 +1586,10 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) } } if (x->id.spi) { - spin_lock_bh(&xfrm_state_lock); - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, xfrm_state_byspi+h); - spin_unlock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); + hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = 0; } @@ -1522,97 +1601,98 @@ unlock: } EXPORT_SYMBOL(xfrm_alloc_spi); -int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), +static bool __xfrm_state_filter_match(struct xfrm_state *x, + struct xfrm_address_filter *filter) +{ + if (filter) { + if ((filter->family == AF_INET || + filter->family == AF_INET6) && + x->props.family != filter->family) + return false; + + return addr_match(&x->props.saddr, &filter->saddr, + filter->splen) && + addr_match(&x->id.daddr, &filter->daddr, + filter->dplen); + } + return true; +} + +int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, + int (*func)(struct xfrm_state *, int, void*), void *data) { - int i; - struct xfrm_state *x, *last = NULL; - struct hlist_node *entry; - int count = 0; + struct xfrm_state *state; + struct xfrm_state_walk *x; int err = 0; - spin_lock_bh(&xfrm_state_lock); - for (i = 0; i <= xfrm_state_hmask; i++) { - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { - if (!xfrm_id_proto_match(x->id.proto, proto)) - continue; - if (last) { - err = func(last, count, data); - if (err) - goto out; - } - last = x; - count++; + if (walk->seq != 0 && list_empty(&walk->all)) + return 0; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + if (list_empty(&walk->all)) + x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all); + else + x = list_entry(&walk->all, struct xfrm_state_walk, all); + list_for_each_entry_from(x, &net->xfrm.state_all, all) { + if (x->state == XFRM_STATE_DEAD) + continue; + state = container_of(x, struct xfrm_state, km); + if (!xfrm_id_proto_match(state->id.proto, walk->proto)) + continue; + if (!__xfrm_state_filter_match(state, walk->filter)) + continue; + err = func(state, walk->seq, data); + if (err) { + list_move_tail(&walk->all, &x->all); + goto out; } + walk->seq++; } - if (count == 0) { + if (walk->seq == 0) { err = -ENOENT; goto out; } - err = func(last, 0, data); + list_del_init(&walk->all); out: - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return err; } EXPORT_SYMBOL(xfrm_state_walk); - -void xfrm_replay_notify(struct xfrm_state *x, int event) +void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto, + struct xfrm_address_filter *filter) { - struct km_event c; - /* we send notify messages in case - * 1. we updated on of the sequence numbers, and the seqno difference - * is at least x->replay_maxdiff, in this case we also update the - * timeout of our timer function - * 2. if x->replay_maxage has elapsed since last update, - * and there were changes - * - * The state structure must be locked! - */ - - switch (event) { - case XFRM_REPLAY_UPDATE: - if (x->replay_maxdiff && - (x->replay.seq - x->preplay.seq < x->replay_maxdiff) && - (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) { - if (x->xflags & XFRM_TIME_DEFER) - event = XFRM_REPLAY_TIMEOUT; - else - return; - } - - break; - - case XFRM_REPLAY_TIMEOUT: - if ((x->replay.seq == x->preplay.seq) && - (x->replay.bitmap == x->preplay.bitmap) && - (x->replay.oseq == x->preplay.oseq)) { - x->xflags |= XFRM_TIME_DEFER; - return; - } + INIT_LIST_HEAD(&walk->all); + walk->proto = proto; + walk->state = XFRM_STATE_DEAD; + walk->seq = 0; + walk->filter = filter; +} +EXPORT_SYMBOL(xfrm_state_walk_init); - break; - } +void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net) +{ + kfree(walk->filter); - memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state)); - c.event = XFRM_MSG_NEWAE; - c.data.aevent = event; - km_state_notify(x, &c); + if (list_empty(&walk->all)) + return; - if (x->replay_maxage && - !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) - x->xflags &= ~XFRM_TIME_DEFER; + spin_lock_bh(&net->xfrm.xfrm_state_lock); + list_del(&walk->all); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); } +EXPORT_SYMBOL(xfrm_state_walk_done); static void xfrm_replay_timer_handler(unsigned long data) { - struct xfrm_state *x = (struct xfrm_state*)data; + struct xfrm_state *x = (struct xfrm_state *)data; spin_lock(&x->lock); if (x->km.state == XFRM_STATE_VALID) { - if (xfrm_aevent_is_on()) - xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT); + if (xfrm_aevent_is_on(xs_net(x))) + x->repl->notify(x, XFRM_REPLAY_TIMEOUT); else x->xflags |= XFRM_TIME_DEFER; } @@ -1620,95 +1700,40 @@ static void xfrm_replay_timer_handler(unsigned long data) spin_unlock(&x->lock); } -int xfrm_replay_check(struct xfrm_state *x, - struct sk_buff *skb, __be32 net_seq) -{ - u32 diff; - u32 seq = ntohl(net_seq); - - if (unlikely(seq == 0)) - goto err; - - if (likely(seq > x->replay.seq)) - return 0; - - diff = x->replay.seq - seq; - if (diff >= min_t(unsigned int, x->props.replay_window, - sizeof(x->replay.bitmap) * 8)) { - x->stats.replay_window++; - goto err; - } - - if (x->replay.bitmap & (1U << diff)) { - x->stats.replay++; - goto err; - } - return 0; - -err: - xfrm_audit_state_replay(x, skb, net_seq); - return -EINVAL; -} - -void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) -{ - u32 diff; - u32 seq = ntohl(net_seq); - - if (seq > x->replay.seq) { - diff = seq - x->replay.seq; - if (diff < x->props.replay_window) - x->replay.bitmap = ((x->replay.bitmap) << diff) | 1; - else - x->replay.bitmap = 1; - x->replay.seq = seq; - } else { - diff = x->replay.seq - seq; - x->replay.bitmap |= (1U << diff); - } - - if (xfrm_aevent_is_on()) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); -} - static LIST_HEAD(xfrm_km_list); -static DEFINE_RWLOCK(xfrm_km_lock); -void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) +void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) if (km->notify_policy) km->notify_policy(xp, dir, c); - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); } -void km_state_notify(struct xfrm_state *x, struct km_event *c) +void km_state_notify(struct xfrm_state *x, const struct km_event *c) { struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) if (km->notify) km->notify(x, c); - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); } EXPORT_SYMBOL(km_policy_notify); EXPORT_SYMBOL(km_state_notify); -void km_state_expired(struct xfrm_state *x, int hard, u32 pid) +void km_state_expired(struct xfrm_state *x, int hard, u32 portid) { struct km_event c; c.data.hard = hard; - c.pid = pid; + c.portid = portid; c.event = XFRM_MSG_EXPIRE; km_state_notify(x, &c); - - if (hard) - wake_up(&km_waitq); } EXPORT_SYMBOL(km_state_expired); @@ -1721,13 +1746,13 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) int err = -EINVAL, acqret; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { - acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT); + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { + acqret = km->acquire(x, t, pol); if (!acqret) err = acqret; } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_query); @@ -1737,73 +1762,89 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) int err = -EINVAL; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { if (km->new_mapping) err = km->new_mapping(x, ipaddr, sport); if (!err) break; } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_new_mapping); -void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) +void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid) { struct km_event c; c.data.hard = hard; - c.pid = pid; + c.portid = portid; c.event = XFRM_MSG_POLEXPIRE; km_policy_notify(pol, dir, &c); - - if (hard) - wake_up(&km_waitq); } EXPORT_SYMBOL(km_policy_expired); #ifdef CONFIG_XFRM_MIGRATE -int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) +int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + const struct xfrm_migrate *m, int num_migrate, + const struct xfrm_kmaddress *k) { int err = -EINVAL; int ret; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { if (km->migrate) { - ret = km->migrate(sel, dir, type, m, num_migrate); + ret = km->migrate(sel, dir, type, m, num_migrate, k); if (!ret) err = ret; } } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_migrate); #endif -int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) +int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { int err = -EINVAL; int ret; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { if (km->report) { - ret = km->report(proto, sel, addr); + ret = km->report(net, proto, sel, addr); if (!ret) err = ret; } } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_report); +bool km_is_alive(const struct km_event *c) +{ + struct xfrm_mgr *km; + bool is_alive = false; + + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { + if (km->is_alive && km->is_alive(c)) { + is_alive = true; + break; + } + } + rcu_read_unlock(); + + return is_alive; +} +EXPORT_SYMBOL(km_is_alive); + int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) { int err; @@ -1823,14 +1864,14 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen goto out; err = -EINVAL; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { pol = km->compile_policy(sk, optname, data, optlen, &err); if (err >= 0) break; } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); if (err >= 0) { xfrm_sk_policy_insert(sk, err, pol); @@ -1844,20 +1885,23 @@ out: } EXPORT_SYMBOL(xfrm_user_policy); +static DEFINE_SPINLOCK(xfrm_km_lock); + int xfrm_register_km(struct xfrm_mgr *km) { - write_lock_bh(&xfrm_km_lock); - list_add_tail(&km->list, &xfrm_km_list); - write_unlock_bh(&xfrm_km_lock); + spin_lock_bh(&xfrm_km_lock); + list_add_tail_rcu(&km->list, &xfrm_km_list); + spin_unlock_bh(&xfrm_km_lock); return 0; } EXPORT_SYMBOL(xfrm_register_km); int xfrm_unregister_km(struct xfrm_mgr *km) { - write_lock_bh(&xfrm_km_lock); - list_del(&km->list); - write_unlock_bh(&xfrm_km_lock); + spin_lock_bh(&xfrm_km_lock); + list_del_rcu(&km->list); + spin_unlock_bh(&xfrm_km_lock); + synchronize_rcu(); return 0; } EXPORT_SYMBOL(xfrm_unregister_km); @@ -1869,12 +1913,12 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock_bh(&xfrm_state_afinfo_lock); + spin_lock_bh(&xfrm_state_afinfo_lock); if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; else - xfrm_state_afinfo[afinfo->family] = afinfo; - write_unlock_bh(&xfrm_state_afinfo_lock); + rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo); + spin_unlock_bh(&xfrm_state_afinfo_lock); return err; } EXPORT_SYMBOL(xfrm_state_register_afinfo); @@ -1886,34 +1930,34 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock_bh(&xfrm_state_afinfo_lock); + spin_lock_bh(&xfrm_state_afinfo_lock); if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) err = -EINVAL; else - xfrm_state_afinfo[afinfo->family] = NULL; + RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL); } - write_unlock_bh(&xfrm_state_afinfo_lock); + spin_unlock_bh(&xfrm_state_afinfo_lock); + synchronize_rcu(); return err; } EXPORT_SYMBOL(xfrm_state_unregister_afinfo); -static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) +struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) { struct xfrm_state_afinfo *afinfo; if (unlikely(family >= NPROTO)) return NULL; - read_lock(&xfrm_state_afinfo_lock); - afinfo = xfrm_state_afinfo[family]; + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_state_afinfo[family]); if (unlikely(!afinfo)) - read_unlock(&xfrm_state_afinfo_lock); + rcu_read_unlock(); return afinfo; } -static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) - __releases(xfrm_state_afinfo_lock) +void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) { - read_unlock(&xfrm_state_afinfo_lock); + rcu_read_unlock(); } /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ @@ -1945,7 +1989,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu) return res; } -int xfrm_init_state(struct xfrm_state *x) +int __xfrm_init_state(struct xfrm_state *x, bool init_replay) { struct xfrm_state_afinfo *afinfo; struct xfrm_mode *inner_mode; @@ -1982,8 +2026,9 @@ int xfrm_init_state(struct xfrm_state *x) x->inner_mode = inner_mode; } else { struct xfrm_mode *inner_mode_iaf; + int iafamily = AF_INET; - inner_mode = xfrm_get_mode(x->props.mode, AF_INET); + inner_mode = xfrm_get_mode(x->props.mode, x->props.family); if (inner_mode == NULL) goto error; @@ -1991,22 +2036,17 @@ int xfrm_init_state(struct xfrm_state *x) xfrm_put_mode(inner_mode); goto error; } + x->inner_mode = inner_mode; - inner_mode_iaf = xfrm_get_mode(x->props.mode, AF_INET6); - if (inner_mode_iaf == NULL) - goto error; + if (x->props.family == AF_INET) + iafamily = AF_INET6; - if (!(inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)) { - xfrm_put_mode(inner_mode_iaf); - goto error; - } - - if (x->props.family == AF_INET) { - x->inner_mode = inner_mode; - x->inner_mode_iaf = inner_mode_iaf; - } else { - x->inner_mode = inner_mode_iaf; - x->inner_mode_iaf = inner_mode; + inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily); + if (inner_mode_iaf) { + if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL) + x->inner_mode_iaf = inner_mode_iaf; + else + xfrm_put_mode(inner_mode_iaf); } } @@ -2019,8 +2059,16 @@ int xfrm_init_state(struct xfrm_state *x) goto error; x->outer_mode = xfrm_get_mode(x->props.mode, family); - if (x->outer_mode == NULL) + if (x->outer_mode == NULL) { + err = -EPROTONOSUPPORT; goto error; + } + + if (init_replay) { + err = xfrm_init_replay(x); + if (err) + goto error; + } x->km.state = XFRM_STATE_VALID; @@ -2028,22 +2076,66 @@ error: return err; } +EXPORT_SYMBOL(__xfrm_init_state); + +int xfrm_init_state(struct xfrm_state *x) +{ + return __xfrm_init_state(x, true); +} + EXPORT_SYMBOL(xfrm_init_state); -void __init xfrm_state_init(void) +int __net_init xfrm_state_init(struct net *net) { unsigned int sz; + INIT_LIST_HEAD(&net->xfrm.state_all); + sz = sizeof(struct hlist_head) * 8; - xfrm_state_bydst = xfrm_hash_alloc(sz); - xfrm_state_bysrc = xfrm_hash_alloc(sz); - xfrm_state_byspi = xfrm_hash_alloc(sz); - if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi) - panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); - xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); + net->xfrm.state_bydst = xfrm_hash_alloc(sz); + if (!net->xfrm.state_bydst) + goto out_bydst; + net->xfrm.state_bysrc = xfrm_hash_alloc(sz); + if (!net->xfrm.state_bysrc) + goto out_bysrc; + net->xfrm.state_byspi = xfrm_hash_alloc(sz); + if (!net->xfrm.state_byspi) + goto out_byspi; + net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1); + + net->xfrm.state_num = 0; + INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); + INIT_HLIST_HEAD(&net->xfrm.state_gc_list); + INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task); + spin_lock_init(&net->xfrm.xfrm_state_lock); + return 0; + +out_byspi: + xfrm_hash_free(net->xfrm.state_bysrc, sz); +out_bysrc: + xfrm_hash_free(net->xfrm.state_bydst, sz); +out_bydst: + return -ENOMEM; +} + +void xfrm_state_fini(struct net *net) +{ + unsigned int sz; + + flush_work(&net->xfrm.state_hash_work); + xfrm_state_flush(net, IPSEC_PROTO_ANY, false); + flush_work(&net->xfrm.state_gc_work); - INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); + WARN_ON(!list_empty(&net->xfrm.state_all)); + + sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(net->xfrm.state_byspi)); + xfrm_hash_free(net->xfrm.state_byspi, sz); + WARN_ON(!hlist_empty(net->xfrm.state_bysrc)); + xfrm_hash_free(net->xfrm.state_bysrc, sz); + WARN_ON(!hlist_empty(net->xfrm.state_bydst)); + xfrm_hash_free(net->xfrm.state_bydst, sz); } #ifdef CONFIG_AUDITSYSCALL @@ -2057,18 +2149,14 @@ static void xfrm_audit_helper_sainfo(struct xfrm_state *x, audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str); - switch(x->props.family) { + switch (x->props.family) { case AF_INET: - audit_log_format(audit_buf, - " src=" NIPQUAD_FMT " dst=" NIPQUAD_FMT, - NIPQUAD(x->props.saddr.a4), - NIPQUAD(x->id.daddr.a4)); + audit_log_format(audit_buf, " src=%pI4 dst=%pI4", + &x->props.saddr.a4, &x->id.daddr.a4); break; case AF_INET6: - audit_log_format(audit_buf, - " src=" NIP6_FMT " dst=" NIP6_FMT, - NIP6(*(struct in6_addr *)x->props.saddr.a6), - NIP6(*(struct in6_addr *)x->id.daddr.a6)); + audit_log_format(audit_buf, " src=%pI6 dst=%pI6", + x->props.saddr.a6, x->id.daddr.a6); break; } @@ -2078,24 +2166,20 @@ static void xfrm_audit_helper_sainfo(struct xfrm_state *x, static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family, struct audit_buffer *audit_buf) { - struct iphdr *iph4; - struct ipv6hdr *iph6; + const struct iphdr *iph4; + const struct ipv6hdr *iph6; switch (family) { case AF_INET: iph4 = ip_hdr(skb); - audit_log_format(audit_buf, - " src=" NIPQUAD_FMT " dst=" NIPQUAD_FMT, - NIPQUAD(iph4->saddr), - NIPQUAD(iph4->daddr)); + audit_log_format(audit_buf, " src=%pI4 dst=%pI4", + &iph4->saddr, &iph4->daddr); break; case AF_INET6: iph6 = ipv6_hdr(skb); audit_log_format(audit_buf, - " src=" NIP6_FMT " dst=" NIP6_FMT - " flowlbl=0x%x%x%x", - NIP6(iph6->saddr), - NIP6(iph6->daddr), + " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x", + &iph6->saddr, &iph6->daddr, iph6->flow_lbl[0] & 0x0f, iph6->flow_lbl[1], iph6->flow_lbl[2]); @@ -2103,30 +2187,28 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family, } } -void xfrm_audit_state_add(struct xfrm_state *x, int result, - u32 auid, u32 secid) +void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SAD-add"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, secid, audit_buf); + xfrm_audit_helper_usrinfo(task_valid, audit_buf); xfrm_audit_helper_sainfo(x, audit_buf); audit_log_format(audit_buf, " res=%u", result); audit_log_end(audit_buf); } EXPORT_SYMBOL_GPL(xfrm_audit_state_add); -void xfrm_audit_state_delete(struct xfrm_state *x, int result, - u32 auid, u32 secid) +void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SAD-delete"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, secid, audit_buf); + xfrm_audit_helper_usrinfo(task_valid, audit_buf); xfrm_audit_helper_sainfo(x, audit_buf); audit_log_format(audit_buf, " res=%u", result); audit_log_end(audit_buf); @@ -2151,7 +2233,7 @@ void xfrm_audit_state_replay_overflow(struct xfrm_state *x, } EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow); -static void xfrm_audit_state_replay(struct xfrm_state *x, +void xfrm_audit_state_replay(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq) { struct audit_buffer *audit_buf; @@ -2166,6 +2248,7 @@ static void xfrm_audit_state_replay(struct xfrm_state *x, spi, spi, ntohl(net_seq)); audit_log_end(audit_buf); } +EXPORT_SYMBOL_GPL(xfrm_audit_state_replay); void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family) { diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c new file mode 100644 index 00000000000..05a6e3d9c25 --- /dev/null +++ b/net/xfrm/xfrm_sysctl.c @@ -0,0 +1,86 @@ +#include <linux/sysctl.h> +#include <linux/slab.h> +#include <net/net_namespace.h> +#include <net/xfrm.h> + +static void __net_init __xfrm_sysctl_init(struct net *net) +{ + net->xfrm.sysctl_aevent_etime = XFRM_AE_ETIME; + net->xfrm.sysctl_aevent_rseqth = XFRM_AE_SEQT_SIZE; + net->xfrm.sysctl_larval_drop = 1; + net->xfrm.sysctl_acq_expires = 30; +} + +#ifdef CONFIG_SYSCTL +static struct ctl_table xfrm_table[] = { + { + .procname = "xfrm_aevent_etime", + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "xfrm_aevent_rseqth", + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "xfrm_larval_drop", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "xfrm_acq_expires", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + {} +}; + +int __net_init xfrm_sysctl_init(struct net *net) +{ + struct ctl_table *table; + + __xfrm_sysctl_init(net); + + table = kmemdup(xfrm_table, sizeof(xfrm_table), GFP_KERNEL); + if (!table) + goto out_kmemdup; + table[0].data = &net->xfrm.sysctl_aevent_etime; + table[1].data = &net->xfrm.sysctl_aevent_rseqth; + table[2].data = &net->xfrm.sysctl_larval_drop; + table[3].data = &net->xfrm.sysctl_acq_expires; + + /* Don't export sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) + table[0].procname = NULL; + + net->xfrm.sysctl_hdr = register_net_sysctl(net, "net/core", table); + if (!net->xfrm.sysctl_hdr) + goto out_register; + return 0; + +out_register: + kfree(table); +out_kmemdup: + return -ENOMEM; +} + +void __net_exit xfrm_sysctl_fini(struct net *net) +{ + struct ctl_table *table; + + table = net->xfrm.sysctl_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->xfrm.sysctl_hdr); + kfree(table); +} +#else +int __net_init xfrm_sysctl_init(struct net *net) +{ + __xfrm_sysctl_init(net); + return 0; +} +#endif diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5d96f2728dc..d4db6ebb089 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -26,16 +26,12 @@ #include <net/sock.h> #include <net/xfrm.h> #include <net/netlink.h> +#include <net/ah.h> #include <asm/uaccess.h> -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) #include <linux/in6.h> #endif -static inline int aead_len(struct xfrm_algo_aead *alg) -{ - return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); -} - static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) { struct nlattr *rt = attrs[type]; @@ -50,19 +46,8 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) switch (type) { case XFRMA_ALG_AUTH: - if (!algp->alg_key_len && - strcmp(algp->alg_name, "digest_null") != 0) - return -EINVAL; - break; - case XFRMA_ALG_CRYPT: - if (!algp->alg_key_len && - strcmp(algp->alg_name, "cipher_null") != 0) - return -EINVAL; - break; - case XFRMA_ALG_COMP: - /* Zero length keys are legal. */ break; default: @@ -73,6 +58,22 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) return 0; } +static int verify_auth_trunc(struct nlattr **attrs) +{ + struct nlattr *rt = attrs[XFRMA_ALG_AUTH_TRUNC]; + struct xfrm_algo_auth *algp; + + if (!rt) + return 0; + + algp = nla_data(rt); + if (nla_len(rt) < xfrm_alg_auth_len(algp)) + return -EINVAL; + + algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0'; + return 0; +} + static int verify_aead(struct nlattr **attrs) { struct nlattr *rt = attrs[XFRMA_ALG_AEAD]; @@ -113,6 +114,38 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs) return 0; } +static inline int verify_replay(struct xfrm_usersa_info *p, + struct nlattr **attrs) +{ + struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; + struct xfrm_replay_state_esn *rs; + + if (p->flags & XFRM_STATE_ESN) { + if (!rt) + return -EINVAL; + + rs = nla_data(rt); + + if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) + return -EINVAL; + + if (nla_len(rt) < xfrm_replay_state_esn_len(rs) && + nla_len(rt) != sizeof(*rs)) + return -EINVAL; + } + + if (!rt) + return 0; + + /* As only ESP and AH support ESN feature. */ + if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH)) + return -EINVAL; + + if (p->replay_window != 0) + return -EINVAL; + + return 0; +} static int verify_newsa_info(struct xfrm_usersa_info *p, struct nlattr **attrs) @@ -125,7 +158,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, break; case AF_INET6: -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) break; #else err = -EAFNOSUPPORT; @@ -139,10 +172,12 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->id.proto) { case IPPROTO_AH: - if (!attrs[XFRMA_ALG_AUTH] || + if ((!attrs[XFRMA_ALG_AUTH] && + !attrs[XFRMA_ALG_AUTH_TRUNC]) || attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || - attrs[XFRMA_ALG_COMP]) + attrs[XFRMA_ALG_COMP] || + attrs[XFRMA_TFCPAD]) goto out; break; @@ -150,32 +185,42 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, if (attrs[XFRMA_ALG_COMP]) goto out; if (!attrs[XFRMA_ALG_AUTH] && + !attrs[XFRMA_ALG_AUTH_TRUNC] && !attrs[XFRMA_ALG_CRYPT] && !attrs[XFRMA_ALG_AEAD]) goto out; if ((attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_AUTH_TRUNC] || attrs[XFRMA_ALG_CRYPT]) && attrs[XFRMA_ALG_AEAD]) goto out; + if (attrs[XFRMA_TFCPAD] && + p->mode != XFRM_MODE_TUNNEL) + goto out; break; case IPPROTO_COMP: if (!attrs[XFRMA_ALG_COMP] || attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_AUTH] || - attrs[XFRMA_ALG_CRYPT]) + attrs[XFRMA_ALG_AUTH_TRUNC] || + attrs[XFRMA_ALG_CRYPT] || + attrs[XFRMA_TFCPAD] || + (ntohl(p->id.spi) >= 0x10000)) goto out; break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: if (attrs[XFRMA_ALG_COMP] || attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_AUTH_TRUNC] || attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ENCAP] || attrs[XFRMA_SEC_CTX] || + attrs[XFRMA_TFCPAD] || !attrs[XFRMA_COADDR]) goto out; break; @@ -187,6 +232,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, if ((err = verify_aead(attrs))) goto out; + if ((err = verify_auth_trunc(attrs))) + goto out; if ((err = verify_one_alg(attrs, XFRMA_ALG_AUTH))) goto out; if ((err = verify_one_alg(attrs, XFRMA_ALG_CRYPT))) @@ -195,6 +242,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; if ((err = verify_sec_ctx_len(attrs))) goto out; + if ((err = verify_replay(p, attrs))) + goto out; err = -EINVAL; switch (p->mode) { @@ -215,7 +264,7 @@ out: } static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, - struct xfrm_algo_desc *(*get_byname)(char *, int), + struct xfrm_algo_desc *(*get_byname)(const char *, int), struct nlattr *rta) { struct xfrm_algo *p, *ualg; @@ -240,6 +289,67 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, return 0; } +static int attach_auth(struct xfrm_algo_auth **algpp, u8 *props, + struct nlattr *rta) +{ + struct xfrm_algo *ualg; + struct xfrm_algo_auth *p; + struct xfrm_algo_desc *algo; + + if (!rta) + return 0; + + ualg = nla_data(rta); + + algo = xfrm_aalg_get_byname(ualg->alg_name, 1); + if (!algo) + return -ENOSYS; + *props = algo->desc.sadb_alg_id; + + p = kmalloc(sizeof(*p) + (ualg->alg_key_len + 7) / 8, GFP_KERNEL); + if (!p) + return -ENOMEM; + + strcpy(p->alg_name, algo->name); + p->alg_key_len = ualg->alg_key_len; + p->alg_trunc_len = algo->uinfo.auth.icv_truncbits; + memcpy(p->alg_key, ualg->alg_key, (ualg->alg_key_len + 7) / 8); + + *algpp = p; + return 0; +} + +static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props, + struct nlattr *rta) +{ + struct xfrm_algo_auth *p, *ualg; + struct xfrm_algo_desc *algo; + + if (!rta) + return 0; + + ualg = nla_data(rta); + + algo = xfrm_aalg_get_byname(ualg->alg_name, 1); + if (!algo) + return -ENOSYS; + if ((ualg->alg_trunc_len / 8) > MAX_AH_AUTH_LEN || + ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits) + return -EINVAL; + *props = algo->desc.sadb_alg_id; + + p = kmemdup(ualg, xfrm_alg_auth_len(ualg), GFP_KERNEL); + if (!p) + return -ENOMEM; + + strcpy(p->alg_name, algo->name); + if (!p->alg_trunc_len) + p->alg_trunc_len = algo->uinfo.auth.icv_truncbits; + + *algpp = p; + return 0; +} + static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props, struct nlattr *rta) { @@ -265,6 +375,57 @@ static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props, return 0; } +static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_esn, + struct nlattr *rp) +{ + struct xfrm_replay_state_esn *up; + int ulen; + + if (!replay_esn || !rp) + return 0; + + up = nla_data(rp); + ulen = xfrm_replay_state_esn_len(up); + + if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) + return -EINVAL; + + return 0; +} + +static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn, + struct xfrm_replay_state_esn **preplay_esn, + struct nlattr *rta) +{ + struct xfrm_replay_state_esn *p, *pp, *up; + int klen, ulen; + + if (!rta) + return 0; + + up = nla_data(rta); + klen = xfrm_replay_state_esn_len(up); + ulen = nla_len(rta) >= klen ? klen : sizeof(*up); + + p = kzalloc(klen, GFP_KERNEL); + if (!p) + return -ENOMEM; + + pp = kzalloc(klen, GFP_KERNEL); + if (!pp) { + kfree(p); + return -ENOMEM; + } + + memcpy(p, up, ulen); + memcpy(pp, up, ulen); + + *replay_esn = p; + *preplay_esn = pp; + + return 0; +} + static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) { int len = 0; @@ -282,15 +443,15 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * memcpy(&x->sel, &p->sel, sizeof(x->sel)); memcpy(&x->lft, &p->lft, sizeof(x->lft)); x->props.mode = p->mode; - x->props.replay_window = p->replay_window; + x->props.replay_window = min_t(unsigned int, p->replay_window, + sizeof(x->replay.bitmap) * 8); x->props.reqid = p->reqid; x->props.family = p->family; memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); x->props.flags = p->flags; - if (x->props.mode == XFRM_MODE_TRANSPORT) + if (!x->sel.family && !(p->flags & XFRM_STATE_AF_UNSPEC)) x->sel.family = p->family; - } /* @@ -298,13 +459,24 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * * somehow made shareable and move it to xfrm_state.c - JHS * */ -static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) +static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, + int update_esn) { struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; + struct nlattr *re = update_esn ? attrs[XFRMA_REPLAY_ESN_VAL] : NULL; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; + if (re) { + struct xfrm_replay_state_esn *replay_esn; + replay_esn = nla_data(re); + memcpy(x->replay_esn, replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + memcpy(x->preplay_esn, replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + } + if (rp) { struct xfrm_replay_state *replay; replay = nla_data(rp); @@ -328,11 +500,12 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) x->replay_maxdiff = nla_get_u32(rt); } -static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, +static struct xfrm_state *xfrm_state_construct(struct net *net, + struct xfrm_usersa_info *p, struct nlattr **attrs, int *errp) { - struct xfrm_state *x = xfrm_state_alloc(); + struct xfrm_state *x = xfrm_state_alloc(net); int err = -ENOMEM; if (!x) @@ -340,13 +513,20 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, copy_from_user_state(x, p); + if (attrs[XFRMA_SA_EXTRA_FLAGS]) + x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]); + if ((err = attach_aead(&x->aead, &x->props.ealgo, attrs[XFRMA_ALG_AEAD]))) goto error; - if ((err = attach_one_algo(&x->aalg, &x->props.aalgo, - xfrm_aalg_get_byname, - attrs[XFRMA_ALG_AUTH]))) + if ((err = attach_auth_trunc(&x->aalg, &x->props.aalgo, + attrs[XFRMA_ALG_AUTH_TRUNC]))) goto error; + if (!x->props.aalgo) { + if ((err = attach_auth(&x->aalg, &x->props.aalgo, + attrs[XFRMA_ALG_AUTH]))) + goto error; + } if ((err = attach_one_algo(&x->ealg, &x->props.ealgo, xfrm_ealg_get_byname, attrs[XFRMA_ALG_CRYPT]))) @@ -363,6 +543,9 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, goto error; } + if (attrs[XFRMA_TFCPAD]) + x->tfcpad = nla_get_u32(attrs[XFRMA_TFCPAD]); + if (attrs[XFRMA_COADDR]) { x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]), sizeof(*x->coaddr), GFP_KERNEL); @@ -370,7 +553,9 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, goto error; } - err = xfrm_init_state(x); + xfrm_mark_get(attrs, &x->mark); + + err = __xfrm_init_state(x, false); if (err) goto error; @@ -378,17 +563,20 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX]))) goto error; + if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, + attrs[XFRMA_REPLAY_ESN_VAL]))) + goto error; + x->km.seq = p->seq; - x->replay_maxdiff = sysctl_xfrm_aevent_rseqth; + x->replay_maxdiff = net->xfrm.sysctl_aevent_rseqth; /* sysctl_xfrm_aevent_etime is in 100ms units */ - x->replay_maxage = (sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M; - x->preplay.bitmap = 0; - x->preplay.seq = x->replay.seq+x->replay_maxdiff; - x->preplay.oseq = x->replay.oseq +x->replay_maxdiff; + x->replay_maxage = (net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M; - /* override default values from above */ + if ((err = xfrm_init_replay(x))) + goto error; - xfrm_update_ae_params(x, attrs); + /* override default values from above */ + xfrm_update_ae_params(x, attrs, 0); return x; @@ -403,6 +591,7 @@ error_no_put: static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_usersa_info *p = nlmsg_data(nlh); struct xfrm_state *x; int err; @@ -412,7 +601,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; - x = xfrm_state_construct(p, attrs, &err); + x = xfrm_state_construct(net, p, attrs, &err); if (!x) return err; @@ -422,8 +611,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, else err = xfrm_state_update(x); - xfrm_audit_state_add(x, err ? 0 : 1, NETLINK_CB(skb).loginuid, - NETLINK_CB(skb).sid); + xfrm_audit_state_add(x, err ? 0 : 1, true); if (err < 0) { x->km.state = XFRM_STATE_DEAD; @@ -432,7 +620,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, } c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); @@ -441,16 +629,19 @@ out: return err; } -static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, +static struct xfrm_state *xfrm_user_state_lookup(struct net *net, + struct xfrm_usersa_id *p, struct nlattr **attrs, int *errp) { struct xfrm_state *x = NULL; + struct xfrm_mark m; int err; + u32 mark = xfrm_mark_get(attrs, &m); if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { err = -ESRCH; - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + x = xfrm_state_lookup(net, mark, &p->daddr, p->spi, p->proto, p->family); } else { xfrm_address_t *saddr = NULL; @@ -461,8 +652,9 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, } err = -ESRCH; - x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto, - p->family); + x = xfrm_state_lookup_byaddr(net, mark, + &p->daddr, saddr, + p->proto, p->family); } out: @@ -474,12 +666,13 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_state *x; int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = nlmsg_data(nlh); - x = xfrm_user_state_lookup(p, attrs, &err); + x = xfrm_user_state_lookup(net, p, attrs, &err); if (x == NULL) return err; @@ -497,19 +690,19 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); out: - xfrm_audit_state_delete(x, err ? 0 : 1, NETLINK_CB(skb).loginuid, - NETLINK_CB(skb).sid); + xfrm_audit_state_delete(x, err ? 0 : 1, true); xfrm_state_put(x); return err; } static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { + memset(p, 0, sizeof(*p)); memcpy(&p->id, &x->id, sizeof(p->id)); memcpy(&p->sel, &x->sel, sizeof(p->sel)); memcpy(&p->lft, &x->lft, sizeof(p->lft)); @@ -529,8 +722,6 @@ struct xfrm_dump_info { struct sk_buff *out_skb; u32 nlmsg_seq; u16 nlmsg_flags; - int start_idx; - int this_idx; }; static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) @@ -554,38 +745,97 @@ static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) return 0; } -/* Don't change this without updating xfrm_sa_len! */ -static int copy_to_user_state_extra(struct xfrm_state *x, - struct xfrm_usersa_info *p, - struct sk_buff *skb) +static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb) { - copy_to_user_state(x, p); + struct xfrm_algo *algo; + struct nlattr *nla; - if (x->coaddr) - NLA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + nla = nla_reserve(skb, XFRMA_ALG_AUTH, + sizeof(*algo) + (auth->alg_key_len + 7) / 8); + if (!nla) + return -EMSGSIZE; - if (x->lastused) - NLA_PUT_U64(skb, XFRMA_LASTUSED, x->lastused); + algo = nla_data(nla); + strncpy(algo->alg_name, auth->alg_name, sizeof(algo->alg_name)); + memcpy(algo->alg_key, auth->alg_key, (auth->alg_key_len + 7) / 8); + algo->alg_key_len = auth->alg_key_len; - if (x->aead) - NLA_PUT(skb, XFRMA_ALG_AEAD, aead_len(x->aead), x->aead); - if (x->aalg) - NLA_PUT(skb, XFRMA_ALG_AUTH, xfrm_alg_len(x->aalg), x->aalg); - if (x->ealg) - NLA_PUT(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(x->ealg), x->ealg); - if (x->calg) - NLA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + return 0; +} - if (x->encap) - NLA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); +/* Don't change this without updating xfrm_sa_len! */ +static int copy_to_user_state_extra(struct xfrm_state *x, + struct xfrm_usersa_info *p, + struct sk_buff *skb) +{ + int ret = 0; - if (x->security && copy_sec_ctx(x->security, skb) < 0) - goto nla_put_failure; + copy_to_user_state(x, p); - return 0; + if (x->props.extra_flags) { + ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS, + x->props.extra_flags); + if (ret) + goto out; + } -nla_put_failure: - return -EMSGSIZE; + if (x->coaddr) { + ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + if (ret) + goto out; + } + if (x->lastused) { + ret = nla_put_u64(skb, XFRMA_LASTUSED, x->lastused); + if (ret) + goto out; + } + if (x->aead) { + ret = nla_put(skb, XFRMA_ALG_AEAD, aead_len(x->aead), x->aead); + if (ret) + goto out; + } + if (x->aalg) { + ret = copy_to_user_auth(x->aalg, skb); + if (!ret) + ret = nla_put(skb, XFRMA_ALG_AUTH_TRUNC, + xfrm_alg_auth_len(x->aalg), x->aalg); + if (ret) + goto out; + } + if (x->ealg) { + ret = nla_put(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(x->ealg), x->ealg); + if (ret) + goto out; + } + if (x->calg) { + ret = nla_put(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + if (ret) + goto out; + } + if (x->encap) { + ret = nla_put(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + if (ret) + goto out; + } + if (x->tfcpad) { + ret = nla_put_u32(skb, XFRMA_TFCPAD, x->tfcpad); + if (ret) + goto out; + } + ret = xfrm_mark_put(skb, &x->mark); + if (ret) + goto out; + if (x->replay_esn) { + ret = nla_put(skb, XFRMA_REPLAY_ESN_VAL, + xfrm_replay_state_esn_len(x->replay_esn), + x->replay_esn); + if (ret) + goto out; + } + if (x->security) + ret = copy_sec_ctx(x->security, skb); +out: + return ret; } static int dump_one_state(struct xfrm_state *x, int count, void *ptr) @@ -597,10 +847,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) struct nlmsghdr *nlh; int err; - if (sp->this_idx < sp->start_idx) - goto out; - - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq, XFRM_MSG_NEWSA, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) return -EMSGSIZE; @@ -608,31 +855,68 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) p = nlmsg_data(nlh); err = copy_to_user_state_extra(x, p, skb); - if (err) - goto nla_put_failure; - + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } nlmsg_end(skb, nlh); -out: - sp->this_idx++; return 0; +} -nla_put_failure: - nlmsg_cancel(skb, nlh); - return err; +static int xfrm_dump_sa_done(struct netlink_callback *cb) +{ + struct xfrm_state_walk *walk = (struct xfrm_state_walk *) &cb->args[1]; + struct sock *sk = cb->skb->sk; + struct net *net = sock_net(sk); + + xfrm_state_walk_done(walk, net); + return 0; } +static const struct nla_policy xfrma_policy[XFRMA_MAX+1]; static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); + struct xfrm_state_walk *walk = (struct xfrm_state_walk *) &cb->args[1]; struct xfrm_dump_info info; + BUILD_BUG_ON(sizeof(struct xfrm_state_walk) > + sizeof(cb->args) - sizeof(cb->args[0])); + info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; - info.this_idx = 0; - info.start_idx = cb->args[0]; - (void) xfrm_state_walk(0, dump_one_state, &info); - cb->args[0] = info.this_idx; + + if (!cb->args[0]) { + struct nlattr *attrs[XFRMA_MAX+1]; + struct xfrm_address_filter *filter = NULL; + u8 proto = 0; + int err; + + cb->args[0] = 1; + + err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, + xfrma_policy); + if (err < 0) + return err; + + if (attrs[XFRMA_ADDRESS_FILTER]) { + filter = kmalloc(sizeof(*filter), GFP_KERNEL); + if (filter == NULL) + return -ENOMEM; + + memcpy(filter, nla_data(attrs[XFRMA_ADDRESS_FILTER]), + sizeof(*filter)); + } + + if (attrs[XFRMA_PROTO]) + proto = nla_get_u8(attrs[XFRMA_PROTO]); + + xfrm_state_walk_init(walk, proto, filter); + } + + (void) xfrm_state_walk(net, walk, dump_one_state, &info); return skb->len; } @@ -642,6 +926,7 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, { struct xfrm_dump_info info; struct sk_buff *skb; + int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) @@ -651,16 +936,30 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, info.out_skb = skb; info.nlmsg_seq = seq; info.nlmsg_flags = 0; - info.this_idx = info.start_idx = 0; - if (dump_one_state(x, 0, &info)) { + err = dump_one_state(x, 0, &info); + if (err) { kfree_skb(skb); - return NULL; + return ERR_PTR(err); } return skb; } +/* A wrapper for nlmsg_multicast() checking that nlsk is still available. + * Must be called with RCU read lock. + */ +static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, + u32 pid, unsigned int group) +{ + struct sock *nlsk = rcu_dereference(net->xfrm.nlsk); + + if (nlsk) + return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); + else + return -1; +} + static inline size_t xfrm_spdinfo_msgsize(void) { return NLMSG_ALIGN(4) @@ -668,21 +967,23 @@ static inline size_t xfrm_spdinfo_msgsize(void) + nla_total_size(sizeof(struct xfrmu_spdhinfo)); } -static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) +static int build_spdinfo(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq, u32 flags) { struct xfrmk_spdinfo si; struct xfrmu_spdinfo spc; struct xfrmu_spdhinfo sph; struct nlmsghdr *nlh; + int err; u32 *f; - nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); - if (nlh == NULL) /* shouldnt really happen ... */ + nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); + if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; f = nlmsg_data(nlh); *f = flags; - xfrm_spd_getinfo(&si); + xfrm_spd_getinfo(net, &si); spc.incnt = si.incnt; spc.outcnt = si.outcnt; spc.fwdcnt = si.fwdcnt; @@ -692,32 +993,34 @@ static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) sph.spdhcnt = si.spdhcnt; sph.spdhmcnt = si.spdhmcnt; - NLA_PUT(skb, XFRMA_SPD_INFO, sizeof(spc), &spc); - NLA_PUT(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph); + err = nla_put(skb, XFRMA_SPD_INFO, sizeof(spc), &spc); + if (!err) + err = nla_put(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } return nlmsg_end(skb, nlh); - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); - u32 spid = NETLINK_CB(skb).pid; + u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; - if (build_spdinfo(r_skb, spid, seq, *flags) < 0) + if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(xfrm_nl, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static inline size_t xfrm_sadinfo_msgsize(void) @@ -727,61 +1030,66 @@ static inline size_t xfrm_sadinfo_msgsize(void) + nla_total_size(4); /* XFRMA_SAD_CNT */ } -static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) +static int build_sadinfo(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq, u32 flags) { struct xfrmk_sadinfo si; struct xfrmu_sadhinfo sh; struct nlmsghdr *nlh; + int err; u32 *f; - nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); - if (nlh == NULL) /* shouldnt really happen ... */ + nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); + if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; f = nlmsg_data(nlh); *f = flags; - xfrm_sad_getinfo(&si); + xfrm_sad_getinfo(net, &si); sh.sadhmcnt = si.sadhmcnt; sh.sadhcnt = si.sadhcnt; - NLA_PUT_U32(skb, XFRMA_SAD_CNT, si.sadcnt); - NLA_PUT(skb, XFRMA_SAD_HINFO, sizeof(sh), &sh); + err = nla_put_u32(skb, XFRMA_SAD_CNT, si.sadcnt); + if (!err) + err = nla_put(skb, XFRMA_SAD_HINFO, sizeof(sh), &sh); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } return nlmsg_end(skb, nlh); - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); - u32 spid = NETLINK_CB(skb).pid; + u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; - if (build_sadinfo(r_skb, spid, seq, *flags) < 0) + if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(xfrm_nl, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_usersa_id *p = nlmsg_data(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; int err = -ESRCH; - x = xfrm_user_state_lookup(p, attrs, &err); + x = xfrm_user_state_lookup(net, p, attrs, &err); if (x == NULL) goto out_noput; @@ -789,48 +1097,28 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(xfrm_nl, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: return err; } -static int verify_userspi_info(struct xfrm_userspi_info *p) -{ - switch (p->info.id.proto) { - case IPPROTO_AH: - case IPPROTO_ESP: - break; - - case IPPROTO_COMP: - /* IPCOMP spi is 16-bits. */ - if (p->max >= 0x10000) - return -EINVAL; - break; - - default: - return -EINVAL; - } - - if (p->min > p->max) - return -EINVAL; - - return 0; -} - static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_state *x; struct xfrm_userspi_info *p; struct sk_buff *resp_skb; xfrm_address_t *daddr; int family; int err; + u32 mark; + struct xfrm_mark m; p = nlmsg_data(nlh); - err = verify_userspi_info(p); + err = verify_spi_info(p->info.id.proto, p->min, p->max); if (err) goto out_noput; @@ -838,16 +1126,18 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, daddr = &p->info.id.daddr; x = NULL; + + mark = xfrm_mark_get(attrs, &m); if (p->info.seq) { - x = xfrm_find_acq_byseq(p->info.seq); - if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { + x = xfrm_find_acq_byseq(net, mark, p->info.seq); + if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; } } if (!x) - x = xfrm_find_acq(p->info.mode, p->info.reqid, + x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid, p->info.id.proto, daddr, &p->info.saddr, 1, family); @@ -865,7 +1155,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } - err = nlmsg_unicast(xfrm_nl, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); @@ -906,6 +1196,8 @@ static int verify_policy_type(u8 type) static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) { + int ret; + switch (p->share) { case XFRM_SHARE_ANY: case XFRM_SHARE_SESSION: @@ -931,7 +1223,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) break; case AF_INET6: -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) break; #else return -EAFNOSUPPORT; @@ -941,7 +1233,13 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) return -EINVAL; } - return verify_policy_dir(p->dir); + ret = verify_policy_dir(p->dir); + if (ret) + return ret; + if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir)) + return -EINVAL; + + return 0; } static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct nlattr **attrs) @@ -953,7 +1251,7 @@ static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct nlattr **attrs return 0; uctx = nla_data(rt); - return security_xfrm_policy_alloc(pol, uctx); + return security_xfrm_policy_alloc(&pol->security, uctx, GFP_KERNEL); } static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, @@ -975,6 +1273,8 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, t->aalgos = ut->aalgos; t->ealgos = ut->ealgos; t->calgos = ut->calgos; + /* If all masks are ~0, then we allow all algorithms. */ + t->allalgs = !~(t->aalgos & t->ealgos & t->calgos); t->encap_family = ut->family; } } @@ -1000,7 +1300,7 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) switch (ut[i].family) { case AF_INET: break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: break; #endif @@ -1066,6 +1366,7 @@ static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir) { + memset(p, 0, sizeof(*p)); memcpy(&p->sel, &xp->selector, sizeof(p->sel)); memcpy(&p->lft, &xp->lft, sizeof(p->lft)); memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft)); @@ -1078,9 +1379,9 @@ static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_i p->share = XFRM_SHARE_ANY; /* XXX xp->share */ } -static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct nlattr **attrs, int *errp) +static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_userpolicy_info *p, struct nlattr **attrs, int *errp) { - struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL); + struct xfrm_policy *xp = xfrm_policy_alloc(net, GFP_KERNEL); int err; if (!xp) { @@ -1099,10 +1400,12 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, if (err) goto error; + xfrm_mark_get(attrs, &xp->mark); + return xp; error: *errp = err; - xp->dead = 1; + xp->walk.dead = 1; xfrm_policy_destroy(xp); return NULL; } @@ -1110,6 +1413,7 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_userpolicy_info *p = nlmsg_data(nlh); struct xfrm_policy *xp; struct km_event c; @@ -1123,28 +1427,27 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; - xp = xfrm_policy_construct(p, attrs, &err); + xp = xfrm_policy_construct(net, p, attrs, &err); if (!xp) return err; - /* shouldnt excl be based on nlh flags?? + /* shouldn't excl be based on nlh flags?? * Aha! this is anti-netlink really i.e more pfkey derived * in netlink excl is a flag and you wouldnt need * a type XFRM_MSG_UPDPOLICY - JHS */ excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); - xfrm_audit_policy_add(xp, err ? 0 : 1, NETLINK_CB(skb).loginuid, - NETLINK_CB(skb).sid); + xfrm_audit_policy_add(xp, err ? 0 : 1, true); if (err) { - security_xfrm_policy_free(xp); + security_xfrm_policy_free(xp->security); kfree(xp); return err; } c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; km_policy_notify(xp, p->dir, &c); xfrm_pol_put(xp); @@ -1164,6 +1467,7 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) struct xfrm_user_tmpl *up = &vec[i]; struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; + memset(up, 0, sizeof(*up)); memcpy(&up->id, &kp->id, sizeof(up->id)); up->family = kp->encap_family; memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr)); @@ -1190,9 +1494,8 @@ static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buf static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) { - if (xp->security) { + if (xp->security) return copy_sec_ctx(xp->security, skb); - } return 0; } static inline size_t userpolicy_type_attrsize(void) @@ -1228,49 +1531,59 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr struct sk_buff *in_skb = sp->in_skb; struct sk_buff *skb = sp->out_skb; struct nlmsghdr *nlh; + int err; - if (sp->this_idx < sp->start_idx) - goto out; - - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq, XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) return -EMSGSIZE; p = nlmsg_data(nlh); copy_to_user_policy(xp, p, dir); - if (copy_to_user_tmpl(xp, skb) < 0) - goto nlmsg_failure; - if (copy_to_user_sec_ctx(xp, skb)) - goto nlmsg_failure; - if (copy_to_user_policy_type(xp->type, skb) < 0) - goto nlmsg_failure; - + err = copy_to_user_tmpl(xp, skb); + if (!err) + err = copy_to_user_sec_ctx(xp, skb); + if (!err) + err = copy_to_user_policy_type(xp->type, skb); + if (!err) + err = xfrm_mark_put(skb, &xp->mark); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } nlmsg_end(skb, nlh); -out: - sp->this_idx++; return 0; +} -nlmsg_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; +static int xfrm_dump_policy_done(struct netlink_callback *cb) +{ + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; + struct net *net = sock_net(cb->skb->sk); + + xfrm_policy_walk_done(walk, net); + return 0; } static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; struct xfrm_dump_info info; + BUILD_BUG_ON(sizeof(struct xfrm_policy_walk) > + sizeof(cb->args) - sizeof(cb->args[0])); + info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; - info.this_idx = 0; - info.start_idx = cb->args[0]; - (void) xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_one_policy, &info); -#ifdef CONFIG_XFRM_SUB_POLICY - (void) xfrm_policy_walk(XFRM_POLICY_TYPE_SUB, dump_one_policy, &info); -#endif - cb->args[0] = info.this_idx; + + if (!cb->args[0]) { + cb->args[0] = 1; + xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY); + } + + (void) xfrm_policy_walk(net, walk, dump_one_policy, &info); return skb->len; } @@ -1281,6 +1594,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, { struct xfrm_dump_info info; struct sk_buff *skb; + int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) @@ -1290,11 +1604,11 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, info.out_skb = skb; info.nlmsg_seq = seq; info.nlmsg_flags = 0; - info.this_idx = info.start_idx = 0; - if (dump_one_policy(xp, dir, 0, &info) < 0) { + err = dump_one_policy(xp, dir, 0, &info); + if (err) { kfree_skb(skb); - return NULL; + return ERR_PTR(err); } return skb; @@ -1303,12 +1617,15 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; u8 type = XFRM_POLICY_TYPE_MAIN; int err; struct km_event c; int delete; + struct xfrm_mark m; + u32 mark = xfrm_mark_get(attrs, &m); p = nlmsg_data(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; @@ -1322,25 +1639,26 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return err; if (p->index) - xp = xfrm_policy_byid(type, p->dir, p->index, delete, &err); + xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, delete, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; - struct xfrm_policy tmp; + struct xfrm_sec_ctx *ctx; err = verify_sec_ctx_len(attrs); if (err) return err; - memset(&tmp, 0, sizeof(struct xfrm_policy)); + ctx = NULL; if (rt) { struct xfrm_user_sec_ctx *uctx = nla_data(rt); - if ((err = security_xfrm_policy_alloc(&tmp, uctx))) + err = security_xfrm_policy_alloc(&ctx, uctx, GFP_KERNEL); + if (err) return err; } - xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, - delete, &err); - security_xfrm_policy_free(&tmp); + xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir, &p->sel, + ctx, delete, &err); + security_xfrm_policy_free(ctx); } if (xp == NULL) return -ENOENT; @@ -1352,13 +1670,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(xfrm_nl, resp_skb, - NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, + NETLINK_CB(skb).portid); } } else { - xfrm_audit_policy_delete(xp, err ? 0 : 1, - NETLINK_CB(skb).loginuid, - NETLINK_CB(skb).sid); + xfrm_audit_policy_delete(xp, err ? 0 : 1, true); if (err != 0) goto out; @@ -1366,99 +1682,133 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.data.byid = p->index; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; km_policy_notify(xp, p->dir, &c); } out: xfrm_pol_put(xp); + if (delete && err == 0) + xfrm_garbage_collect(net); return err; } static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct km_event c; struct xfrm_usersa_flush *p = nlmsg_data(nlh); - struct xfrm_audit audit_info; int err; - audit_info.loginuid = NETLINK_CB(skb).loginuid; - audit_info.secid = NETLINK_CB(skb).sid; - err = xfrm_state_flush(p->proto, &audit_info); - if (err) + err = xfrm_state_flush(net, p->proto, true); + if (err) { + if (err == -ESRCH) /* empty table */ + return 0; return err; + } c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; + c.net = net; km_state_notify(NULL, &c); return 0; } -static inline size_t xfrm_aevent_msgsize(void) +static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x) { + size_t replay_size = x->replay_esn ? + xfrm_replay_state_esn_len(x->replay_esn) : + sizeof(struct xfrm_replay_state); + return NLMSG_ALIGN(sizeof(struct xfrm_aevent_id)) - + nla_total_size(sizeof(struct xfrm_replay_state)) + + nla_total_size(replay_size) + nla_total_size(sizeof(struct xfrm_lifetime_cur)) + + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(4) /* XFRM_AE_RTHR */ + nla_total_size(4); /* XFRM_AE_ETHR */ } -static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c) +static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c) { struct xfrm_aevent_id *id; struct nlmsghdr *nlh; + int err; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); if (nlh == NULL) return -EMSGSIZE; id = nlmsg_data(nlh); - memcpy(&id->sa_id.daddr, &x->id.daddr,sizeof(x->id.daddr)); + memcpy(&id->sa_id.daddr, &x->id.daddr, sizeof(x->id.daddr)); id->sa_id.spi = x->id.spi; id->sa_id.family = x->props.family; id->sa_id.proto = x->id.proto; - memcpy(&id->saddr, &x->props.saddr,sizeof(x->props.saddr)); + memcpy(&id->saddr, &x->props.saddr, sizeof(x->props.saddr)); id->reqid = x->props.reqid; id->flags = c->data.aevent; - NLA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay); - NLA_PUT(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft); - - if (id->flags & XFRM_AE_RTHR) - NLA_PUT_U32(skb, XFRMA_REPLAY_THRESH, x->replay_maxdiff); + if (x->replay_esn) { + err = nla_put(skb, XFRMA_REPLAY_ESN_VAL, + xfrm_replay_state_esn_len(x->replay_esn), + x->replay_esn); + } else { + err = nla_put(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), + &x->replay); + } + if (err) + goto out_cancel; + err = nla_put(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft); + if (err) + goto out_cancel; - if (id->flags & XFRM_AE_ETHR) - NLA_PUT_U32(skb, XFRMA_ETIMER_THRESH, - x->replay_maxage * 10 / HZ); + if (id->flags & XFRM_AE_RTHR) { + err = nla_put_u32(skb, XFRMA_REPLAY_THRESH, x->replay_maxdiff); + if (err) + goto out_cancel; + } + if (id->flags & XFRM_AE_ETHR) { + err = nla_put_u32(skb, XFRMA_ETIMER_THRESH, + x->replay_maxage * 10 / HZ); + if (err) + goto out_cancel; + } + err = xfrm_mark_put(skb, &x->mark); + if (err) + goto out_cancel; return nlmsg_end(skb, nlh); -nla_put_failure: +out_cancel: nlmsg_cancel(skb, nlh); - return -EMSGSIZE; + return err; } static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_state *x; struct sk_buff *r_skb; int err; struct km_event c; + u32 mark; + struct xfrm_mark m; struct xfrm_aevent_id *p = nlmsg_data(nlh); struct xfrm_usersa_id *id = &p->sa_id; - r_skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); - if (r_skb == NULL) - return -ENOMEM; + mark = xfrm_mark_get(attrs, &m); - x = xfrm_state_lookup(&id->daddr, id->spi, id->proto, id->family); - if (x == NULL) { - kfree_skb(r_skb); + x = xfrm_state_lookup(net, mark, &id->daddr, id->spi, id->proto, id->family); + if (x == NULL) return -ESRCH; + + r_skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC); + if (r_skb == NULL) { + xfrm_state_put(x); + return -ENOMEM; } /* @@ -1469,11 +1819,11 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, spin_lock_bh(&x->lock); c.data.aevent = p->flags; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; if (build_aevent(r_skb, x, &c) < 0) BUG(); - err = nlmsg_unicast(xfrm_nl, r_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; @@ -1482,34 +1832,44 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_state *x; struct km_event c; - int err = - EINVAL; + int err = -EINVAL; + u32 mark = 0; + struct xfrm_mark m; struct xfrm_aevent_id *p = nlmsg_data(nlh); struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; + struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; - if (!lt && !rp) + if (!lt && !rp && !re) return err; /* pedantic mode - thou shalt sayeth replaceth */ if (!(nlh->nlmsg_flags&NLM_F_REPLACE)) return err; - x = xfrm_state_lookup(&p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); + mark = xfrm_mark_get(attrs, &m); + + x = xfrm_state_lookup(net, mark, &p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); if (x == NULL) return -ESRCH; if (x->km.state != XFRM_STATE_VALID) goto out; + err = xfrm_replay_verify_len(x->replay_esn, re); + if (err) + goto out; + spin_lock_bh(&x->lock); - xfrm_update_ae_params(x, attrs); + xfrm_update_ae_params(x, attrs, 1); spin_unlock_bh(&x->lock); c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.data.aevent = XFRM_AE_CU; km_state_notify(x, &c); err = 0; @@ -1521,24 +1881,27 @@ out: static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct km_event c; u8 type = XFRM_POLICY_TYPE_MAIN; int err; - struct xfrm_audit audit_info; err = copy_from_user_policy_type(&type, attrs); if (err) return err; - audit_info.loginuid = NETLINK_CB(skb).loginuid; - audit_info.secid = NETLINK_CB(skb).sid; - err = xfrm_policy_flush(type, &audit_info); - if (err) + err = xfrm_policy_flush(net, type, true); + if (err) { + if (err == -ESRCH) /* empty table */ + return 0; return err; + } + c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; + c.net = net; km_policy_notify(NULL, 0, &c); return 0; } @@ -1546,58 +1909,60 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_policy *xp; struct xfrm_user_polexpire *up = nlmsg_data(nlh); struct xfrm_userpolicy_info *p = &up->pol; u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; + struct xfrm_mark m; + u32 mark = xfrm_mark_get(attrs, &m); err = copy_from_user_policy_type(&type, attrs); if (err) return err; + err = verify_policy_dir(p->dir); + if (err) + return err; + if (p->index) - xp = xfrm_policy_byid(type, p->dir, p->index, 0, &err); + xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, 0, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; - struct xfrm_policy tmp; + struct xfrm_sec_ctx *ctx; err = verify_sec_ctx_len(attrs); if (err) return err; - memset(&tmp, 0, sizeof(struct xfrm_policy)); + ctx = NULL; if (rt) { struct xfrm_user_sec_ctx *uctx = nla_data(rt); - if ((err = security_xfrm_policy_alloc(&tmp, uctx))) + err = security_xfrm_policy_alloc(&ctx, uctx, GFP_KERNEL); + if (err) return err; } - xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, - 0, &err); - security_xfrm_policy_free(&tmp); + xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir, + &p->sel, ctx, 0, &err); + security_xfrm_policy_free(ctx); } - if (xp == NULL) return -ENOENT; - read_lock(&xp->lock); - if (xp->dead) { - read_unlock(&xp->lock); + + if (unlikely(xp->walk.dead)) goto out; - } - read_unlock(&xp->lock); err = 0; if (up->hard) { xfrm_policy_delete(xp, p->dir); - xfrm_audit_policy_delete(xp, 1, NETLINK_CB(skb).loginuid, - NETLINK_CB(skb).sid); - + xfrm_audit_policy_delete(xp, 1, true); } else { // reset the timers here? - printk("Dont know what to do with soft policy expire\n"); + WARN(1, "Dont know what to do with soft policy expire\n"); } - km_policy_expired(xp, p->dir, up->hard, current->pid); + km_policy_expired(xp, p->dir, up->hard, nlh->nlmsg_pid); out: xfrm_pol_put(xp); @@ -1607,12 +1972,15 @@ out: static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_state *x; int err; struct xfrm_user_expire *ue = nlmsg_data(nlh); struct xfrm_usersa_info *p = &ue->state; + struct xfrm_mark m; + u32 mark = xfrm_mark_get(attrs, &m); - x = xfrm_state_lookup(&p->id.daddr, p->id.spi, p->id.proto, p->family); + x = xfrm_state_lookup(net, mark, &p->id.daddr, p->id.spi, p->id.proto, p->family); err = -ENOENT; if (x == NULL) @@ -1622,12 +1990,11 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, err = -EINVAL; if (x->km.state != XFRM_STATE_VALID) goto out; - km_state_expired(x, ue->hard, current->pid); + km_state_expired(x, ue->hard, nlh->nlmsg_pid); if (ue->hard) { __xfrm_state_delete(x); - xfrm_audit_state_delete(x, 1, NETLINK_CB(skb).loginuid, - NETLINK_CB(skb).sid); + xfrm_audit_state_delete(x, 1, true); } err = 0; out: @@ -1639,36 +2006,36 @@ out: static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_policy *xp; struct xfrm_user_tmpl *ut; int i; struct nlattr *rt = attrs[XFRMA_TMPL]; + struct xfrm_mark mark; struct xfrm_user_acquire *ua = nlmsg_data(nlh); - struct xfrm_state *x = xfrm_state_alloc(); + struct xfrm_state *x = xfrm_state_alloc(net); int err = -ENOMEM; if (!x) - return err; + goto nomem; + + xfrm_mark_get(attrs, &mark); err = verify_newpolicy_info(&ua->policy); - if (err) { - printk("BAD policy passed\n"); - kfree(x); - return err; - } + if (err) + goto bad_policy; /* build an XP */ - xp = xfrm_policy_construct(&ua->policy, attrs, &err); - if (!xp) { - kfree(x); - return err; - } + xp = xfrm_policy_construct(net, &ua->policy, attrs, &err); + if (!xp) + goto free_state; memcpy(&x->id, &ua->id, sizeof(ua->id)); memcpy(&x->props.saddr, &ua->saddr, sizeof(ua->saddr)); memcpy(&x->sel, &ua->sel, sizeof(ua->sel)); - + xp->mark.m = x->mark.m = mark.m; + xp->mark.v = x->mark.v = mark.v; ut = nla_data(rt); /* extract the templates and for each call km_key */ for (i = 0; i < xp->xfrm_nr; i++, ut++) { @@ -1688,16 +2055,34 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, kfree(xp); return 0; + +bad_policy: + WARN(1, "BAD policy passed\n"); +free_state: + kfree(x); +nomem: + return err; } #ifdef CONFIG_XFRM_MIGRATE static int copy_from_user_migrate(struct xfrm_migrate *ma, + struct xfrm_kmaddress *k, struct nlattr **attrs, int *num) { struct nlattr *rt = attrs[XFRMA_MIGRATE]; struct xfrm_user_migrate *um; int i, num_migrate; + if (k != NULL) { + struct xfrm_user_kmaddress *uk; + + uk = nla_data(attrs[XFRMA_KMADDRESS]); + memcpy(&k->local, &uk->local, sizeof(k->local)); + memcpy(&k->remote, &uk->remote, sizeof(k->remote)); + k->family = uk->family; + k->reserved = uk->reserved; + } + um = nla_data(rt); num_migrate = nla_len(rt) / sizeof(*um); @@ -1727,26 +2112,29 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, { struct xfrm_userpolicy_id *pi = nlmsg_data(nlh); struct xfrm_migrate m[XFRM_MAX_DEPTH]; + struct xfrm_kmaddress km, *kmp; u8 type; int err; int n = 0; + struct net *net = sock_net(skb->sk); if (attrs[XFRMA_MIGRATE] == NULL) return -EINVAL; + kmp = attrs[XFRMA_KMADDRESS] ? &km : NULL; + err = copy_from_user_policy_type(&type, attrs); if (err) return err; - err = copy_from_user_migrate((struct xfrm_migrate *)m, - attrs, &n); + err = copy_from_user_migrate((struct xfrm_migrate *)m, kmp, attrs, &n); if (err) return err; if (!n) return 0; - xfrm_migrate(&pi->sel, pi->dir, type, m, n); + xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net); return 0; } @@ -1759,7 +2147,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, #endif #ifdef CONFIG_XFRM_MIGRATE -static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb) +static int copy_to_user_migrate(const struct xfrm_migrate *m, struct sk_buff *skb) { struct xfrm_user_migrate um; @@ -1777,21 +2165,35 @@ static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb) return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um); } -static inline size_t xfrm_migrate_msgsize(int num_migrate) +static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff *skb) +{ + struct xfrm_user_kmaddress uk; + + memset(&uk, 0, sizeof(uk)); + uk.family = k->family; + uk.reserved = k->reserved; + memcpy(&uk.local, &k->local, sizeof(uk.local)); + memcpy(&uk.remote, &k->remote, sizeof(uk.remote)); + + return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); +} + +static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma) { return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)) - + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) - + userpolicy_type_attrsize(); + + (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0) + + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) + + userpolicy_type_attrsize(); } -static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, - int num_migrate, struct xfrm_selector *sel, - u8 dir, u8 type) +static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m, + int num_migrate, const struct xfrm_kmaddress *k, + const struct xfrm_selector *sel, u8 dir, u8 type) { - struct xfrm_migrate *mp; + const struct xfrm_migrate *mp; struct xfrm_userpolicy_id *pol_id; struct nlmsghdr *nlh; - int i; + int i, err; nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id), 0); if (nlh == NULL) @@ -1803,38 +2205,48 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, memcpy(&pol_id->sel, sel, sizeof(pol_id->sel)); pol_id->dir = dir; - if (copy_to_user_policy_type(type, skb) < 0) - goto nlmsg_failure; - + if (k != NULL) { + err = copy_to_user_kmaddress(k, skb); + if (err) + goto out_cancel; + } + err = copy_to_user_policy_type(type, skb); + if (err) + goto out_cancel; for (i = 0, mp = m ; i < num_migrate; i++, mp++) { - if (copy_to_user_migrate(mp, skb) < 0) - goto nlmsg_failure; + err = copy_to_user_migrate(mp, skb); + if (err) + goto out_cancel; } return nlmsg_end(skb, nlh); -nlmsg_failure: + +out_cancel: nlmsg_cancel(skb, nlh); - return -EMSGSIZE; + return err; } -static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) +static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + const struct xfrm_migrate *m, int num_migrate, + const struct xfrm_kmaddress *k) { + struct net *net = &init_net; struct sk_buff *skb; - skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate), GFP_ATOMIC); + skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; /* build migrate */ - if (build_migrate(skb, m, num_migrate, sel, dir, type) < 0) + if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE); } #else -static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) +static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + const struct xfrm_migrate *m, int num_migrate, + const struct xfrm_kmaddress *k) { return -ENOPROTOOPT; } @@ -1868,6 +2280,10 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { #undef XMSGSIZE static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { + [XFRMA_SA] = { .len = sizeof(struct xfrm_usersa_info)}, + [XFRMA_POLICY] = { .len = sizeof(struct xfrm_userpolicy_info)}, + [XFRMA_LASTUSED] = { .type = NLA_U64}, + [XFRMA_ALG_AUTH_TRUNC] = { .len = sizeof(struct xfrm_algo_auth)}, [XFRMA_ALG_AEAD] = { .len = sizeof(struct xfrm_algo_aead) }, [XFRMA_ALG_AUTH] = { .len = sizeof(struct xfrm_algo) }, [XFRMA_ALG_CRYPT] = { .len = sizeof(struct xfrm_algo) }, @@ -1883,20 +2299,30 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_COADDR] = { .len = sizeof(xfrm_address_t) }, [XFRMA_POLICY_TYPE] = { .len = sizeof(struct xfrm_userpolicy_type)}, [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, + [XFRMA_KMADDRESS] = { .len = sizeof(struct xfrm_user_kmaddress) }, + [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) }, + [XFRMA_TFCPAD] = { .type = NLA_U32 }, + [XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) }, + [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 }, + [XFRMA_PROTO] = { .type = NLA_U8 }, + [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, }; -static struct xfrm_link { +static const struct xfrm_link { int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); int (*dump)(struct sk_buff *, struct netlink_callback *); + int (*done)(struct netlink_callback *); } xfrm_dispatch[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa }, [XFRM_MSG_DELSA - XFRM_MSG_BASE] = { .doit = xfrm_del_sa }, [XFRM_MSG_GETSA - XFRM_MSG_BASE] = { .doit = xfrm_get_sa, - .dump = xfrm_dump_sa }, + .dump = xfrm_dump_sa, + .done = xfrm_dump_sa_done }, [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy }, [XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy }, [XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy, - .dump = xfrm_dump_policy }, + .dump = xfrm_dump_policy, + .done = xfrm_dump_policy_done }, [XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi }, [XFRM_MSG_ACQUIRE - XFRM_MSG_BASE] = { .doit = xfrm_add_acquire }, [XFRM_MSG_EXPIRE - XFRM_MSG_BASE] = { .doit = xfrm_add_sa_expire }, @@ -1914,8 +2340,9 @@ static struct xfrm_link { static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { + struct net *net = sock_net(skb->sk); struct nlattr *attrs[XFRMA_MAX+1]; - struct xfrm_link *link; + const struct xfrm_link *link; int type, err; type = nlh->nlmsg_type; @@ -1926,7 +2353,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) link = &xfrm_dispatch[type]; /* All operations require privileges, even GET */ - if (security_netlink_recv(skb, CAP_NET_ADMIN)) + if (!netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || @@ -1935,7 +2362,13 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (link->dump == NULL) return -EINVAL; - return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, NULL); + { + struct netlink_dump_control c = { + .dump = link->dump, + .done = link->done, + }; + return netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c); + } } err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX, @@ -1951,22 +2384,26 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) static void xfrm_netlink_rcv(struct sk_buff *skb) { - mutex_lock(&xfrm_cfg_mutex); + struct net *net = sock_net(skb->sk); + + mutex_lock(&net->xfrm.xfrm_cfg_mutex); netlink_rcv_skb(skb, &xfrm_user_rcv_msg); - mutex_unlock(&xfrm_cfg_mutex); + mutex_unlock(&net->xfrm.xfrm_cfg_mutex); } static inline size_t xfrm_expire_msgsize(void) { - return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)); + return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) + + nla_total_size(sizeof(struct xfrm_mark)); } -static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c) +static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c) { struct xfrm_user_expire *ue; struct nlmsghdr *nlh; + int err; - nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); + nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); if (nlh == NULL) return -EMSGSIZE; @@ -1974,39 +2411,48 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve copy_to_user_state(x, &ue->state); ue->hard = (c->data.hard != 0) ? 1 : 0; + err = xfrm_mark_put(skb, &x->mark); + if (err) + return err; + return nlmsg_end(skb, nlh); } -static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) +static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c) { + struct net *net = xs_net(x); struct sk_buff *skb; skb = nlmsg_new(xfrm_expire_msgsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - if (build_expire(skb, x, c) < 0) - BUG(); + if (build_expire(skb, x, c) < 0) { + kfree_skb(skb); + return -EMSGSIZE; + } - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE); } -static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c) +static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event *c) { + struct net *net = xs_net(x); struct sk_buff *skb; - skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); + skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_aevent(skb, x, c) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS); } -static int xfrm_notify_sa_flush(struct km_event *c) +static int xfrm_notify_sa_flush(const struct km_event *c) { + struct net *net = c->net; struct xfrm_usersa_flush *p; struct nlmsghdr *nlh; struct sk_buff *skb; @@ -2016,7 +2462,7 @@ static int xfrm_notify_sa_flush(struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); if (nlh == NULL) { kfree_skb(skb); return -EMSGSIZE; @@ -2027,7 +2473,7 @@ static int xfrm_notify_sa_flush(struct km_event *c) nlmsg_end(skb, nlh); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA); } static inline size_t xfrm_sa_len(struct xfrm_state *x) @@ -2035,19 +2481,28 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) size_t l = 0; if (x->aead) l += nla_total_size(aead_len(x->aead)); - if (x->aalg) - l += nla_total_size(xfrm_alg_len(x->aalg)); + if (x->aalg) { + l += nla_total_size(sizeof(struct xfrm_algo) + + (x->aalg->alg_key_len + 7) / 8); + l += nla_total_size(xfrm_alg_auth_len(x->aalg)); + } if (x->ealg) l += nla_total_size(xfrm_alg_len(x->ealg)); if (x->calg) l += nla_total_size(sizeof(*x->calg)); if (x->encap) l += nla_total_size(sizeof(*x->encap)); + if (x->tfcpad) + l += nla_total_size(sizeof(x->tfcpad)); + if (x->replay_esn) + l += nla_total_size(xfrm_replay_state_esn_len(x->replay_esn)); if (x->security) l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) + x->security->ctx_len); if (x->coaddr) l += nla_total_size(sizeof(*x->coaddr)); + if (x->props.extra_flags) + l += nla_total_size(sizeof(x->props.extra_flags)); /* Must count x->lastused as it may become non-zero behind our back. */ l += nla_total_size(sizeof(u64)); @@ -2055,19 +2510,21 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) return l; } -static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) +static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) { + struct net *net = xs_net(x); struct xfrm_usersa_info *p; struct xfrm_usersa_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; int len = xfrm_sa_len(x); - int headlen; + int headlen, err; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELSA) { len += nla_total_size(headlen); headlen = sizeof(*id); + len += nla_total_size(sizeof(struct xfrm_mark)); } len += NLMSG_ALIGN(headlen); @@ -2075,9 +2532,10 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); + err = -EMSGSIZE; if (nlh == NULL) - goto nla_put_failure; + goto out_free_skb; p = nlmsg_data(nlh); if (c->event == XFRM_MSG_DELSA) { @@ -2090,27 +2548,26 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) id->proto = x->id.proto; attr = nla_reserve(skb, XFRMA_SA, sizeof(*p)); + err = -EMSGSIZE; if (attr == NULL) - goto nla_put_failure; + goto out_free_skb; p = nla_data(attr); } - - if (copy_to_user_state_extra(x, p, skb)) - goto nla_put_failure; + err = copy_to_user_state_extra(x, p, skb); + if (err) + goto out_free_skb; nlmsg_end(skb, nlh); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA); -nla_put_failure: - /* Somebody screwed up with xfrm_sa_len! */ - WARN_ON(1); +out_free_skb: kfree_skb(skb); - return -1; + return err; } -static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c) +static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c) { switch (c->event) { @@ -2125,8 +2582,9 @@ static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c) case XFRM_MSG_FLUSHSA: return xfrm_notify_sa_flush(c); default: - printk("xfrm_user: Unknown SA event %d\n", c->event); - break; + printk(KERN_NOTICE "xfrm_user: Unknown SA event %d\n", + c->event); + break; } return 0; @@ -2138,17 +2596,18 @@ static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x, { return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire)) + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(xfrm_user_sec_ctx_size(x->security)) + userpolicy_type_attrsize(); } static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, - struct xfrm_tmpl *xt, struct xfrm_policy *xp, - int dir) + struct xfrm_tmpl *xt, struct xfrm_policy *xp) { + __u32 seq = xfrm_get_acqseq(); struct xfrm_user_acquire *ua; struct nlmsghdr *nlh; - __u32 seq = xfrm_get_acqseq(); + int err; nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_ACQUIRE, sizeof(*ua), 0); if (nlh == NULL) @@ -2158,39 +2617,41 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, memcpy(&ua->id, &x->id, sizeof(ua->id)); memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr)); memcpy(&ua->sel, &x->sel, sizeof(ua->sel)); - copy_to_user_policy(xp, &ua->policy, dir); + copy_to_user_policy(xp, &ua->policy, XFRM_POLICY_OUT); ua->aalgos = xt->aalgos; ua->ealgos = xt->ealgos; ua->calgos = xt->calgos; ua->seq = x->km.seq = seq; - if (copy_to_user_tmpl(xp, skb) < 0) - goto nlmsg_failure; - if (copy_to_user_state_sec_ctx(x, skb)) - goto nlmsg_failure; - if (copy_to_user_policy_type(xp->type, skb) < 0) - goto nlmsg_failure; + err = copy_to_user_tmpl(xp, skb); + if (!err) + err = copy_to_user_state_sec_ctx(x, skb); + if (!err) + err = copy_to_user_policy_type(xp->type, skb); + if (!err) + err = xfrm_mark_put(skb, &xp->mark); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } return nlmsg_end(skb, nlh); - -nlmsg_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, - struct xfrm_policy *xp, int dir) + struct xfrm_policy *xp) { + struct net *net = xs_net(x); struct sk_buff *skb; skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - if (build_acquire(skb, x, xt, xp, dir) < 0) + if (build_acquire(skb, x, xt, xp) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE); } /* User gives us xfrm_user_policy_info followed by an array of 0 @@ -2199,6 +2660,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { + struct net *net = sock_net(sk); struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; struct xfrm_user_tmpl *ut = (struct xfrm_user_tmpl *) (p + 1); struct xfrm_policy *xp; @@ -2211,7 +2673,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, return NULL; } break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: if (opt != IPV6_XFRM_POLICY) { *dir = -EOPNOTSUPP; @@ -2237,7 +2699,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, if (p->dir > XFRM_POLICY_OUT) return NULL; - xp = xfrm_policy_alloc(GFP_KERNEL); + xp = xfrm_policy_alloc(net, GFP_ATOMIC); if (xp == NULL) { *dir = -ENOBUFS; return NULL; @@ -2257,39 +2719,43 @@ static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp) return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire)) + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + nla_total_size(xfrm_user_sec_ctx_size(xp->security)) + + nla_total_size(sizeof(struct xfrm_mark)) + userpolicy_type_attrsize(); } static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, - int dir, struct km_event *c) + int dir, const struct km_event *c) { struct xfrm_user_polexpire *upe; - struct nlmsghdr *nlh; int hard = c->data.hard; + struct nlmsghdr *nlh; + int err; - nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); + nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); if (nlh == NULL) return -EMSGSIZE; upe = nlmsg_data(nlh); copy_to_user_policy(xp, &upe->pol, dir); - if (copy_to_user_tmpl(xp, skb) < 0) - goto nlmsg_failure; - if (copy_to_user_sec_ctx(xp, skb)) - goto nlmsg_failure; - if (copy_to_user_policy_type(xp->type, skb) < 0) - goto nlmsg_failure; + err = copy_to_user_tmpl(xp, skb); + if (!err) + err = copy_to_user_sec_ctx(xp, skb); + if (!err) + err = copy_to_user_policy_type(xp->type, skb); + if (!err) + err = xfrm_mark_put(skb, &xp->mark); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } upe->hard = !!hard; return nlmsg_end(skb, nlh); - -nlmsg_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } -static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) +static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { + struct net *net = xp_net(xp); struct sk_buff *skb; skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC); @@ -2299,17 +2765,18 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve if (build_polexpire(skb, xp, dir, c) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE); } -static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) +static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) { + int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); + struct net *net = xp_net(xp); struct xfrm_userpolicy_info *p; struct xfrm_userpolicy_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); - int headlen; + int headlen, err; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELPOLICY) { @@ -2317,15 +2784,17 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * headlen = sizeof(*id); } len += userpolicy_type_attrsize(); + len += nla_total_size(sizeof(struct xfrm_mark)); len += NLMSG_ALIGN(headlen); skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); + err = -EMSGSIZE; if (nlh == NULL) - goto nlmsg_failure; + goto out_free_skb; p = nlmsg_data(nlh); if (c->event == XFRM_MSG_DELPOLICY) { @@ -2340,52 +2809,60 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * memcpy(&id->sel, &xp->selector, sizeof(id->sel)); attr = nla_reserve(skb, XFRMA_POLICY, sizeof(*p)); + err = -EMSGSIZE; if (attr == NULL) - goto nlmsg_failure; + goto out_free_skb; p = nla_data(attr); } copy_to_user_policy(xp, p, dir); - if (copy_to_user_tmpl(xp, skb) < 0) - goto nlmsg_failure; - if (copy_to_user_policy_type(xp->type, skb) < 0) - goto nlmsg_failure; + err = copy_to_user_tmpl(xp, skb); + if (!err) + err = copy_to_user_policy_type(xp->type, skb); + if (!err) + err = xfrm_mark_put(skb, &xp->mark); + if (err) + goto out_free_skb; nlmsg_end(skb, nlh); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY); -nlmsg_failure: +out_free_skb: kfree_skb(skb); - return -1; + return err; } -static int xfrm_notify_policy_flush(struct km_event *c) +static int xfrm_notify_policy_flush(const struct km_event *c) { + struct net *net = c->net; struct nlmsghdr *nlh; struct sk_buff *skb; + int err; skb = nlmsg_new(userpolicy_type_attrsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); + err = -EMSGSIZE; if (nlh == NULL) - goto nlmsg_failure; - if (copy_to_user_policy_type(c->data.type, skb) < 0) - goto nlmsg_failure; + goto out_free_skb; + err = copy_to_user_policy_type(c->data.type, skb); + if (err) + goto out_free_skb; nlmsg_end(skb, nlh); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY); -nlmsg_failure: +out_free_skb: kfree_skb(skb); - return -1; + return err; } -static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) +static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { switch (c->event) { @@ -2398,7 +2875,8 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev case XFRM_MSG_POLEXPIRE: return xfrm_exp_policy_notify(xp, dir, c); default: - printk("xfrm_user: Unknown Policy event %d\n", c->event); + printk(KERN_NOTICE "xfrm_user: Unknown Policy event %d\n", + c->event); } return 0; @@ -2424,18 +2902,18 @@ static int build_report(struct sk_buff *skb, u8 proto, ur->proto = proto; memcpy(&ur->sel, sel, sizeof(ur->sel)); - if (addr) - NLA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); - + if (addr) { + int err = nla_put(skb, XFRMA_COADDR, sizeof(*addr), addr); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } + } return nlmsg_end(skb, nlh); - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } -static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, - xfrm_address_t *addr) +static int xfrm_send_report(struct net *net, u8 proto, + struct xfrm_selector *sel, xfrm_address_t *addr) { struct sk_buff *skb; @@ -2446,7 +2924,64 @@ static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, if (build_report(skb, proto, sel, addr) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT); +} + +static inline size_t xfrm_mapping_msgsize(void) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_mapping)); +} + +static int build_mapping(struct sk_buff *skb, struct xfrm_state *x, + xfrm_address_t *new_saddr, __be16 new_sport) +{ + struct xfrm_user_mapping *um; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_MAPPING, sizeof(*um), 0); + if (nlh == NULL) + return -EMSGSIZE; + + um = nlmsg_data(nlh); + + memcpy(&um->id.daddr, &x->id.daddr, sizeof(um->id.daddr)); + um->id.spi = x->id.spi; + um->id.family = x->props.family; + um->id.proto = x->id.proto; + memcpy(&um->new_saddr, new_saddr, sizeof(um->new_saddr)); + memcpy(&um->old_saddr, &x->props.saddr, sizeof(um->old_saddr)); + um->new_sport = new_sport; + um->old_sport = x->encap->encap_sport; + um->reqid = x->props.reqid; + + return nlmsg_end(skb, nlh); +} + +static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, + __be16 sport) +{ + struct net *net = xs_net(x); + struct sk_buff *skb; + + if (x->id.proto != IPPROTO_ESP) + return -EINVAL; + + if (!x->encap) + return -EINVAL; + + skb = nlmsg_new(xfrm_mapping_msgsize(), GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + if (build_mapping(skb, x, ipaddr, sport) < 0) + BUG(); + + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING); +} + +static bool xfrm_is_alive(const struct km_event *c) +{ + return (bool)xfrm_acquire_is_on(c->net); } static struct xfrm_mgr netlink_mgr = { @@ -2457,33 +2992,60 @@ static struct xfrm_mgr netlink_mgr = { .notify_policy = xfrm_send_policy_notify, .report = xfrm_send_report, .migrate = xfrm_send_migrate, + .new_mapping = xfrm_send_mapping, + .is_alive = xfrm_is_alive, }; -static int __init xfrm_user_init(void) +static int __net_init xfrm_user_net_init(struct net *net) { struct sock *nlsk; + struct netlink_kernel_cfg cfg = { + .groups = XFRMNLGRP_MAX, + .input = xfrm_netlink_rcv, + }; - printk(KERN_INFO "Initializing XFRM netlink socket\n"); - - nlsk = netlink_kernel_create(&init_net, NETLINK_XFRM, XFRMNLGRP_MAX, - xfrm_netlink_rcv, NULL, THIS_MODULE); + nlsk = netlink_kernel_create(net, NETLINK_XFRM, &cfg); if (nlsk == NULL) return -ENOMEM; - rcu_assign_pointer(xfrm_nl, nlsk); + net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */ + rcu_assign_pointer(net->xfrm.nlsk, nlsk); + return 0; +} - xfrm_register_km(&netlink_mgr); +static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list) +{ + struct net *net; + list_for_each_entry(net, net_exit_list, exit_list) + RCU_INIT_POINTER(net->xfrm.nlsk, NULL); + synchronize_net(); + list_for_each_entry(net, net_exit_list, exit_list) + netlink_kernel_release(net->xfrm.nlsk_stash); +} - return 0; +static struct pernet_operations xfrm_user_net_ops = { + .init = xfrm_user_net_init, + .exit_batch = xfrm_user_net_exit, +}; + +static int __init xfrm_user_init(void) +{ + int rv; + + printk(KERN_INFO "Initializing XFRM netlink socket\n"); + + rv = register_pernet_subsys(&xfrm_user_net_ops); + if (rv < 0) + return rv; + rv = xfrm_register_km(&netlink_mgr); + if (rv < 0) + unregister_pernet_subsys(&xfrm_user_net_ops); + return rv; } static void __exit xfrm_user_exit(void) { - struct sock *nlsk = xfrm_nl; - xfrm_unregister_km(&netlink_mgr); - rcu_assign_pointer(xfrm_nl, NULL); - synchronize_rcu(); - netlink_kernel_release(nlsk); + unregister_pernet_subsys(&xfrm_user_net_ops); } module_init(xfrm_user_init); |
