diff options
Diffstat (limited to 'net/xfrm')
| -rw-r--r-- | net/xfrm/Kconfig | 68 | ||||
| -rw-r--r-- | net/xfrm/Makefile | 8 | ||||
| -rw-r--r-- | net/xfrm/xfrm_algo.c | 838 | ||||
| -rw-r--r-- | net/xfrm/xfrm_hash.c | 39 | ||||
| -rw-r--r-- | net/xfrm/xfrm_hash.h | 136 | ||||
| -rw-r--r-- | net/xfrm/xfrm_input.c | 322 | ||||
| -rw-r--r-- | net/xfrm/xfrm_ipcomp.c | 386 | ||||
| -rw-r--r-- | net/xfrm/xfrm_output.c | 238 | ||||
| -rw-r--r-- | net/xfrm/xfrm_policy.c | 3111 | ||||
| -rw-r--r-- | net/xfrm/xfrm_proc.c | 86 | ||||
| -rw-r--r-- | net/xfrm/xfrm_replay.c | 603 | ||||
| -rw-r--r-- | net/xfrm/xfrm_state.c | 2073 | ||||
| -rw-r--r-- | net/xfrm/xfrm_sysctl.c | 86 | ||||
| -rw-r--r-- | net/xfrm/xfrm_user.c | 2583 |
14 files changed, 8537 insertions, 2040 deletions
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 0c1c04322ba..bda1a13628a 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -5,18 +5,61 @@ config XFRM bool depends on NET +config XFRM_ALGO + tristate + select XFRM + select CRYPTO + config XFRM_USER - tristate "IPsec user configuration interface" - depends on INET && XFRM + tristate "Transformation user configuration interface" + depends on INET + select XFRM_ALGO ---help--- - Support for IPsec user configuration interface used - by native Linux tools. + Support for Transformation(XFRM) user configuration interface + like IPsec used by native Linux tools. If unsure, say Y. +config XFRM_SUB_POLICY + bool "Transformation sub policy support" + depends on XFRM + ---help--- + Support sub policy for developers. By using sub policy with main + one, two policies can be applied to the same packet at once. + Policy which lives shorter time in kernel should be a sub. + + If unsure, say N. + +config XFRM_MIGRATE + bool "Transformation migrate database" + depends on XFRM + ---help--- + A feature to update locator(s) of a given IPsec security + association dynamically. This feature is required, for + instance, in a Mobile IPv6 environment with IPsec configuration + where mobile nodes change their attachment point to the Internet. + + If unsure, say N. + +config XFRM_STATISTICS + bool "Transformation statistics" + depends on INET && XFRM && PROC_FS + ---help--- + This statistics is not a SNMP/MIB specification but shows + statistics about transformation error (or almost error) factor + at packet processing for developer. + + If unsure, say N. + +config XFRM_IPCOMP + tristate + select XFRM_ALGO + select CRYPTO + select CRYPTO_DEFLATE + config NET_KEY tristate "PF_KEY sockets" - select XFRM + select XFRM_ALGO ---help--- PF_KEYv2 socket family, compatible to KAME ones. They are required if you are going to use IPsec tools ported @@ -24,4 +67,19 @@ config NET_KEY Say Y unless you know what you are doing. +config NET_KEY_MIGRATE + bool "PF_KEY MIGRATE" + depends on NET_KEY + select XFRM_MIGRATE + ---help--- + Add a PF_KEY MIGRATE message to PF_KEYv2 socket family. + The PF_KEY MIGRATE message is used to dynamically update + locator(s) of a given IPsec security association. + This feature is required, for instance, in a Mobile IPv6 + environment with IPsec configuration where mobile nodes + change their attachment point to the Internet. Detail + information can be found in the internet-draft + <draft-sugimoto-mip6-pfkey-migrate>. + + If unsure, say N. diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 693aac1aa83..c0e961983f1 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -2,6 +2,10 @@ # Makefile for the XFRM subsystem. # -obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o +obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ + xfrm_input.o xfrm_output.o \ + xfrm_sysctl.o xfrm_replay.o +obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o +obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o - +obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 6ed3302312f..debe733386f 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -1,27 +1,23 @@ -/* +/* * xfrm algorithm interface * * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) + * Software Foundation; either version 2 of the License, or (at your option) * any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/pfkeyv2.h> #include <linux/crypto.h> +#include <linux/scatterlist.h> #include <net/xfrm.h> -#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE) -#include <net/ah.h> -#endif #if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) #include <net/esp.h> #endif -#include <asm/scatterlist.h> /* * Algorithms supported by IPsec. These entries contain properties which @@ -29,17 +25,148 @@ * that instantiated crypto transforms have correct parameters for IPsec * purposes. */ +static struct xfrm_algo_desc aead_list[] = { +{ + .name = "rfc4106(gcm(aes))", + + .uinfo = { + .aead = { + .icv_truncbits = 64, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV8, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +{ + .name = "rfc4106(gcm(aes))", + + .uinfo = { + .aead = { + .icv_truncbits = 96, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV12, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +{ + .name = "rfc4106(gcm(aes))", + + .uinfo = { + .aead = { + .icv_truncbits = 128, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV16, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +{ + .name = "rfc4309(ccm(aes))", + + .uinfo = { + .aead = { + .icv_truncbits = 64, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV8, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +{ + .name = "rfc4309(ccm(aes))", + + .uinfo = { + .aead = { + .icv_truncbits = 96, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV12, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +{ + .name = "rfc4309(ccm(aes))", + + .uinfo = { + .aead = { + .icv_truncbits = 128, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV16, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +{ + .name = "rfc4543(gcm(aes))", + + .uinfo = { + .aead = { + .icv_truncbits = 128, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_EALG_NULL_AES_GMAC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +}; + static struct xfrm_algo_desc aalg_list[] = { { .name = "digest_null", - + .uinfo = { .auth = { .icv_truncbits = 0, .icv_fullbits = 0, } }, - + + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_NULL, .sadb_alg_ivlen = 0, @@ -48,7 +175,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "md5", + .name = "hmac(md5)", + .compat = "md5", .uinfo = { .auth = { @@ -56,7 +184,9 @@ static struct xfrm_algo_desc aalg_list[] = { .icv_fullbits = 128, } }, - + + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_AALG_MD5HMAC, .sadb_alg_ivlen = 0, @@ -65,7 +195,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "sha1", + .name = "hmac(sha1)", + .compat = "sha1", .uinfo = { .auth = { @@ -74,6 +205,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_AALG_SHA1HMAC, .sadb_alg_ivlen = 0, @@ -82,7 +215,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "sha256", + .name = "hmac(sha256)", + .compat = "sha256", .uinfo = { .auth = { @@ -91,6 +225,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_SHA2_256HMAC, .sadb_alg_ivlen = 0, @@ -99,7 +235,46 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "ripemd160", + .name = "hmac(sha384)", + + .uinfo = { + .auth = { + .icv_truncbits = 192, + .icv_fullbits = 384, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_AALG_SHA2_384HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 384, + .sadb_alg_maxbits = 384 + } +}, +{ + .name = "hmac(sha512)", + + .uinfo = { + .auth = { + .icv_truncbits = 256, + .icv_fullbits = 512, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_AALG_SHA2_512HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 512, + .sadb_alg_maxbits = 512 + } +}, +{ + .name = "hmac(rmd160)", + .compat = "rmd160", .uinfo = { .auth = { @@ -108,6 +283,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC, .sadb_alg_ivlen = 0, @@ -115,19 +292,54 @@ static struct xfrm_algo_desc aalg_list[] = { .sadb_alg_maxbits = 160 } }, +{ + .name = "xcbc(aes)", + + .uinfo = { + .auth = { + .icv_truncbits = 96, + .icv_fullbits = 128, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_AALG_AES_XCBC_MAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 128 + } +}, +{ + /* rfc4494 */ + .name = "cmac(aes)", + + .uinfo = { + .auth = { + .icv_truncbits = 96, + .icv_fullbits = 128, + } + }, + + .pfkey_supported = 0, +}, }; static struct xfrm_algo_desc ealg_list[] = { { - .name = "cipher_null", - + .name = "ecb(cipher_null)", + .compat = "cipher_null", + .uinfo = { .encr = { .blockbits = 8, .defkeybits = 0, } }, - + + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_EALG_NULL, .sadb_alg_ivlen = 0, @@ -136,7 +348,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "des", + .name = "cbc(des)", + .compat = "des", .uinfo = { .encr = { @@ -145,6 +358,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_EALG_DESCBC, .sadb_alg_ivlen = 8, @@ -153,7 +368,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "des3_ede", + .name = "cbc(des3_ede)", + .compat = "des3_ede", .uinfo = { .encr = { @@ -162,6 +378,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_EALG_3DESCBC, .sadb_alg_ivlen = 8, @@ -170,7 +388,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "cast128", + .name = "cbc(cast5)", + .compat = "cast5", .uinfo = { .encr = { @@ -179,6 +398,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_CASTCBC, .sadb_alg_ivlen = 8, @@ -187,7 +408,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "blowfish", + .name = "cbc(blowfish)", + .compat = "blowfish", .uinfo = { .encr = { @@ -196,6 +418,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_BLOWFISHCBC, .sadb_alg_ivlen = 8, @@ -204,7 +428,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "aes", + .name = "cbc(aes)", + .compat = "aes", .uinfo = { .encr = { @@ -213,6 +438,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AESCBC, .sadb_alg_ivlen = 8, @@ -221,38 +448,83 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "serpent", - - .uinfo = { - .encr = { - .blockbits = 128, - .defkeybits = 128, - } - }, - - .desc = { - .sadb_alg_id = SADB_X_EALG_SERPENTCBC, - .sadb_alg_ivlen = 8, - .sadb_alg_minbits = 128, - .sadb_alg_maxbits = 256, - } + .name = "cbc(serpent)", + .compat = "serpent", + + .uinfo = { + .encr = { + .blockbits = 128, + .defkeybits = 128, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_EALG_SERPENTCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256, + } +}, +{ + .name = "cbc(camellia)", + .compat = "camellia", + + .uinfo = { + .encr = { + .blockbits = 128, + .defkeybits = 128, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_EALG_CAMELLIACBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +{ + .name = "cbc(twofish)", + .compat = "twofish", + + .uinfo = { + .encr = { + .blockbits = 128, + .defkeybits = 128, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_EALG_TWOFISHCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } }, { - .name = "twofish", - - .uinfo = { - .encr = { - .blockbits = 128, - .defkeybits = 128, - } - }, - - .desc = { - .sadb_alg_id = SADB_X_EALG_TWOFISHCBC, - .sadb_alg_ivlen = 8, - .sadb_alg_minbits = 128, - .sadb_alg_maxbits = 256 - } + .name = "rfc3686(ctr(aes))", + + .uinfo = { + .encr = { + .blockbits = 128, + .defkeybits = 160, /* 128-bit key + 32-bit nonce */ + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_EALG_AESCTR, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 160, + .sadb_alg_maxbits = 288 + } }, }; @@ -264,6 +536,7 @@ static struct xfrm_algo_desc calg_list[] = { .threshold = 90, } }, + .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE } }, { @@ -273,6 +546,7 @@ static struct xfrm_algo_desc calg_list[] = { .threshold = 90, } }, + .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_LZS } }, { @@ -282,10 +556,16 @@ static struct xfrm_algo_desc calg_list[] = { .threshold = 50, } }, + .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_LZJH } }, }; +static inline int aead_entries(void) +{ + return ARRAY_SIZE(aead_list); +} + static inline int aalg_entries(void) { return ARRAY_SIZE(aalg_list); @@ -301,66 +581,51 @@ static inline int calg_entries(void) return ARRAY_SIZE(calg_list); } -/* Todo: generic iterators */ -struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id) -{ - int i; - - for (i = 0; i < aalg_entries(); i++) { - if (aalg_list[i].desc.sadb_alg_id == alg_id) { - if (aalg_list[i].available) - return &aalg_list[i]; - else - break; - } - } - return NULL; -} -EXPORT_SYMBOL_GPL(xfrm_aalg_get_byid); +struct xfrm_algo_list { + struct xfrm_algo_desc *algs; + int entries; + u32 type; + u32 mask; +}; -struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id) -{ - int i; +static const struct xfrm_algo_list xfrm_aead_list = { + .algs = aead_list, + .entries = ARRAY_SIZE(aead_list), + .type = CRYPTO_ALG_TYPE_AEAD, + .mask = CRYPTO_ALG_TYPE_MASK, +}; - for (i = 0; i < ealg_entries(); i++) { - if (ealg_list[i].desc.sadb_alg_id == alg_id) { - if (ealg_list[i].available) - return &ealg_list[i]; - else - break; - } - } - return NULL; -} -EXPORT_SYMBOL_GPL(xfrm_ealg_get_byid); +static const struct xfrm_algo_list xfrm_aalg_list = { + .algs = aalg_list, + .entries = ARRAY_SIZE(aalg_list), + .type = CRYPTO_ALG_TYPE_HASH, + .mask = CRYPTO_ALG_TYPE_HASH_MASK, +}; -struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id) -{ - int i; +static const struct xfrm_algo_list xfrm_ealg_list = { + .algs = ealg_list, + .entries = ARRAY_SIZE(ealg_list), + .type = CRYPTO_ALG_TYPE_BLKCIPHER, + .mask = CRYPTO_ALG_TYPE_BLKCIPHER_MASK, +}; - for (i = 0; i < calg_entries(); i++) { - if (calg_list[i].desc.sadb_alg_id == alg_id) { - if (calg_list[i].available) - return &calg_list[i]; - else - break; - } - } - return NULL; -} -EXPORT_SYMBOL_GPL(xfrm_calg_get_byid); +static const struct xfrm_algo_list xfrm_calg_list = { + .algs = calg_list, + .entries = ARRAY_SIZE(calg_list), + .type = CRYPTO_ALG_TYPE_COMPRESS, + .mask = CRYPTO_ALG_TYPE_MASK, +}; -static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, - int entries, char *name, - int probe) +static struct xfrm_algo_desc *xfrm_find_algo( + const struct xfrm_algo_list *algo_list, + int match(const struct xfrm_algo_desc *entry, const void *data), + const void *data, int probe) { + struct xfrm_algo_desc *list = algo_list->algs; int i, status; - if (!name) - return NULL; - - for (i = 0; i < entries; i++) { - if (strcmp(name, list[i].name)) + for (i = 0; i < algo_list->entries; i++) { + if (!match(list + i, data)) continue; if (list[i].available) @@ -369,7 +634,8 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, if (!probe) break; - status = crypto_alg_available(name, 0); + status = crypto_has_alg(list[i].name, algo_list->type, + algo_list->mask); if (!status) break; @@ -379,24 +645,90 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, return NULL; } -struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe) +static int xfrm_alg_id_match(const struct xfrm_algo_desc *entry, + const void *data) +{ + return entry->desc.sadb_alg_id == (unsigned long)data; +} + +struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id) +{ + return xfrm_find_algo(&xfrm_aalg_list, xfrm_alg_id_match, + (void *)(unsigned long)alg_id, 1); +} +EXPORT_SYMBOL_GPL(xfrm_aalg_get_byid); + +struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id) +{ + return xfrm_find_algo(&xfrm_ealg_list, xfrm_alg_id_match, + (void *)(unsigned long)alg_id, 1); +} +EXPORT_SYMBOL_GPL(xfrm_ealg_get_byid); + +struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id) +{ + return xfrm_find_algo(&xfrm_calg_list, xfrm_alg_id_match, + (void *)(unsigned long)alg_id, 1); +} +EXPORT_SYMBOL_GPL(xfrm_calg_get_byid); + +static int xfrm_alg_name_match(const struct xfrm_algo_desc *entry, + const void *data) { - return xfrm_get_byname(aalg_list, aalg_entries(), name, probe); + const char *name = data; + + return name && (!strcmp(name, entry->name) || + (entry->compat && !strcmp(name, entry->compat))); +} + +struct xfrm_algo_desc *xfrm_aalg_get_byname(const char *name, int probe) +{ + return xfrm_find_algo(&xfrm_aalg_list, xfrm_alg_name_match, name, + probe); } EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname); -struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe) +struct xfrm_algo_desc *xfrm_ealg_get_byname(const char *name, int probe) { - return xfrm_get_byname(ealg_list, ealg_entries(), name, probe); + return xfrm_find_algo(&xfrm_ealg_list, xfrm_alg_name_match, name, + probe); } EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname); -struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe) +struct xfrm_algo_desc *xfrm_calg_get_byname(const char *name, int probe) { - return xfrm_get_byname(calg_list, calg_entries(), name, probe); + return xfrm_find_algo(&xfrm_calg_list, xfrm_alg_name_match, name, + probe); } EXPORT_SYMBOL_GPL(xfrm_calg_get_byname); +struct xfrm_aead_name { + const char *name; + int icvbits; +}; + +static int xfrm_aead_name_match(const struct xfrm_algo_desc *entry, + const void *data) +{ + const struct xfrm_aead_name *aead = data; + const char *name = aead->name; + + return aead->icvbits == entry->uinfo.aead.icv_truncbits && name && + !strcmp(name, entry->name); +} + +struct xfrm_algo_desc *xfrm_aead_get_byname(const char *name, int icv_len, int probe) +{ + struct xfrm_aead_name data = { + .name = name, + .icvbits = icv_len, + }; + + return xfrm_find_algo(&xfrm_aead_list, xfrm_aead_name_match, &data, + probe); +} +EXPORT_SYMBOL_GPL(xfrm_aead_get_byname); + struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx) { if (idx >= aalg_entries()) @@ -422,306 +754,52 @@ EXPORT_SYMBOL_GPL(xfrm_ealg_get_byidx); */ void xfrm_probe_algs(void) { -#ifdef CONFIG_CRYPTO int i, status; - + BUG_ON(in_softirq()); for (i = 0; i < aalg_entries(); i++) { - status = crypto_alg_available(aalg_list[i].name, 0); + status = crypto_has_hash(aalg_list[i].name, 0, + CRYPTO_ALG_ASYNC); if (aalg_list[i].available != status) aalg_list[i].available = status; } - + for (i = 0; i < ealg_entries(); i++) { - status = crypto_alg_available(ealg_list[i].name, 0); + status = crypto_has_ablkcipher(ealg_list[i].name, 0, 0); if (ealg_list[i].available != status) ealg_list[i].available = status; } - + for (i = 0; i < calg_entries(); i++) { - status = crypto_alg_available(calg_list[i].name, 0); + status = crypto_has_comp(calg_list[i].name, 0, + CRYPTO_ALG_ASYNC); if (calg_list[i].available != status) calg_list[i].available = status; } -#endif } EXPORT_SYMBOL_GPL(xfrm_probe_algs); -int xfrm_count_auth_supported(void) +int xfrm_count_pfkey_auth_supported(void) { int i, n; for (i = 0, n = 0; i < aalg_entries(); i++) - if (aalg_list[i].available) + if (aalg_list[i].available && aalg_list[i].pfkey_supported) n++; return n; } -EXPORT_SYMBOL_GPL(xfrm_count_auth_supported); +EXPORT_SYMBOL_GPL(xfrm_count_pfkey_auth_supported); -int xfrm_count_enc_supported(void) +int xfrm_count_pfkey_enc_supported(void) { int i, n; for (i = 0, n = 0; i < ealg_entries(); i++) - if (ealg_list[i].available) + if (ealg_list[i].available && ealg_list[i].pfkey_supported) n++; return n; } -EXPORT_SYMBOL_GPL(xfrm_count_enc_supported); - -/* Move to common area: it is shared with AH. */ - -void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, - int offset, int len, icv_update_fn_t icv_update) -{ - int start = skb_headlen(skb); - int i, copy = start - offset; - struct scatterlist sg; - - /* Checksum header. */ - if (copy > 0) { - if (copy > len) - copy = len; - - sg.page = virt_to_page(skb->data + offset); - sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; - sg.length = copy; - - icv_update(tfm, &sg, 1); - - if ((len -= copy) == 0) - return; - offset += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - - sg.page = frag->page; - sg.offset = frag->page_offset + offset-start; - sg.length = copy; - - icv_update(tfm, &sg, 1); - - if (!(len -= copy)) - return; - offset += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - skb_icv_walk(list, tfm, offset-start, copy, icv_update); - if ((len -= copy) == 0) - return; - offset += copy; - } - start = end; - } - } - BUG_ON(len); -} -EXPORT_SYMBOL_GPL(skb_icv_walk); - -#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) - -/* Looking generic it is not used in another places. */ - -int -skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) -{ - int start = skb_headlen(skb); - int i, copy = start - offset; - int elt = 0; - - if (copy > 0) { - if (copy > len) - copy = len; - sg[elt].page = virt_to_page(skb->data + offset); - sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; - sg[elt].length = copy; - elt++; - if ((len -= copy) == 0) - return elt; - offset += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - sg[elt].page = frag->page; - sg[elt].offset = frag->page_offset+offset-start; - sg[elt].length = copy; - elt++; - if (!(len -= copy)) - return elt; - offset += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - elt += skb_to_sgvec(list, sg+elt, offset - start, copy); - if ((len -= copy) == 0) - return elt; - offset += copy; - } - start = end; - } - } - BUG_ON(len); - return elt; -} -EXPORT_SYMBOL_GPL(skb_to_sgvec); +EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported); -/* Check that skb data bits are writable. If they are not, copy data - * to newly created private area. If "tailbits" is given, make sure that - * tailbits bytes beyond current end of skb are writable. - * - * Returns amount of elements of scatterlist to load for subsequent - * transformations and pointer to writable trailer skb. - */ - -int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) -{ - int copyflag; - int elt; - struct sk_buff *skb1, **skb_p; - - /* If skb is cloned or its head is paged, reallocate - * head pulling out all the pages (pages are considered not writable - * at the moment even if they are anonymous). - */ - if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) && - __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL) - return -ENOMEM; - - /* Easy case. Most of packets will go this way. */ - if (!skb_shinfo(skb)->frag_list) { - /* A little of trouble, not enough of space for trailer. - * This should not happen, when stack is tuned to generate - * good frames. OK, on miss we reallocate and reserve even more - * space, 128 bytes is fair. */ - - if (skb_tailroom(skb) < tailbits && - pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC)) - return -ENOMEM; - - /* Voila! */ - *trailer = skb; - return 1; - } - - /* Misery. We are in troubles, going to mincer fragments... */ - - elt = 1; - skb_p = &skb_shinfo(skb)->frag_list; - copyflag = 0; - - while ((skb1 = *skb_p) != NULL) { - int ntail = 0; - - /* The fragment is partially pulled by someone, - * this can happen on input. Copy it and everything - * after it. */ - - if (skb_shared(skb1)) - copyflag = 1; - - /* If the skb is the last, worry about trailer. */ - - if (skb1->next == NULL && tailbits) { - if (skb_shinfo(skb1)->nr_frags || - skb_shinfo(skb1)->frag_list || - skb_tailroom(skb1) < tailbits) - ntail = tailbits + 128; - } - - if (copyflag || - skb_cloned(skb1) || - ntail || - skb_shinfo(skb1)->nr_frags || - skb_shinfo(skb1)->frag_list) { - struct sk_buff *skb2; - - /* Fuck, we are miserable poor guys... */ - if (ntail == 0) - skb2 = skb_copy(skb1, GFP_ATOMIC); - else - skb2 = skb_copy_expand(skb1, - skb_headroom(skb1), - ntail, - GFP_ATOMIC); - if (unlikely(skb2 == NULL)) - return -ENOMEM; - - if (skb1->sk) - skb_set_owner_w(skb2, skb1->sk); - - /* Looking around. Are we still alive? - * OK, link new skb, drop old one */ - - skb2->next = skb1->next; - *skb_p = skb2; - kfree_skb(skb1); - skb1 = skb2; - } - elt++; - *trailer = skb1; - skb_p = &skb1->next; - } - - return elt; -} -EXPORT_SYMBOL_GPL(skb_cow_data); - -void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) -{ - if (tail != skb) { - skb->data_len += len; - skb->len += len; - } - return skb_put(tail, len); -} -EXPORT_SYMBOL_GPL(pskb_put); -#endif +MODULE_LICENSE("GPL"); diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c new file mode 100644 index 00000000000..1e98bc0fe0a --- /dev/null +++ b/net/xfrm/xfrm_hash.c @@ -0,0 +1,39 @@ +/* xfrm_hash.c: Common hash table code. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/bootmem.h> +#include <linux/vmalloc.h> +#include <linux/slab.h> +#include <linux/xfrm.h> + +#include "xfrm_hash.h" + +struct hlist_head *xfrm_hash_alloc(unsigned int sz) +{ + struct hlist_head *n; + + if (sz <= PAGE_SIZE) + n = kzalloc(sz, GFP_KERNEL); + else if (hashdist) + n = vzalloc(sz); + else + n = (struct hlist_head *) + __get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, + get_order(sz)); + + return n; +} + +void xfrm_hash_free(struct hlist_head *n, unsigned int sz) +{ + if (sz <= PAGE_SIZE) + kfree(n); + else if (hashdist) + vfree(n); + else + free_pages((unsigned long)n, get_order(sz)); +} diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h new file mode 100644 index 00000000000..0622d319e1f --- /dev/null +++ b/net/xfrm/xfrm_hash.h @@ -0,0 +1,136 @@ +#ifndef _XFRM_HASH_H +#define _XFRM_HASH_H + +#include <linux/xfrm.h> +#include <linux/socket.h> + +static inline unsigned int __xfrm4_addr_hash(const xfrm_address_t *addr) +{ + return ntohl(addr->a4); +} + +static inline unsigned int __xfrm6_addr_hash(const xfrm_address_t *addr) +{ + return ntohl(addr->a6[2] ^ addr->a6[3]); +} + +static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr) +{ + u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4; + return ntohl((__force __be32)sum); +} + +static inline unsigned int __xfrm6_daddr_saddr_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr) +{ + return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ + saddr->a6[2] ^ saddr->a6[3]); +} + +static inline unsigned int __xfrm_dst_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + u32 reqid, unsigned short family, + unsigned int hmask) +{ + unsigned int h = family ^ reqid; + switch (family) { + case AF_INET: + h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); + break; + case AF_INET6: + h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); + break; + } + return (h ^ (h >> 16)) & hmask; +} + +static inline unsigned int __xfrm_src_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + unsigned short family, + unsigned int hmask) +{ + unsigned int h = family; + switch (family) { + case AF_INET: + h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); + break; + case AF_INET6: + h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); + break; + } + return (h ^ (h >> 16)) & hmask; +} + +static inline unsigned int +__xfrm_spi_hash(const xfrm_address_t *daddr, __be32 spi, u8 proto, + unsigned short family, unsigned int hmask) +{ + unsigned int h = (__force u32)spi ^ proto; + switch (family) { + case AF_INET: + h ^= __xfrm4_addr_hash(daddr); + break; + case AF_INET6: + h ^= __xfrm6_addr_hash(daddr); + break; + } + return (h ^ (h >> 10) ^ (h >> 20)) & hmask; +} + +static inline unsigned int __idx_hash(u32 index, unsigned int hmask) +{ + return (index ^ (index >> 8)) & hmask; +} + +static inline unsigned int __sel_hash(const struct xfrm_selector *sel, + unsigned short family, unsigned int hmask) +{ + const xfrm_address_t *daddr = &sel->daddr; + const xfrm_address_t *saddr = &sel->saddr; + unsigned int h = 0; + + switch (family) { + case AF_INET: + if (sel->prefixlen_d != 32 || + sel->prefixlen_s != 32) + return hmask + 1; + + h = __xfrm4_daddr_saddr_hash(daddr, saddr); + break; + + case AF_INET6: + if (sel->prefixlen_d != 128 || + sel->prefixlen_s != 128) + return hmask + 1; + + h = __xfrm6_daddr_saddr_hash(daddr, saddr); + break; + } + h ^= (h >> 16); + return h & hmask; +} + +static inline unsigned int __addr_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + unsigned short family, unsigned int hmask) +{ + unsigned int h = 0; + + switch (family) { + case AF_INET: + h = __xfrm4_daddr_saddr_hash(daddr, saddr); + break; + + case AF_INET6: + h = __xfrm6_daddr_saddr_hash(daddr, saddr); + break; + } + h ^= (h >> 16); + return h & hmask; +} + +struct hlist_head *xfrm_hash_alloc(unsigned int sz); +void xfrm_hash_free(struct hlist_head *n, unsigned int sz); + +#endif /* _XFRM_HASH_H */ diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 2407a707232..85d1d476461 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -4,21 +4,98 @@ * Changes: * YOSHIFUJI Hideaki @USAGI * Split up af-specific portion - * + * */ #include <linux/slab.h> #include <linux/module.h> +#include <linux/netdevice.h> +#include <net/dst.h> #include <net/ip.h> #include <net/xfrm.h> -static kmem_cache_t *secpath_cachep __read_mostly; +static struct kmem_cache *secpath_cachep __read_mostly; + +static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); +static struct xfrm_input_afinfo __rcu *xfrm_input_afinfo[NPROTO]; + +int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo) +{ + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EINVAL; + if (unlikely(afinfo->family >= NPROTO)) + return -EAFNOSUPPORT; + spin_lock_bh(&xfrm_input_afinfo_lock); + if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL)) + err = -ENOBUFS; + else + rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo); + spin_unlock_bh(&xfrm_input_afinfo_lock); + return err; +} +EXPORT_SYMBOL(xfrm_input_register_afinfo); + +int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo) +{ + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EINVAL; + if (unlikely(afinfo->family >= NPROTO)) + return -EAFNOSUPPORT; + spin_lock_bh(&xfrm_input_afinfo_lock); + if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) { + if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo)) + err = -EINVAL; + else + RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL); + } + spin_unlock_bh(&xfrm_input_afinfo_lock); + synchronize_rcu(); + return err; +} +EXPORT_SYMBOL(xfrm_input_unregister_afinfo); + +static struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family) +{ + struct xfrm_input_afinfo *afinfo; + + if (unlikely(family >= NPROTO)) + return NULL; + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_input_afinfo[family]); + if (unlikely(!afinfo)) + rcu_read_unlock(); + return afinfo; +} + +static void xfrm_input_put_afinfo(struct xfrm_input_afinfo *afinfo) +{ + rcu_read_unlock(); +} + +static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol, + int err) +{ + int ret; + struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family); + + if (!afinfo) + return -EAFNOSUPPORT; + + ret = afinfo->callback(skb, protocol, err); + xfrm_input_put_afinfo(afinfo); + + return ret; +} void __secpath_destroy(struct sec_path *sp) { int i; for (i = 0; i < sp->len; i++) - xfrm_state_put(sp->x[i].xvec); + xfrm_state_put(sp->xvec[i]); kmem_cache_free(secpath_cachep, sp); } EXPORT_SYMBOL(__secpath_destroy); @@ -27,7 +104,7 @@ struct sec_path *secpath_dup(struct sec_path *src) { struct sec_path *sp; - sp = kmem_cache_alloc(secpath_cachep, SLAB_ATOMIC); + sp = kmem_cache_alloc(secpath_cachep, GFP_ATOMIC); if (!sp) return NULL; @@ -37,7 +114,7 @@ struct sec_path *secpath_dup(struct sec_path *src) memcpy(sp, src, sizeof(*sp)); for (i = 0; i < sp->len; i++) - xfrm_state_hold(sp->x[i].xvec); + xfrm_state_hold(sp->xvec[i]); } atomic_set(&sp->refcnt, 1); return sp; @@ -46,44 +123,259 @@ EXPORT_SYMBOL(secpath_dup); /* Fetch spi and seq from ipsec header */ -int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) +int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) { int offset, offset_seq; + int hlen; switch (nexthdr) { case IPPROTO_AH: + hlen = sizeof(struct ip_auth_hdr); offset = offsetof(struct ip_auth_hdr, spi); offset_seq = offsetof(struct ip_auth_hdr, seq_no); break; case IPPROTO_ESP: + hlen = sizeof(struct ip_esp_hdr); offset = offsetof(struct ip_esp_hdr, spi); offset_seq = offsetof(struct ip_esp_hdr, seq_no); break; case IPPROTO_COMP: if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr))) return -EINVAL; - *spi = ntohl(ntohs(*(u16*)(skb->h.raw + 2))); + *spi = htonl(ntohs(*(__be16 *)(skb_transport_header(skb) + 2))); *seq = 0; return 0; default: return 1; } - if (!pskb_may_pull(skb, 16)) + if (!pskb_may_pull(skb, hlen)) return -EINVAL; - *spi = *(u32*)(skb->h.raw + offset); - *seq = *(u32*)(skb->h.raw + offset_seq); + *spi = *(__be32 *)(skb_transport_header(skb) + offset); + *seq = *(__be32 *)(skb_transport_header(skb) + offset_seq); + return 0; +} + +int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct xfrm_mode *inner_mode = x->inner_mode; + int err; + + err = x->outer_mode->afinfo->extract_input(x, skb); + if (err) + return err; + + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) + return -EAFNOSUPPORT; + } + + skb->protocol = inner_mode->afinfo->eth_proto; + return inner_mode->input2(x, skb); +} +EXPORT_SYMBOL(xfrm_prepare_input); + +int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) +{ + struct net *net = dev_net(skb->dev); + int err; + __be32 seq; + __be32 seq_hi; + struct xfrm_state *x = NULL; + xfrm_address_t *daddr; + struct xfrm_mode *inner_mode; + unsigned int family; + int decaps = 0; + int async = 0; + + /* A negative encap_type indicates async resumption. */ + if (encap_type < 0) { + async = 1; + x = xfrm_input_state(skb); + seq = XFRM_SKB_CB(skb)->seq.input.low; + family = x->outer_mode->afinfo->family; + goto resume; + } + + daddr = (xfrm_address_t *)(skb_network_header(skb) + + XFRM_SPI_SKB_CB(skb)->daddroff); + family = XFRM_SPI_SKB_CB(skb)->family; + + /* Allocate new secpath or COW existing one. */ + if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { + struct sec_path *sp; + + sp = secpath_dup(skb->sp); + if (!sp) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); + goto drop; + } + if (skb->sp) + secpath_put(skb->sp); + skb->sp = sp; + } + + seq = 0; + if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); + goto drop; + } + + do { + if (skb->sp->len == XFRM_MAX_DEPTH) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); + goto drop; + } + + x = xfrm_state_lookup(net, skb->mark, daddr, spi, nexthdr, family); + if (x == NULL) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); + xfrm_audit_state_notfound(skb, family, spi, seq); + goto drop; + } + + skb->sp->xvec[skb->sp->len++] = x; + + if (xfrm_tunnel_check(skb, x, family)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); + goto drop; + } + + spin_lock(&x->lock); + if (unlikely(x->km.state == XFRM_STATE_ACQ)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); + goto drop_unlock; + } + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID); + goto drop_unlock; + } + + if ((x->encap ? x->encap->encap_type : 0) != encap_type) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); + goto drop_unlock; + } + + if (x->repl->check(x, skb, seq)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); + goto drop_unlock; + } + + if (xfrm_state_check_expire(x)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEEXPIRED); + goto drop_unlock; + } + + spin_unlock(&x->lock); + + seq_hi = htonl(xfrm_replay_seqhi(x, seq)); + + XFRM_SKB_CB(skb)->seq.input.low = seq; + XFRM_SKB_CB(skb)->seq.input.hi = seq_hi; + + skb_dst_force(skb); + + nexthdr = x->type->input(x, skb); + + if (nexthdr == -EINPROGRESS) + return 0; +resume: + spin_lock(&x->lock); + if (nexthdr <= 0) { + if (nexthdr == -EBADMSG) { + xfrm_audit_state_icvfail(x, skb, + x->type->proto); + x->stats.integrity_failed++; + } + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR); + goto drop_unlock; + } + + /* only the first xfrm gets the encap type */ + encap_type = 0; + + if (async && x->repl->recheck(x, skb, seq)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); + goto drop_unlock; + } + + x->repl->advance(x, seq); + + x->curlft.bytes += skb->len; + x->curlft.packets++; + + spin_unlock(&x->lock); + + XFRM_MODE_SKB_CB(skb)->protocol = nexthdr; + + inner_mode = x->inner_mode; + + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) + goto drop; + } + + if (inner_mode->input(x, skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); + goto drop; + } + + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { + decaps = 1; + break; + } + + /* + * We need the inner address. However, we only get here for + * transport mode so the outer address is identical. + */ + daddr = &x->id.daddr; + family = x->outer_mode->afinfo->family; + + err = xfrm_parse_spi(skb, nexthdr, &spi, &seq); + if (err < 0) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); + goto drop; + } + } while (!err); + + err = xfrm_rcv_cb(skb, family, x->type->proto, 0); + if (err) + goto drop; + + nf_reset(skb); + + if (decaps) { + skb_dst_drop(skb); + netif_rx(skb); + return 0; + } else { + return x->inner_mode->afinfo->transport_finish(skb, async); + } + +drop_unlock: + spin_unlock(&x->lock); +drop: + xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1); + kfree_skb(skb); return 0; } -EXPORT_SYMBOL(xfrm_parse_spi); +EXPORT_SYMBOL(xfrm_input); + +int xfrm_input_resume(struct sk_buff *skb, int nexthdr) +{ + return xfrm_input(skb, nexthdr, 0, -1); +} +EXPORT_SYMBOL(xfrm_input_resume); void __init xfrm_input_init(void) { secpath_cachep = kmem_cache_create("secpath_cache", sizeof(struct sec_path), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (!secpath_cachep) - panic("XFRM: failed to allocate secpath_cache\n"); + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, + NULL); } diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c new file mode 100644 index 00000000000..ccfdc7115a8 --- /dev/null +++ b/net/xfrm/xfrm_ipcomp.c @@ -0,0 +1,386 @@ +/* + * IP Payload Compression Protocol (IPComp) - RFC3173. + * + * Copyright (c) 2003 James Morris <jmorris@intercode.com.au> + * Copyright (c) 2003-2008 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Todo: + * - Tunable compression parameters. + * - Compression stats. + * - Adaptive compression. + */ + +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/percpu.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/vmalloc.h> +#include <net/ip.h> +#include <net/ipcomp.h> +#include <net/xfrm.h> + +struct ipcomp_tfms { + struct list_head list; + struct crypto_comp * __percpu *tfms; + int users; +}; + +static DEFINE_MUTEX(ipcomp_resource_mutex); +static void * __percpu *ipcomp_scratches; +static int ipcomp_scratch_users; +static LIST_HEAD(ipcomp_tfms_list); + +static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipcomp_data *ipcd = x->data; + const int plen = skb->len; + int dlen = IPCOMP_SCRATCH_SIZE; + const u8 *start = skb->data; + const int cpu = get_cpu(); + u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); + struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); + int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); + int len; + + if (err) + goto out; + + if (dlen < (plen + sizeof(struct ip_comp_hdr))) { + err = -EINVAL; + goto out; + } + + len = dlen - plen; + if (len > skb_tailroom(skb)) + len = skb_tailroom(skb); + + __skb_put(skb, len); + + len += plen; + skb_copy_to_linear_data(skb, scratch, len); + + while ((scratch += len, dlen -= len) > 0) { + skb_frag_t *frag; + struct page *page; + + err = -EMSGSIZE; + if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) + goto out; + + frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags; + page = alloc_page(GFP_ATOMIC); + + err = -ENOMEM; + if (!page) + goto out; + + __skb_frag_set_page(frag, page); + + len = PAGE_SIZE; + if (dlen < len) + len = dlen; + + frag->page_offset = 0; + skb_frag_size_set(frag, len); + memcpy(skb_frag_address(frag), scratch, len); + + skb->truesize += len; + skb->data_len += len; + skb->len += len; + + skb_shinfo(skb)->nr_frags++; + } + + err = 0; + +out: + put_cpu(); + return err; +} + +int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) +{ + int nexthdr; + int err = -ENOMEM; + struct ip_comp_hdr *ipch; + + if (skb_linearize_cow(skb)) + goto out; + + skb->ip_summed = CHECKSUM_NONE; + + /* Remove ipcomp header and decompress original payload */ + ipch = (void *)skb->data; + nexthdr = ipch->nexthdr; + + skb->transport_header = skb->network_header + sizeof(*ipch); + __skb_pull(skb, sizeof(*ipch)); + err = ipcomp_decompress(x, skb); + if (err) + goto out; + + err = nexthdr; + +out: + return err; +} +EXPORT_SYMBOL_GPL(ipcomp_input); + +static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipcomp_data *ipcd = x->data; + const int plen = skb->len; + int dlen = IPCOMP_SCRATCH_SIZE; + u8 *start = skb->data; + struct crypto_comp *tfm; + u8 *scratch; + int err; + + local_bh_disable(); + scratch = *this_cpu_ptr(ipcomp_scratches); + tfm = *this_cpu_ptr(ipcd->tfms); + err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); + if (err) + goto out; + + if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) { + err = -EMSGSIZE; + goto out; + } + + memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); + local_bh_enable(); + + pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); + return 0; + +out: + local_bh_enable(); + return err; +} + +int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + struct ip_comp_hdr *ipch; + struct ipcomp_data *ipcd = x->data; + + if (skb->len < ipcd->threshold) { + /* Don't bother compressing */ + goto out_ok; + } + + if (skb_linearize_cow(skb)) + goto out_ok; + + err = ipcomp_compress(x, skb); + + if (err) { + goto out_ok; + } + + /* Install ipcomp header, convert into ipcomp datagram. */ + ipch = ip_comp_hdr(skb); + ipch->nexthdr = *skb_mac_header(skb); + ipch->flags = 0; + ipch->cpi = htons((u16 )ntohl(x->id.spi)); + *skb_mac_header(skb) = IPPROTO_COMP; +out_ok: + skb_push(skb, -skb_network_offset(skb)); + return 0; +} +EXPORT_SYMBOL_GPL(ipcomp_output); + +static void ipcomp_free_scratches(void) +{ + int i; + void * __percpu *scratches; + + if (--ipcomp_scratch_users) + return; + + scratches = ipcomp_scratches; + if (!scratches) + return; + + for_each_possible_cpu(i) + vfree(*per_cpu_ptr(scratches, i)); + + free_percpu(scratches); +} + +static void * __percpu *ipcomp_alloc_scratches(void) +{ + void * __percpu *scratches; + int i; + + if (ipcomp_scratch_users++) + return ipcomp_scratches; + + scratches = alloc_percpu(void *); + if (!scratches) + return NULL; + + ipcomp_scratches = scratches; + + for_each_possible_cpu(i) { + void *scratch; + + scratch = vmalloc_node(IPCOMP_SCRATCH_SIZE, cpu_to_node(i)); + if (!scratch) + return NULL; + *per_cpu_ptr(scratches, i) = scratch; + } + + return scratches; +} + +static void ipcomp_free_tfms(struct crypto_comp * __percpu *tfms) +{ + struct ipcomp_tfms *pos; + int cpu; + + list_for_each_entry(pos, &ipcomp_tfms_list, list) { + if (pos->tfms == tfms) + break; + } + + WARN_ON(!pos); + + if (--pos->users) + return; + + list_del(&pos->list); + kfree(pos); + + if (!tfms) + return; + + for_each_possible_cpu(cpu) { + struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); + crypto_free_comp(tfm); + } + free_percpu(tfms); +} + +static struct crypto_comp * __percpu *ipcomp_alloc_tfms(const char *alg_name) +{ + struct ipcomp_tfms *pos; + struct crypto_comp * __percpu *tfms; + int cpu; + + + list_for_each_entry(pos, &ipcomp_tfms_list, list) { + struct crypto_comp *tfm; + + /* This can be any valid CPU ID so we don't need locking. */ + tfm = __this_cpu_read(*pos->tfms); + + if (!strcmp(crypto_comp_name(tfm), alg_name)) { + pos->users++; + return pos->tfms; + } + } + + pos = kmalloc(sizeof(*pos), GFP_KERNEL); + if (!pos) + return NULL; + + pos->users = 1; + INIT_LIST_HEAD(&pos->list); + list_add(&pos->list, &ipcomp_tfms_list); + + pos->tfms = tfms = alloc_percpu(struct crypto_comp *); + if (!tfms) + goto error; + + for_each_possible_cpu(cpu) { + struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto error; + *per_cpu_ptr(tfms, cpu) = tfm; + } + + return tfms; + +error: + ipcomp_free_tfms(tfms); + return NULL; +} + +static void ipcomp_free_data(struct ipcomp_data *ipcd) +{ + if (ipcd->tfms) + ipcomp_free_tfms(ipcd->tfms); + ipcomp_free_scratches(); +} + +void ipcomp_destroy(struct xfrm_state *x) +{ + struct ipcomp_data *ipcd = x->data; + if (!ipcd) + return; + xfrm_state_delete_tunnel(x); + mutex_lock(&ipcomp_resource_mutex); + ipcomp_free_data(ipcd); + mutex_unlock(&ipcomp_resource_mutex); + kfree(ipcd); +} +EXPORT_SYMBOL_GPL(ipcomp_destroy); + +int ipcomp_init_state(struct xfrm_state *x) +{ + int err; + struct ipcomp_data *ipcd; + struct xfrm_algo_desc *calg_desc; + + err = -EINVAL; + if (!x->calg) + goto out; + + if (x->encap) + goto out; + + err = -ENOMEM; + ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); + if (!ipcd) + goto out; + + mutex_lock(&ipcomp_resource_mutex); + if (!ipcomp_alloc_scratches()) + goto error; + + ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name); + if (!ipcd->tfms) + goto error; + mutex_unlock(&ipcomp_resource_mutex); + + calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0); + BUG_ON(!calg_desc); + ipcd->threshold = calg_desc->uinfo.comp.threshold; + x->data = ipcd; + err = 0; +out: + return err; + +error: + ipcomp_free_data(ipcd); + mutex_unlock(&ipcomp_resource_mutex); + kfree(ipcd); + goto out; +} +EXPORT_SYMBOL_GPL(ipcomp_init_state); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173"); +MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c new file mode 100644 index 00000000000..c51e8f7b865 --- /dev/null +++ b/net/xfrm/xfrm_output.c @@ -0,0 +1,238 @@ +/* + * xfrm_output.c - Common IPsec encapsulation code. + * + * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/netfilter.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <net/dst.h> +#include <net/xfrm.h> + +static int xfrm_output2(struct sk_buff *skb); + +static int xfrm_skb_check_space(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev) + - skb_headroom(skb); + int ntail = dst->dev->needed_tailroom - skb_tailroom(skb); + + if (nhead <= 0) { + if (ntail <= 0) + return 0; + nhead = 0; + } else if (ntail < 0) + ntail = 0; + + return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); +} + +static int xfrm_output_one(struct sk_buff *skb, int err) +{ + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + struct net *net = xs_net(x); + + if (err <= 0) + goto resume; + + do { + err = xfrm_skb_check_space(skb); + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + goto error_nolock; + } + + err = x->outer_mode->output(x, skb); + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR); + goto error_nolock; + } + + spin_lock_bh(&x->lock); + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID); + err = -EINVAL; + goto error; + } + + err = xfrm_state_check_expire(x); + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED); + goto error; + } + + err = x->repl->overflow(x, skb); + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); + goto error; + } + + x->curlft.bytes += skb->len; + x->curlft.packets++; + + spin_unlock_bh(&x->lock); + + skb_dst_force(skb); + + err = x->type->output(x, skb); + if (err == -EINPROGRESS) + goto out; + +resume: + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR); + goto error_nolock; + } + + dst = skb_dst_pop(skb); + if (!dst) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + err = -EHOSTUNREACH; + goto error_nolock; + } + skb_dst_set(skb, dst); + x = dst->xfrm; + } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); + + return 0; + +error: + spin_unlock_bh(&x->lock); +error_nolock: + kfree_skb(skb); +out: + return err; +} + +int xfrm_output_resume(struct sk_buff *skb, int err) +{ + while (likely((err = xfrm_output_one(skb, err)) == 0)) { + nf_reset(skb); + + err = skb_dst(skb)->ops->local_out(skb); + if (unlikely(err != 1)) + goto out; + + if (!skb_dst(skb)->xfrm) + return dst_output(skb); + + err = nf_hook(skb_dst(skb)->ops->family, + NF_INET_POST_ROUTING, skb, + NULL, skb_dst(skb)->dev, xfrm_output2); + if (unlikely(err != 1)) + goto out; + } + + if (err == -EINPROGRESS) + err = 0; + +out: + return err; +} +EXPORT_SYMBOL_GPL(xfrm_output_resume); + +static int xfrm_output2(struct sk_buff *skb) +{ + return xfrm_output_resume(skb, 1); +} + +static int xfrm_output_gso(struct sk_buff *skb) +{ + struct sk_buff *segs; + + segs = skb_gso_segment(skb, 0); + kfree_skb(skb); + if (IS_ERR(segs)) + return PTR_ERR(segs); + + do { + struct sk_buff *nskb = segs->next; + int err; + + segs->next = NULL; + err = xfrm_output2(segs); + + if (unlikely(err)) { + while ((segs = nskb)) { + nskb = segs->next; + segs->next = NULL; + kfree_skb(segs); + } + return err; + } + + segs = nskb; + } while (segs); + + return 0; +} + +int xfrm_output(struct sk_buff *skb) +{ + struct net *net = dev_net(skb_dst(skb)->dev); + int err; + + if (skb_is_gso(skb)) + return xfrm_output_gso(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + kfree_skb(skb); + return err; + } + } + + return xfrm_output2(skb); +} +EXPORT_SYMBOL_GPL(xfrm_output); + +int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct xfrm_mode *inner_mode; + if (x->sel.family == AF_UNSPEC) + inner_mode = xfrm_ip2inner_mode(x, + xfrm_af2proto(skb_dst(skb)->ops->family)); + else + inner_mode = x->inner_mode; + + if (inner_mode == NULL) + return -EAFNOSUPPORT; + return inner_mode->afinfo->extract_output(x, skb); +} +EXPORT_SYMBOL_GPL(xfrm_inner_extract_output); + +void xfrm_local_error(struct sk_buff *skb, int mtu) +{ + unsigned int proto; + struct xfrm_state_afinfo *afinfo; + + if (skb->protocol == htons(ETH_P_IP)) + proto = AF_INET; + else if (skb->protocol == htons(ETH_P_IPV6)) + proto = AF_INET6; + else + return; + + afinfo = xfrm_state_get_afinfo(proto); + if (!afinfo) + return; + + afinfo->local_error(skb, mtu); + xfrm_state_put_afinfo(afinfo); +} +EXPORT_SYMBOL_GPL(xfrm_local_error); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 077bbf9fb9b..0525d78ba32 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1,4 +1,4 @@ -/* +/* * xfrm_policy.c * * Changes: @@ -13,8 +13,7 @@ * */ -#include <asm/bug.h> -#include <linux/config.h> +#include <linux/err.h> #include <linux/slab.h> #include <linux/kmod.h> #include <linux/list.h> @@ -24,123 +23,140 @@ #include <linux/netdevice.h> #include <linux/netfilter.h> #include <linux/module.h> +#include <linux/cache.h> +#include <linux/audit.h> +#include <net/dst.h> +#include <net/flow.h> #include <net/xfrm.h> #include <net/ip.h> +#ifdef CONFIG_XFRM_STATISTICS +#include <net/snmp.h> +#endif -DECLARE_MUTEX(xfrm_cfg_sem); -EXPORT_SYMBOL(xfrm_cfg_sem); +#include "xfrm_hash.h" -static DEFINE_RWLOCK(xfrm_policy_lock); +#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10)) +#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) +#define XFRM_MAX_QUEUE_LEN 100 -struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; -EXPORT_SYMBOL(xfrm_policy_list); +static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); +static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] + __read_mostly; -static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); -static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; +static struct kmem_cache *xfrm_dst_cache __read_mostly; -static kmem_cache_t *xfrm_dst_cache __read_mostly; - -static struct work_struct xfrm_policy_gc_work; -static struct list_head xfrm_policy_gc_list = - LIST_HEAD_INIT(xfrm_policy_gc_list); -static DEFINE_SPINLOCK(xfrm_policy_gc_lock); +static void xfrm_init_pmtu(struct dst_entry *dst); +static int stale_bundle(struct dst_entry *dst); +static int xfrm_bundle_ok(struct xfrm_dst *xdst); +static void xfrm_policy_queue_process(unsigned long arg); -static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); -static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); +static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, + int dir); -int xfrm_register_type(struct xfrm_type *type, unsigned short family) +static inline bool +__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - struct xfrm_type_map *typemap; - int err = 0; + const struct flowi4 *fl4 = &fl->u.ip4; + + return addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) && + addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) && + !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) && + (fl4->flowi4_proto == sel->proto || !sel->proto) && + (fl4->flowi4_oif == sel->ifindex || !sel->ifindex); +} - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - typemap = afinfo->type_map; +static inline bool +__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) +{ + const struct flowi6 *fl6 = &fl->u.ip6; + + return addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) && + addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) && + !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) && + (fl6->flowi6_proto == sel->proto || !sel->proto) && + (fl6->flowi6_oif == sel->ifindex || !sel->ifindex); +} - write_lock(&typemap->lock); - if (likely(typemap->map[type->proto] == NULL)) - typemap->map[type->proto] = type; - else - err = -EEXIST; - write_unlock(&typemap->lock); - xfrm_policy_put_afinfo(afinfo); - return err; +bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, + unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_selector_match(sel, fl); + case AF_INET6: + return __xfrm6_selector_match(sel, fl); + } + return false; } -EXPORT_SYMBOL(xfrm_register_type); -int xfrm_unregister_type(struct xfrm_type *type, unsigned short family) +static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - struct xfrm_type_map *typemap; - int err = 0; + struct xfrm_policy_afinfo *afinfo; - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - typemap = afinfo->type_map; + if (unlikely(family >= NPROTO)) + return NULL; + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_policy_afinfo[family]); + if (unlikely(!afinfo)) + rcu_read_unlock(); + return afinfo; +} - write_lock(&typemap->lock); - if (unlikely(typemap->map[type->proto] != type)) - err = -ENOENT; - else - typemap->map[type->proto] = NULL; - write_unlock(&typemap->lock); - xfrm_policy_put_afinfo(afinfo); - return err; +static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) +{ + rcu_read_unlock(); } -EXPORT_SYMBOL(xfrm_unregister_type); -struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) +static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, + const xfrm_address_t *saddr, + const xfrm_address_t *daddr, + int family) { struct xfrm_policy_afinfo *afinfo; - struct xfrm_type_map *typemap; - struct xfrm_type *type; - int modload_attempted = 0; + struct dst_entry *dst; -retry: afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL)) - return NULL; - typemap = afinfo->type_map; + return ERR_PTR(-EAFNOSUPPORT); - read_lock(&typemap->lock); - type = typemap->map[proto]; - if (unlikely(type && !try_module_get(type->owner))) - type = NULL; - read_unlock(&typemap->lock); - if (!type && !modload_attempted) { - xfrm_policy_put_afinfo(afinfo); - request_module("xfrm-type-%d-%d", - (int) family, (int) proto); - modload_attempted = 1; - goto retry; - } + dst = afinfo->dst_lookup(net, tos, saddr, daddr); xfrm_policy_put_afinfo(afinfo); - return type; + + return dst; } -int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, - unsigned short family) +static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, + xfrm_address_t *prev_saddr, + xfrm_address_t *prev_daddr, + int family) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - int err = 0; + struct net *net = xs_net(x); + xfrm_address_t *saddr = &x->props.saddr; + xfrm_address_t *daddr = &x->id.daddr; + struct dst_entry *dst; - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; + if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) { + saddr = x->coaddr; + daddr = prev_daddr; + } + if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) { + saddr = prev_saddr; + daddr = x->coaddr; + } - if (likely(afinfo->dst_lookup != NULL)) - err = afinfo->dst_lookup(dst, fl); - else - err = -EINVAL; - xfrm_policy_put_afinfo(afinfo); - return err; -} -EXPORT_SYMBOL(xfrm_dst_lookup); + dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family); -void xfrm_put_type(struct xfrm_type *type) -{ - module_put(type->owner); + if (!IS_ERR(dst)) { + if (prev_saddr != saddr) + memcpy(prev_saddr, saddr, sizeof(*prev_saddr)); + if (prev_daddr != daddr) + memcpy(prev_daddr, daddr, sizeof(*prev_daddr)); + } + + return dst; } static inline unsigned long make_jiffies(long secs) @@ -148,20 +164,20 @@ static inline unsigned long make_jiffies(long secs) if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) return MAX_SCHEDULE_TIMEOUT-1; else - return secs*HZ; + return secs*HZ; } static void xfrm_policy_timer(unsigned long data) { - struct xfrm_policy *xp = (struct xfrm_policy*)data; - unsigned long now = (unsigned long)xtime.tv_sec; + struct xfrm_policy *xp = (struct xfrm_policy *)data; + unsigned long now = get_seconds(); long next = LONG_MAX; int warn = 0; int dir; read_lock(&xp->lock); - if (xp->dead) + if (unlikely(xp->walk.dead)) goto out; dir = xfrm_policy_id2dir(xp->index); @@ -204,7 +220,7 @@ static void xfrm_policy_timer(unsigned long data) } if (warn) - km_policy_expired(xp, dir, 0); + km_policy_expired(xp, dir, 0, 0); if (next != LONG_MAX && !mod_timer(&xp->timer, jiffies + make_jiffies(next))) xfrm_pol_hold(xp); @@ -217,28 +233,63 @@ out: expired: read_unlock(&xp->lock); if (!xfrm_policy_delete(xp, dir)) - km_policy_expired(xp, dir, 1); + km_policy_expired(xp, dir, 1, 0); xfrm_pol_put(xp); } +static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo) +{ + struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); + + if (unlikely(pol->walk.dead)) + flo = NULL; + else + xfrm_pol_hold(pol); + + return flo; +} + +static int xfrm_policy_flo_check(struct flow_cache_object *flo) +{ + struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); + + return !pol->walk.dead; +} + +static void xfrm_policy_flo_delete(struct flow_cache_object *flo) +{ + xfrm_pol_put(container_of(flo, struct xfrm_policy, flo)); +} + +static const struct flow_cache_ops xfrm_policy_fc_ops = { + .get = xfrm_policy_flo_get, + .check = xfrm_policy_flo_check, + .delete = xfrm_policy_flo_delete, +}; /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 * SPD calls. */ -struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) +struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) { struct xfrm_policy *policy; - policy = kmalloc(sizeof(struct xfrm_policy), gfp); + policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { - memset(policy, 0, sizeof(struct xfrm_policy)); - atomic_set(&policy->refcnt, 1); + write_pnet(&policy->xp_net, net); + INIT_LIST_HEAD(&policy->walk.all); + INIT_HLIST_NODE(&policy->bydst); + INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); - init_timer(&policy->timer); - policy->timer.data = (unsigned long)policy; - policy->timer.function = xfrm_policy_timer; + atomic_set(&policy->refcnt, 1); + skb_queue_head_init(&policy->polq.hold_queue); + setup_timer(&policy->timer, xfrm_policy_timer, + (unsigned long)policy); + setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, + (unsigned long)policy); + policy->flo.ops = &xfrm_policy_fc_ops; } return policy; } @@ -246,398 +297,906 @@ EXPORT_SYMBOL(xfrm_policy_alloc); /* Destroy xfrm_policy: descendant resources must be released to this moment. */ -void __xfrm_policy_destroy(struct xfrm_policy *policy) +void xfrm_policy_destroy(struct xfrm_policy *policy) { - BUG_ON(!policy->dead); + BUG_ON(!policy->walk.dead); - BUG_ON(policy->bundles); - - if (del_timer(&policy->timer)) + if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer)) BUG(); - security_xfrm_policy_free(policy); + security_xfrm_policy_free(policy->security); kfree(policy); } -EXPORT_SYMBOL(__xfrm_policy_destroy); +EXPORT_SYMBOL(xfrm_policy_destroy); -static void xfrm_policy_gc_kill(struct xfrm_policy *policy) +static void xfrm_queue_purge(struct sk_buff_head *list) { - struct dst_entry *dst; + struct sk_buff *skb; - while ((dst = policy->bundles) != NULL) { - policy->bundles = dst->next; - dst_free(dst); - } + while ((skb = skb_dequeue(list)) != NULL) + kfree_skb(skb); +} - if (del_timer(&policy->timer)) - atomic_dec(&policy->refcnt); +/* Rule must be locked. Release descentant resources, announce + * entry dead. The rule must be unlinked from lists to the moment. + */ - if (atomic_read(&policy->refcnt) > 1) - flow_cache_flush(); +static void xfrm_policy_kill(struct xfrm_policy *policy) +{ + policy->walk.dead = 1; + + atomic_inc(&policy->genid); + + if (del_timer(&policy->polq.hold_timer)) + xfrm_pol_put(policy); + xfrm_queue_purge(&policy->polq.hold_queue); + + if (del_timer(&policy->timer)) + xfrm_pol_put(policy); xfrm_pol_put(policy); } -static void xfrm_policy_gc_task(void *data) +static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; + +static inline unsigned int idx_hash(struct net *net, u32 index) +{ + return __idx_hash(index, net->xfrm.policy_idx_hmask); +} + +static struct hlist_head *policy_hash_bysel(struct net *net, + const struct xfrm_selector *sel, + unsigned short family, int dir) { - struct xfrm_policy *policy; - struct list_head *entry, *tmp; - struct list_head gc_list = LIST_HEAD_INIT(gc_list); + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; + unsigned int hash = __sel_hash(sel, family, hmask); + + return (hash == hmask + 1 ? + &net->xfrm.policy_inexact[dir] : + net->xfrm.policy_bydst[dir].table + hash); +} + +static struct hlist_head *policy_hash_direct(struct net *net, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + unsigned short family, int dir) +{ + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; + unsigned int hash = __addr_hash(daddr, saddr, family, hmask); - spin_lock_bh(&xfrm_policy_gc_lock); - list_splice_init(&xfrm_policy_gc_list, &gc_list); - spin_unlock_bh(&xfrm_policy_gc_lock); + return net->xfrm.policy_bydst[dir].table + hash; +} - list_for_each_safe(entry, tmp, &gc_list) { - policy = list_entry(entry, struct xfrm_policy, list); - xfrm_policy_gc_kill(policy); +static void xfrm_dst_hash_transfer(struct hlist_head *list, + struct hlist_head *ndsttable, + unsigned int nhashmask) +{ + struct hlist_node *tmp, *entry0 = NULL; + struct xfrm_policy *pol; + unsigned int h0 = 0; + +redo: + hlist_for_each_entry_safe(pol, tmp, list, bydst) { + unsigned int h; + + h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, + pol->family, nhashmask); + if (!entry0) { + hlist_del(&pol->bydst); + hlist_add_head(&pol->bydst, ndsttable+h); + h0 = h; + } else { + if (h != h0) + continue; + hlist_del(&pol->bydst); + hlist_add_after(entry0, &pol->bydst); + } + entry0 = &pol->bydst; + } + if (!hlist_empty(list)) { + entry0 = NULL; + goto redo; } } -/* Rule must be locked. Release descentant resources, announce - * entry dead. The rule must be unlinked from lists to the moment. - */ +static void xfrm_idx_hash_transfer(struct hlist_head *list, + struct hlist_head *nidxtable, + unsigned int nhashmask) +{ + struct hlist_node *tmp; + struct xfrm_policy *pol; -static void xfrm_policy_kill(struct xfrm_policy *policy) + hlist_for_each_entry_safe(pol, tmp, list, byidx) { + unsigned int h; + + h = __idx_hash(pol->index, nhashmask); + hlist_add_head(&pol->byidx, nidxtable+h); + } +} + +static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) { - int dead; + return ((old_hmask + 1) << 1) - 1; +} - write_lock_bh(&policy->lock); - dead = policy->dead; - policy->dead = 1; - write_unlock_bh(&policy->lock); +static void xfrm_bydst_resize(struct net *net, int dir) +{ + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; + unsigned int nhashmask = xfrm_new_hash_mask(hmask); + unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); + struct hlist_head *odst = net->xfrm.policy_bydst[dir].table; + struct hlist_head *ndst = xfrm_hash_alloc(nsize); + int i; - if (unlikely(dead)) { - WARN_ON(1); + if (!ndst) return; - } - spin_lock(&xfrm_policy_gc_lock); - list_add(&policy->list, &xfrm_policy_gc_list); - spin_unlock(&xfrm_policy_gc_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); - schedule_work(&xfrm_policy_gc_work); + for (i = hmask; i >= 0; i--) + xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); + + net->xfrm.policy_bydst[dir].table = ndst; + net->xfrm.policy_bydst[dir].hmask = nhashmask; + + write_unlock_bh(&net->xfrm.xfrm_policy_lock); + + xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); +} + +static void xfrm_byidx_resize(struct net *net, int total) +{ + unsigned int hmask = net->xfrm.policy_idx_hmask; + unsigned int nhashmask = xfrm_new_hash_mask(hmask); + unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); + struct hlist_head *oidx = net->xfrm.policy_byidx; + struct hlist_head *nidx = xfrm_hash_alloc(nsize); + int i; + + if (!nidx) + return; + + write_lock_bh(&net->xfrm.xfrm_policy_lock); + + for (i = hmask; i >= 0; i--) + xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); + + net->xfrm.policy_byidx = nidx; + net->xfrm.policy_idx_hmask = nhashmask; + + write_unlock_bh(&net->xfrm.xfrm_policy_lock); + + xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); +} + +static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total) +{ + unsigned int cnt = net->xfrm.policy_count[dir]; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; + + if (total) + *total += cnt; + + if ((hmask + 1) < xfrm_policy_hashmax && + cnt > hmask) + return 1; + + return 0; +} + +static inline int xfrm_byidx_should_resize(struct net *net, int total) +{ + unsigned int hmask = net->xfrm.policy_idx_hmask; + + if ((hmask + 1) < xfrm_policy_hashmax && + total > hmask) + return 1; + + return 0; +} + +void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) +{ + read_lock_bh(&net->xfrm.xfrm_policy_lock); + si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; + si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; + si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; + si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; + si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; + si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; + si->spdhcnt = net->xfrm.policy_idx_hmask; + si->spdhmcnt = xfrm_policy_hashmax; + read_unlock_bh(&net->xfrm.xfrm_policy_lock); +} +EXPORT_SYMBOL(xfrm_spd_getinfo); + +static DEFINE_MUTEX(hash_resize_mutex); +static void xfrm_hash_resize(struct work_struct *work) +{ + struct net *net = container_of(work, struct net, xfrm.policy_hash_work); + int dir, total; + + mutex_lock(&hash_resize_mutex); + + total = 0; + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + if (xfrm_bydst_should_resize(net, dir, &total)) + xfrm_bydst_resize(net, dir); + } + if (xfrm_byidx_should_resize(net, total)) + xfrm_byidx_resize(net, total); + + mutex_unlock(&hash_resize_mutex); } /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ -static u32 xfrm_gen_index(int dir) +static u32 xfrm_gen_index(struct net *net, int dir, u32 index) { - u32 idx; - struct xfrm_policy *p; static u32 idx_generator; for (;;) { - idx = (idx_generator | dir); - idx_generator += 8; + struct hlist_head *list; + struct xfrm_policy *p; + u32 idx; + int found; + + if (!index) { + idx = (idx_generator | dir); + idx_generator += 8; + } else { + idx = index; + index = 0; + } + if (idx == 0) idx = 8; - for (p = xfrm_policy_list[dir]; p; p = p->next) { - if (p->index == idx) + list = net->xfrm.policy_byidx + idx_hash(net, idx); + found = 0; + hlist_for_each_entry(p, list, byidx) { + if (p->index == idx) { + found = 1; break; + } } - if (!p) + if (!found) return idx; } } -int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) +static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2) +{ + u32 *p1 = (u32 *) s1; + u32 *p2 = (u32 *) s2; + int len = sizeof(struct xfrm_selector) / sizeof(u32); + int i; + + for (i = 0; i < len; i++) { + if (p1[i] != p2[i]) + return 1; + } + + return 0; +} + +static void xfrm_policy_requeue(struct xfrm_policy *old, + struct xfrm_policy *new) +{ + struct xfrm_policy_queue *pq = &old->polq; + struct sk_buff_head list; + + __skb_queue_head_init(&list); + + spin_lock_bh(&pq->hold_queue.lock); + skb_queue_splice_init(&pq->hold_queue, &list); + if (del_timer(&pq->hold_timer)) + xfrm_pol_put(old); + spin_unlock_bh(&pq->hold_queue.lock); + + if (skb_queue_empty(&list)) + return; + + pq = &new->polq; + + spin_lock_bh(&pq->hold_queue.lock); + skb_queue_splice(&list, &pq->hold_queue); + pq->timeout = XFRM_QUEUE_TMO_MIN; + if (!mod_timer(&pq->hold_timer, jiffies)) + xfrm_pol_hold(new); + spin_unlock_bh(&pq->hold_queue.lock); +} + +static bool xfrm_policy_mark_match(struct xfrm_policy *policy, + struct xfrm_policy *pol) { - struct xfrm_policy *pol, **p; - struct xfrm_policy *delpol = NULL; - struct xfrm_policy **newpos = NULL; - struct dst_entry *gc_list; + u32 mark = policy->mark.v & policy->mark.m; + + if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m) + return true; + + if ((mark & pol->mark.m) == pol->mark.v && + policy->priority == pol->priority) + return true; - write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { - if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && - xfrm_sec_ctx_match(pol->security, policy->security)) { + return false; +} + +int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) +{ + struct net *net = xp_net(policy); + struct xfrm_policy *pol; + struct xfrm_policy *delpol; + struct hlist_head *chain; + struct hlist_node *newpos; + + write_lock_bh(&net->xfrm.xfrm_policy_lock); + chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); + delpol = NULL; + newpos = NULL; + hlist_for_each_entry(pol, chain, bydst) { + if (pol->type == policy->type && + !selector_cmp(&pol->selector, &policy->selector) && + xfrm_policy_mark_match(policy, pol) && + xfrm_sec_ctx_match(pol->security, policy->security) && + !WARN_ON(delpol)) { if (excl) { - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return -EEXIST; } - *p = pol->next; delpol = pol; if (policy->priority > pol->priority) continue; } else if (policy->priority >= pol->priority) { - p = &pol->next; + newpos = &pol->bydst; continue; } - if (!newpos) - newpos = p; if (delpol) break; - p = &pol->next; } if (newpos) - p = newpos; + hlist_add_after(newpos, &policy->bydst); + else + hlist_add_head(&policy->bydst, chain); xfrm_pol_hold(policy); - policy->next = *p; - *p = policy; - atomic_inc(&flow_cache_genid); - policy->index = delpol ? delpol->index : xfrm_gen_index(dir); - policy->curlft.add_time = (unsigned long)xtime.tv_sec; + net->xfrm.policy_count[dir]++; + atomic_inc(&net->xfrm.flow_cache_genid); + + /* After previous checking, family can either be AF_INET or AF_INET6 */ + if (policy->family == AF_INET) + rt_genid_bump_ipv4(net); + else + rt_genid_bump_ipv6(net); + + if (delpol) { + xfrm_policy_requeue(delpol, policy); + __xfrm_policy_unlink(delpol, dir); + } + policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index); + hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); + policy->curlft.add_time = get_seconds(); policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); - write_unlock_bh(&xfrm_policy_lock); + list_add(&policy->walk.all, &net->xfrm.policy_all); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (delpol) xfrm_policy_kill(delpol); - - read_lock_bh(&xfrm_policy_lock); - gc_list = NULL; - for (policy = policy->next; policy; policy = policy->next) { - struct dst_entry *dst; - - write_lock(&policy->lock); - dst = policy->bundles; - if (dst) { - struct dst_entry *tail = dst; - while (tail->next) - tail = tail->next; - tail->next = gc_list; - gc_list = dst; - - policy->bundles = NULL; - } - write_unlock(&policy->lock); - } - read_unlock_bh(&xfrm_policy_lock); - - while (gc_list) { - struct dst_entry *dst = gc_list; - - gc_list = dst->next; - dst_free(dst); - } + else if (xfrm_bydst_should_resize(net, dir, NULL)) + schedule_work(&net->xfrm.policy_hash_work); return 0; } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, - struct xfrm_sec_ctx *ctx, int delete) +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, + int dir, struct xfrm_selector *sel, + struct xfrm_sec_ctx *ctx, int delete, + int *err) { - struct xfrm_policy *pol, **p; - - write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { - if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && - (xfrm_sec_ctx_match(ctx, pol->security))) { + struct xfrm_policy *pol, *ret; + struct hlist_head *chain; + + *err = 0; + write_lock_bh(&net->xfrm.xfrm_policy_lock); + chain = policy_hash_bysel(net, sel, sel->family, dir); + ret = NULL; + hlist_for_each_entry(pol, chain, bydst) { + if (pol->type == type && + (mark & pol->mark.m) == pol->mark.v && + !selector_cmp(sel, &pol->selector) && + xfrm_sec_ctx_match(ctx, pol->security)) { xfrm_pol_hold(pol); - if (delete) - *p = pol->next; + if (delete) { + *err = security_xfrm_policy_delete( + pol->security); + if (*err) { + write_unlock_bh(&net->xfrm.xfrm_policy_lock); + return pol; + } + __xfrm_policy_unlink(pol, dir); + } + ret = pol; break; } } - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); - if (pol && delete) { - atomic_inc(&flow_cache_genid); - xfrm_policy_kill(pol); - } - return pol; + if (ret && delete) + xfrm_policy_kill(ret); + return ret; } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) +struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, + int dir, u32 id, int delete, int *err) { - struct xfrm_policy *pol, **p; + struct xfrm_policy *pol, *ret; + struct hlist_head *chain; + + *err = -ENOENT; + if (xfrm_policy_id2dir(id) != dir) + return NULL; - write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { - if (pol->index == id) { + *err = 0; + write_lock_bh(&net->xfrm.xfrm_policy_lock); + chain = net->xfrm.policy_byidx + idx_hash(net, id); + ret = NULL; + hlist_for_each_entry(pol, chain, byidx) { + if (pol->type == type && pol->index == id && + (mark & pol->mark.m) == pol->mark.v) { xfrm_pol_hold(pol); - if (delete) - *p = pol->next; + if (delete) { + *err = security_xfrm_policy_delete( + pol->security); + if (*err) { + write_unlock_bh(&net->xfrm.xfrm_policy_lock); + return pol; + } + __xfrm_policy_unlink(pol, dir); + } + ret = pol; break; } } - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); - if (pol && delete) { - atomic_inc(&flow_cache_genid); - xfrm_policy_kill(pol); - } - return pol; + if (ret && delete) + xfrm_policy_kill(ret); + return ret; } EXPORT_SYMBOL(xfrm_policy_byid); -void xfrm_policy_flush(void) +#ifdef CONFIG_SECURITY_NETWORK_XFRM +static inline int +xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) { - struct xfrm_policy *xp; - int dir; + int dir, err = 0; + + for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { + struct xfrm_policy *pol; + int i; + + hlist_for_each_entry(pol, + &net->xfrm.policy_inexact[dir], bydst) { + if (pol->type != type) + continue; + err = security_xfrm_policy_delete(pol->security); + if (err) { + xfrm_audit_policy_delete(pol, 0, task_valid); + return err; + } + } + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, + net->xfrm.policy_bydst[dir].table + i, + bydst) { + if (pol->type != type) + continue; + err = security_xfrm_policy_delete( + pol->security); + if (err) { + xfrm_audit_policy_delete(pol, 0, + task_valid); + return err; + } + } + } + } + return err; +} +#else +static inline int +xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) +{ + return 0; +} +#endif + +int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) +{ + int dir, err = 0, cnt = 0; + + write_lock_bh(&net->xfrm.xfrm_policy_lock); + + err = xfrm_policy_flush_secctx_check(net, type, task_valid); + if (err) + goto out; - write_lock_bh(&xfrm_policy_lock); for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - while ((xp = xfrm_policy_list[dir]) != NULL) { - xfrm_policy_list[dir] = xp->next; - write_unlock_bh(&xfrm_policy_lock); + struct xfrm_policy *pol; + int i; + + again1: + hlist_for_each_entry(pol, + &net->xfrm.policy_inexact[dir], bydst) { + if (pol->type != type) + continue; + __xfrm_policy_unlink(pol, dir); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); + cnt++; + + xfrm_audit_policy_delete(pol, 1, task_valid); - xfrm_policy_kill(xp); + xfrm_policy_kill(pol); - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); + goto again1; } + + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { + again2: + hlist_for_each_entry(pol, + net->xfrm.policy_bydst[dir].table + i, + bydst) { + if (pol->type != type) + continue; + __xfrm_policy_unlink(pol, dir); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); + cnt++; + + xfrm_audit_policy_delete(pol, 1, task_valid); + xfrm_policy_kill(pol); + + write_lock_bh(&net->xfrm.xfrm_policy_lock); + goto again2; + } + } + } - atomic_inc(&flow_cache_genid); - write_unlock_bh(&xfrm_policy_lock); + if (!cnt) + err = -ESRCH; +out: + write_unlock_bh(&net->xfrm.xfrm_policy_lock); + return err; } EXPORT_SYMBOL(xfrm_policy_flush); -int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), +int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, + int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *xp; - int dir; - int count = 0; + struct xfrm_policy *pol; + struct xfrm_policy_walk_entry *x; int error = 0; - read_lock_bh(&xfrm_policy_lock); - for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) - count++; - } + if (walk->type >= XFRM_POLICY_TYPE_MAX && + walk->type != XFRM_POLICY_TYPE_ANY) + return -EINVAL; - if (count == 0) { - error = -ENOENT; - goto out; - } + if (list_empty(&walk->walk.all) && walk->seq != 0) + return 0; - for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) { - error = func(xp, dir%XFRM_POLICY_MAX, --count, data); - if (error) - goto out; + write_lock_bh(&net->xfrm.xfrm_policy_lock); + if (list_empty(&walk->walk.all)) + x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); + else + x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); + list_for_each_entry_from(x, &net->xfrm.policy_all, all) { + if (x->dead) + continue; + pol = container_of(x, struct xfrm_policy, walk); + if (walk->type != XFRM_POLICY_TYPE_ANY && + walk->type != pol->type) + continue; + error = func(pol, xfrm_policy_id2dir(pol->index), + walk->seq, data); + if (error) { + list_move_tail(&walk->walk.all, &x->all); + goto out; } + walk->seq++; } - + if (walk->seq == 0) { + error = -ENOENT; + goto out; + } + list_del_init(&walk->walk.all); out: - read_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); -/* Find policy to apply to this flow. */ +void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) +{ + INIT_LIST_HEAD(&walk->walk.all); + walk->walk.dead = 1; + walk->type = type; + walk->seq = 0; +} +EXPORT_SYMBOL(xfrm_policy_walk_init); -static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) +void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net) { - struct xfrm_policy *pol; + if (list_empty(&walk->walk.all)) + return; - read_lock_bh(&xfrm_policy_lock); - for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) { - struct xfrm_selector *sel = &pol->selector; - int match; + write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ + list_del(&walk->walk.all); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); +} +EXPORT_SYMBOL(xfrm_policy_walk_done); - if (pol->family != family) - continue; +/* + * Find policy to apply to this flow. + * + * Returns 0 if policy found, else an -errno. + */ +static int xfrm_policy_match(const struct xfrm_policy *pol, + const struct flowi *fl, + u8 type, u16 family, int dir) +{ + const struct xfrm_selector *sel = &pol->selector; + int ret = -ESRCH; + bool match; - match = xfrm_selector_match(sel, fl, family); + if (pol->family != family || + (fl->flowi_mark & pol->mark.m) != pol->mark.v || + pol->type != type) + return ret; - if (match) { - if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) { - xfrm_pol_hold(pol); - break; + match = xfrm_selector_match(sel, fl, family); + if (match) + ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, + dir); + + return ret; +} + +static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, + const struct flowi *fl, + u16 family, u8 dir) +{ + int err; + struct xfrm_policy *pol, *ret; + const xfrm_address_t *daddr, *saddr; + struct hlist_head *chain; + u32 priority = ~0U; + + daddr = xfrm_flowi_daddr(fl, family); + saddr = xfrm_flowi_saddr(fl, family); + if (unlikely(!daddr || !saddr)) + return NULL; + + read_lock_bh(&net->xfrm.xfrm_policy_lock); + chain = policy_hash_direct(net, daddr, saddr, family, dir); + ret = NULL; + hlist_for_each_entry(pol, chain, bydst) { + err = xfrm_policy_match(pol, fl, type, family, dir); + if (err) { + if (err == -ESRCH) + continue; + else { + ret = ERR_PTR(err); + goto fail; + } + } else { + ret = pol; + priority = ret->priority; + break; + } + } + chain = &net->xfrm.policy_inexact[dir]; + hlist_for_each_entry(pol, chain, bydst) { + err = xfrm_policy_match(pol, fl, type, family, dir); + if (err) { + if (err == -ESRCH) + continue; + else { + ret = ERR_PTR(err); + goto fail; } + } else if (pol->priority < priority) { + ret = pol; + break; } } - read_unlock_bh(&xfrm_policy_lock); - if ((*objp = (void *) pol) != NULL) - *obj_refp = &pol->refcnt; + if (ret) + xfrm_pol_hold(ret); +fail: + read_unlock_bh(&net->xfrm.xfrm_policy_lock); + + return ret; +} + +static struct xfrm_policy * +__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) +{ +#ifdef CONFIG_XFRM_SUB_POLICY + struct xfrm_policy *pol; + + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); + if (pol != NULL) + return pol; +#endif + return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); +} + +static int flow_to_policy_dir(int dir) +{ + if (XFRM_POLICY_IN == FLOW_DIR_IN && + XFRM_POLICY_OUT == FLOW_DIR_OUT && + XFRM_POLICY_FWD == FLOW_DIR_FWD) + return dir; + + switch (dir) { + default: + case FLOW_DIR_IN: + return XFRM_POLICY_IN; + case FLOW_DIR_OUT: + return XFRM_POLICY_OUT; + case FLOW_DIR_FWD: + return XFRM_POLICY_FWD; + } +} + +static struct flow_cache_object * +xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, + u8 dir, struct flow_cache_object *old_obj, void *ctx) +{ + struct xfrm_policy *pol; + + if (old_obj) + xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); + + pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir)); + if (IS_ERR_OR_NULL(pol)) + return ERR_CAST(pol); + + /* Resolver returns two references: + * one for cache and one for caller of flow_cache_lookup() */ + xfrm_pol_hold(pol); + + return &pol->flo; } static inline int policy_to_flow_dir(int dir) { if (XFRM_POLICY_IN == FLOW_DIR_IN && - XFRM_POLICY_OUT == FLOW_DIR_OUT && - XFRM_POLICY_FWD == FLOW_DIR_FWD) - return dir; - switch (dir) { - default: - case XFRM_POLICY_IN: - return FLOW_DIR_IN; - case XFRM_POLICY_OUT: - return FLOW_DIR_OUT; - case XFRM_POLICY_FWD: - return FLOW_DIR_FWD; - }; -} - -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid) + XFRM_POLICY_OUT == FLOW_DIR_OUT && + XFRM_POLICY_FWD == FLOW_DIR_FWD) + return dir; + switch (dir) { + default: + case XFRM_POLICY_IN: + return FLOW_DIR_IN; + case XFRM_POLICY_OUT: + return FLOW_DIR_OUT; + case XFRM_POLICY_FWD: + return FLOW_DIR_FWD; + } +} + +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, + const struct flowi *fl) { struct xfrm_policy *pol; + struct net *net = sock_net(sk); - read_lock_bh(&xfrm_policy_lock); + read_lock_bh(&net->xfrm.xfrm_policy_lock); if ((pol = sk->sk_policy[dir]) != NULL) { - int match = xfrm_selector_match(&pol->selector, fl, - sk->sk_family); - int err = 0; + bool match = xfrm_selector_match(&pol->selector, fl, + sk->sk_family); + int err = 0; - if (match) - err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir)); - - if (match && !err) - xfrm_pol_hold(pol); - else + if (match) { + if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { + pol = NULL; + goto out; + } + err = security_xfrm_policy_lookup(pol->security, + fl->flowi_secid, + policy_to_flow_dir(dir)); + if (!err) + xfrm_pol_hold(pol); + else if (err == -ESRCH) + pol = NULL; + else + pol = ERR_PTR(err); + } else pol = NULL; } - read_unlock_bh(&xfrm_policy_lock); +out: + read_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { - pol->next = xfrm_policy_list[dir]; - xfrm_policy_list[dir] = pol; + struct net *net = xp_net(pol); + struct hlist_head *chain = policy_hash_bysel(net, &pol->selector, + pol->family, dir); + + list_add(&pol->walk.all, &net->xfrm.policy_all); + hlist_add_head(&pol->bydst, chain); + hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index)); + net->xfrm.policy_count[dir]++; xfrm_pol_hold(pol); + + if (xfrm_bydst_should_resize(net, dir, NULL)) + schedule_work(&net->xfrm.policy_hash_work); } static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir) { - struct xfrm_policy **polp; + struct net *net = xp_net(pol); - for (polp = &xfrm_policy_list[dir]; - *polp != NULL; polp = &(*polp)->next) { - if (*polp == pol) { - *polp = pol->next; - return pol; - } - } - return NULL; + if (hlist_unhashed(&pol->bydst)) + return NULL; + + hlist_del_init(&pol->bydst); + hlist_del(&pol->byidx); + list_del(&pol->walk.all); + net->xfrm.policy_count[dir]--; + + return pol; } int xfrm_policy_delete(struct xfrm_policy *pol, int dir) { - write_lock_bh(&xfrm_policy_lock); + struct net *net = xp_net(pol); + + write_lock_bh(&net->xfrm.xfrm_policy_lock); pol = __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (pol) { - if (dir < XFRM_POLICY_MAX) - atomic_inc(&flow_cache_genid); xfrm_policy_kill(pol); return 0; } return -ENOENT; } +EXPORT_SYMBOL(xfrm_policy_delete); int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { + struct net *net = xp_net(pol); struct xfrm_policy *old_pol; - write_lock_bh(&xfrm_policy_lock); +#ifdef CONFIG_XFRM_SUB_POLICY + if (pol && pol->type != XFRM_POLICY_TYPE_MAIN) + return -EINVAL; +#endif + + write_lock_bh(&net->xfrm.xfrm_policy_lock); old_pol = sk->sk_policy[dir]; sk->sk_policy[dir] = pol; if (pol) { - pol->curlft.add_time = (unsigned long)xtime.tv_sec; - pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir); + pol->curlft.add_time = get_seconds(); + pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } - if (old_pol) + if (old_pol) { + if (pol) + xfrm_policy_requeue(old_pol, pol); + + /* Unlinking succeeds always. This is the only function + * allowed to delete or replace socket policy. + */ __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); - write_unlock_bh(&xfrm_policy_lock); + } + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (old_pol) { xfrm_policy_kill(old_pol); @@ -645,27 +1204,31 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) return 0; } -static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) +static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) { - struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC); + struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); + struct net *net = xp_net(old); if (newp) { newp->selector = old->selector; - if (security_xfrm_policy_clone(old, newp)) { + if (security_xfrm_policy_clone(old->security, + &newp->security)) { kfree(newp); return NULL; /* ENOMEM */ } newp->lft = old->lft; newp->curlft = old->curlft; + newp->mark = old->mark; newp->action = old->action; newp->flags = old->flags; newp->xfrm_nr = old->xfrm_nr; newp->index = old->index; + newp->type = old->type; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_pol_put(newp); } return newp; @@ -684,27 +1247,49 @@ int __xfrm_sk_clone_policy(struct sock *sk) return 0; } +static int +xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote, + unsigned short family) +{ + int err; + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + + if (unlikely(afinfo == NULL)) + return -EINVAL; + err = afinfo->get_saddr(net, local, remote); + xfrm_policy_put_afinfo(afinfo); + return err; +} + /* Resolve list of templates for the flow, given policy. */ static int -xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, - struct xfrm_state **xfrm, - unsigned short family) +xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, + struct xfrm_state **xfrm, unsigned short family) { + struct net *net = xp_net(policy); int nx; int i, error; xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); + xfrm_address_t tmp; - for (nx=0, i = 0; i < policy->xfrm_nr; i++) { + for (nx = 0, i = 0; i < policy->xfrm_nr; i++) { struct xfrm_state *x; xfrm_address_t *remote = daddr; xfrm_address_t *local = saddr; struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; - if (tmpl->mode) { + if (tmpl->mode == XFRM_MODE_TUNNEL || + tmpl->mode == XFRM_MODE_BEET) { remote = &tmpl->id.daddr; local = &tmpl->saddr; + if (xfrm_addr_any(local, tmpl->encap_family)) { + error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family); + if (error) + goto fail; + local = &tmp; + } } x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); @@ -719,6 +1304,8 @@ xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, error = (x->km.state == XFRM_STATE_ERROR ? -EINVAL : -EAGAIN); xfrm_state_put(x); + } else if (error == -ESRCH) { + error = -EAGAIN; } if (!tmpl->optional) @@ -727,189 +1314,900 @@ xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, return nx; fail: - for (nx--; nx>=0; nx--) + for (nx--; nx >= 0; nx--) xfrm_state_put(xfrm[nx]); return error; } +static int +xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, + struct xfrm_state **xfrm, unsigned short family) +{ + struct xfrm_state *tp[XFRM_MAX_DEPTH]; + struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; + int cnx = 0; + int error; + int ret; + int i; + + for (i = 0; i < npols; i++) { + if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) { + error = -ENOBUFS; + goto fail; + } + + ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family); + if (ret < 0) { + error = ret; + goto fail; + } else + cnx += ret; + } + + /* found states are sorted for outbound processing */ + if (npols > 1) + xfrm_state_sort(xfrm, tpp, cnx, family); + + return cnx; + + fail: + for (cnx--; cnx >= 0; cnx--) + xfrm_state_put(tpp[cnx]); + return error; + +} + /* Check that the bundle accepts the flow and its components are * still valid. */ -static struct dst_entry * -xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family) +static inline int xfrm_get_tos(const struct flowi *fl, int family) { - struct dst_entry *x; struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return ERR_PTR(-EINVAL); - x = afinfo->find_bundle(fl, policy); + int tos; + + if (!afinfo) + return -EINVAL; + + tos = afinfo->get_tos(fl); + xfrm_policy_put_afinfo(afinfo); - return x; + + return tos; } -/* Allocate chain of dst_entry's, attach known xfrm's, calculate - * all the metrics... Shortly, bundle a bundle. - */ +static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo) +{ + struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); + struct dst_entry *dst = &xdst->u.dst; + + if (xdst->route == NULL) { + /* Dummy bundle - if it has xfrms we were not + * able to build bundle as template resolution failed. + * It means we need to try again resolving. */ + if (xdst->num_xfrms > 0) + return NULL; + } else if (dst->flags & DST_XFRM_QUEUE) { + return NULL; + } else { + /* Real bundle */ + if (stale_bundle(dst)) + return NULL; + } -static int -xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, - struct flowi *fl, struct dst_entry **dst_p, - unsigned short family) + dst_hold(dst); + return flo; +} + +static int xfrm_bundle_flo_check(struct flow_cache_object *flo) +{ + struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); + struct dst_entry *dst = &xdst->u.dst; + + if (!xdst->route) + return 0; + if (stale_bundle(dst)) + return 0; + + return 1; +} + +static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) +{ + struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); + struct dst_entry *dst = &xdst->u.dst; + + dst_free(dst); +} + +static const struct flow_cache_ops xfrm_bundle_fc_ops = { + .get = xfrm_bundle_flo_get, + .check = xfrm_bundle_flo_check, + .delete = xfrm_bundle_flo_delete, +}; + +static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { - int err; struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - if (unlikely(afinfo == NULL)) + struct dst_ops *dst_ops; + struct xfrm_dst *xdst; + + if (!afinfo) + return ERR_PTR(-EINVAL); + + switch (family) { + case AF_INET: + dst_ops = &net->xfrm.xfrm4_dst_ops; + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + dst_ops = &net->xfrm.xfrm6_dst_ops; + break; +#endif + default: + BUG(); + } + xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0); + + if (likely(xdst)) { + struct dst_entry *dst = &xdst->u.dst; + + memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); + xdst->flo.ops = &xfrm_bundle_fc_ops; + if (afinfo->init_dst) + afinfo->init_dst(net, xdst); + } else + xdst = ERR_PTR(-ENOBUFS); + + xfrm_policy_put_afinfo(afinfo); + + return xdst; +} + +static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, + int nfheader_len) +{ + struct xfrm_policy_afinfo *afinfo = + xfrm_policy_get_afinfo(dst->ops->family); + int err; + + if (!afinfo) return -EINVAL; - err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p); + + err = afinfo->init_path(path, dst, nfheader_len); + xfrm_policy_put_afinfo(afinfo); + return err; } +static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + const struct flowi *fl) +{ + struct xfrm_policy_afinfo *afinfo = + xfrm_policy_get_afinfo(xdst->u.dst.ops->family); + int err; -static int stale_bundle(struct dst_entry *dst); + if (!afinfo) + return -EINVAL; -/* Main function: finds/creates a bundle for given flow. - * - * At the moment we eat a raw IP route. Mostly to speed up lookups - * on interfaces with disabled IPsec. + err = afinfo->fill_dst(xdst, dev, fl); + + xfrm_policy_put_afinfo(afinfo); + + return err; +} + + +/* Allocate chain of dst_entry's, attach known xfrm's, calculate + * all the metrics... Shortly, bundle a bundle. */ -int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags) + +static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, + struct xfrm_state **xfrm, int nx, + const struct flowi *fl, + struct dst_entry *dst) { - struct xfrm_policy *policy; - struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; - struct dst_entry *dst, *dst_orig = *dst_p; - int nx = 0; + struct net *net = xp_net(policy); + unsigned long now = jiffies; + struct net_device *dev; + struct xfrm_mode *inner_mode; + struct dst_entry *dst_prev = NULL; + struct dst_entry *dst0 = NULL; + int i = 0; int err; - u32 genid; - u16 family = dst_orig->ops->family; - u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); - u32 sk_sid = security_sk_sid(sk, fl, dir); -restart: - genid = atomic_read(&flow_cache_genid); - policy = NULL; - if (sk && sk->sk_policy[1]) - policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid); - - if (!policy) { - /* To accelerate a bit... */ - if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) - return 0; + int header_len = 0; + int nfheader_len = 0; + int trailer_len = 0; + int tos; + int family = policy->selector.family; + xfrm_address_t saddr, daddr; + + xfrm_flowi_addr_get(fl, &saddr, &daddr, family); + + tos = xfrm_get_tos(fl, family); + err = tos; + if (tos < 0) + goto put_states; + + dst_hold(dst); + + for (; i < nx; i++) { + struct xfrm_dst *xdst = xfrm_alloc_dst(net, family); + struct dst_entry *dst1 = &xdst->u.dst; + + err = PTR_ERR(xdst); + if (IS_ERR(xdst)) { + dst_release(dst); + goto put_states; + } + + if (xfrm[i]->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(xfrm[i], + xfrm_af2proto(family)); + if (!inner_mode) { + err = -EAFNOSUPPORT; + dst_release(dst); + goto put_states; + } + } else + inner_mode = xfrm[i]->inner_mode; + + if (!dst_prev) + dst0 = dst1; + else { + dst_prev->child = dst_clone(dst1); + dst1->flags |= DST_NOHASH; + } + + xdst->route = dst; + dst_copy_metrics(dst1, dst); + + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { + family = xfrm[i]->props.family; + dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr, + family); + err = PTR_ERR(dst); + if (IS_ERR(dst)) + goto put_states; + } else + dst_hold(dst); + + dst1->xfrm = xfrm[i]; + xdst->xfrm_genid = xfrm[i]->genid; - policy = flow_cache_lookup(fl, sk_sid, family, dir, - xfrm_policy_lookup); + dst1->obsolete = DST_OBSOLETE_FORCE_CHK; + dst1->flags |= DST_HOST; + dst1->lastuse = now; + + dst1->input = dst_discard; + dst1->output = inner_mode->afinfo->output; + + dst1->next = dst_prev; + dst_prev = dst1; + + header_len += xfrm[i]->props.header_len; + if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT) + nfheader_len += xfrm[i]->props.header_len; + trailer_len += xfrm[i]->props.trailer_len; } - if (!policy) - return 0; + dst_prev->child = dst; + dst0->path = dst; - policy->curlft.use_time = (unsigned long)xtime.tv_sec; + err = -ENODEV; + dev = dst->dev; + if (!dev) + goto free_dst; - switch (policy->action) { - case XFRM_POLICY_BLOCK: - /* Prohibit the flow */ - err = -EPERM; - goto error; + xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); + xfrm_init_pmtu(dst_prev); - case XFRM_POLICY_ALLOW: - if (policy->xfrm_nr == 0) { - /* Flow passes not transformed. */ - xfrm_pol_put(policy); - return 0; + for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) { + struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev; + + err = xfrm_fill_dst(xdst, dev, fl); + if (err) + goto free_dst; + + dst_prev->header_len = header_len; + dst_prev->trailer_len = trailer_len; + header_len -= xdst->u.dst.xfrm->props.header_len; + trailer_len -= xdst->u.dst.xfrm->props.trailer_len; + } + +out: + return dst0; + +put_states: + for (; i < nx; i++) + xfrm_state_put(xfrm[i]); +free_dst: + if (dst0) + dst_free(dst0); + dst0 = ERR_PTR(err); + goto out; +} + +#ifdef CONFIG_XFRM_SUB_POLICY +static int xfrm_dst_alloc_copy(void **target, const void *src, int size) +{ + if (!*target) { + *target = kmalloc(size, GFP_ATOMIC); + if (!*target) + return -ENOMEM; + } + + memcpy(*target, src, size); + return 0; +} +#endif + +static int xfrm_dst_update_parent(struct dst_entry *dst, + const struct xfrm_selector *sel) +{ +#ifdef CONFIG_XFRM_SUB_POLICY + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + return xfrm_dst_alloc_copy((void **)&(xdst->partner), + sel, sizeof(*sel)); +#else + return 0; +#endif +} + +static int xfrm_dst_update_origin(struct dst_entry *dst, + const struct flowi *fl) +{ +#ifdef CONFIG_XFRM_SUB_POLICY + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl)); +#else + return 0; +#endif +} + +static int xfrm_expand_policies(const struct flowi *fl, u16 family, + struct xfrm_policy **pols, + int *num_pols, int *num_xfrms) +{ + int i; + + if (*num_pols == 0 || !pols[0]) { + *num_pols = 0; + *num_xfrms = 0; + return 0; + } + if (IS_ERR(pols[0])) + return PTR_ERR(pols[0]); + + *num_xfrms = pols[0]->xfrm_nr; + +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW && + pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]), + XFRM_POLICY_TYPE_MAIN, + fl, family, + XFRM_POLICY_OUT); + if (pols[1]) { + if (IS_ERR(pols[1])) { + xfrm_pols_put(pols, *num_pols); + return PTR_ERR(pols[1]); + } + (*num_pols)++; + (*num_xfrms) += pols[1]->xfrm_nr; } + } +#endif + for (i = 0; i < *num_pols; i++) { + if (pols[i]->action != XFRM_POLICY_ALLOW) { + *num_xfrms = -1; + break; + } + } - /* Try to find matching bundle. - * - * LATER: help from flow cache. It is optional, this - * is required only for output policy. - */ - dst = xfrm_find_bundle(fl, policy, family); + return 0; + +} + +static struct xfrm_dst * +xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, + const struct flowi *fl, u16 family, + struct dst_entry *dst_orig) +{ + struct net *net = xp_net(pols[0]); + struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; + struct dst_entry *dst; + struct xfrm_dst *xdst; + int err; + + /* Try to instantiate a bundle */ + err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); + if (err <= 0) { + if (err != 0 && err != -EAGAIN) + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); + return ERR_PTR(err); + } + + dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); + if (IS_ERR(dst)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); + return ERR_CAST(dst); + } + + xdst = (struct xfrm_dst *)dst; + xdst->num_xfrms = err; + if (num_pols > 1) + err = xfrm_dst_update_parent(dst, &pols[1]->selector); + else + err = xfrm_dst_update_origin(dst, fl); + if (unlikely(err)) { + dst_free(dst); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); + return ERR_PTR(err); + } + + xdst->num_pols = num_pols; + memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); + xdst->policy_genid = atomic_read(&pols[0]->genid); + + return xdst; +} + +static void xfrm_policy_queue_process(unsigned long arg) +{ + int err = 0; + struct sk_buff *skb; + struct sock *sk; + struct dst_entry *dst; + struct xfrm_policy *pol = (struct xfrm_policy *)arg; + struct xfrm_policy_queue *pq = &pol->polq; + struct flowi fl; + struct sk_buff_head list; + + spin_lock(&pq->hold_queue.lock); + skb = skb_peek(&pq->hold_queue); + if (!skb) { + spin_unlock(&pq->hold_queue.lock); + goto out; + } + dst = skb_dst(skb); + sk = skb->sk; + xfrm_decode_session(skb, &fl, dst->ops->family); + spin_unlock(&pq->hold_queue.lock); + + dst_hold(dst->path); + dst = xfrm_lookup(xp_net(pol), dst->path, &fl, + sk, 0); + if (IS_ERR(dst)) + goto purge_queue; + + if (dst->flags & DST_XFRM_QUEUE) { + dst_release(dst); + + if (pq->timeout >= XFRM_QUEUE_TMO_MAX) + goto purge_queue; + + pq->timeout = pq->timeout << 1; + if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout)) + xfrm_pol_hold(pol); + goto out; + } + + dst_release(dst); + + __skb_queue_head_init(&list); + + spin_lock(&pq->hold_queue.lock); + pq->timeout = 0; + skb_queue_splice_init(&pq->hold_queue, &list); + spin_unlock(&pq->hold_queue.lock); + + while (!skb_queue_empty(&list)) { + skb = __skb_dequeue(&list); + + xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); + dst_hold(skb_dst(skb)->path); + dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path, + &fl, skb->sk, 0); if (IS_ERR(dst)) { - err = PTR_ERR(dst); - goto error; + kfree_skb(skb); + continue; } - if (dst) - break; + nf_reset(skb); + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + err = dst_output(skb); + } - nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); +out: + xfrm_pol_put(pol); + return; + +purge_queue: + pq->timeout = 0; + xfrm_queue_purge(&pq->hold_queue); + xfrm_pol_put(pol); +} + +static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) +{ + unsigned long sched_next; + struct dst_entry *dst = skb_dst(skb); + struct xfrm_dst *xdst = (struct xfrm_dst *) dst; + struct xfrm_policy *pol = xdst->pols[0]; + struct xfrm_policy_queue *pq = &pol->polq; + const struct sk_buff *fclone = skb + 1; + + if (unlikely(skb->fclone == SKB_FCLONE_ORIG && + fclone->fclone == SKB_FCLONE_CLONE)) { + kfree_skb(skb); + return 0; + } - if (unlikely(nx<0)) { - err = nx; - if (err == -EAGAIN && flags) { - DECLARE_WAITQUEUE(wait, current); + if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) { + kfree_skb(skb); + return -EAGAIN; + } - add_wait_queue(&km_waitq, &wait); - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - set_current_state(TASK_RUNNING); - remove_wait_queue(&km_waitq, &wait); + skb_dst_force(skb); - nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); + spin_lock_bh(&pq->hold_queue.lock); - if (nx == -EAGAIN && signal_pending(current)) { - err = -ERESTART; - goto error; - } - if (nx == -EAGAIN || - genid != atomic_read(&flow_cache_genid)) { - xfrm_pol_put(policy); - goto restart; - } - err = nx; - } - if (err < 0) - goto error; + if (!pq->timeout) + pq->timeout = XFRM_QUEUE_TMO_MIN; + + sched_next = jiffies + pq->timeout; + + if (del_timer(&pq->hold_timer)) { + if (time_before(pq->hold_timer.expires, sched_next)) + sched_next = pq->hold_timer.expires; + xfrm_pol_put(pol); + } + + __skb_queue_tail(&pq->hold_queue, skb); + if (!mod_timer(&pq->hold_timer, sched_next)) + xfrm_pol_hold(pol); + + spin_unlock_bh(&pq->hold_queue.lock); + + return 0; +} + +static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, + struct dst_entry *dst, + const struct flowi *fl, + int num_xfrms, + u16 family) +{ + int err; + struct net_device *dev; + struct dst_entry *dst1; + struct xfrm_dst *xdst; + + xdst = xfrm_alloc_dst(net, family); + if (IS_ERR(xdst)) + return xdst; + + if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0) + return xdst; + + dst1 = &xdst->u.dst; + dst_hold(dst); + xdst->route = dst; + + dst_copy_metrics(dst1, dst); + + dst1->obsolete = DST_OBSOLETE_FORCE_CHK; + dst1->flags |= DST_HOST | DST_XFRM_QUEUE; + dst1->lastuse = jiffies; + + dst1->input = dst_discard; + dst1->output = xdst_queue_output; + + dst_hold(dst); + dst1->child = dst; + dst1->path = dst; + + xfrm_init_path((struct xfrm_dst *)dst1, dst, 0); + + err = -ENODEV; + dev = dst->dev; + if (!dev) + goto free_dst; + + err = xfrm_fill_dst(xdst, dev, fl); + if (err) + goto free_dst; + +out: + return xdst; + +free_dst: + dst_release(dst1); + xdst = ERR_PTR(err); + goto out; +} + +static struct flow_cache_object * +xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, + struct flow_cache_object *oldflo, void *ctx) +{ + struct dst_entry *dst_orig = (struct dst_entry *)ctx; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + struct xfrm_dst *xdst, *new_xdst; + int num_pols = 0, num_xfrms = 0, i, err, pol_dead; + + /* Check if the policies from old bundle are usable */ + xdst = NULL; + if (oldflo) { + xdst = container_of(oldflo, struct xfrm_dst, flo); + num_pols = xdst->num_pols; + num_xfrms = xdst->num_xfrms; + pol_dead = 0; + for (i = 0; i < num_pols; i++) { + pols[i] = xdst->pols[i]; + pol_dead |= pols[i]->walk.dead; } - if (nx == 0) { - /* Flow passes not transformed. */ - xfrm_pol_put(policy); - return 0; + if (pol_dead) { + dst_free(&xdst->u.dst); + xdst = NULL; + num_pols = 0; + num_xfrms = 0; + oldflo = NULL; } + } - dst = dst_orig; - err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family); + /* Resolve policies to use if we couldn't get them from + * previous cache entry */ + if (xdst == NULL) { + num_pols = 1; + pols[0] = __xfrm_policy_lookup(net, fl, family, + flow_to_policy_dir(dir)); + err = xfrm_expand_policies(fl, family, pols, + &num_pols, &num_xfrms); + if (err < 0) + goto inc_error; + if (num_pols == 0) + return NULL; + if (num_xfrms <= 0) + goto make_dummy_bundle; + } - if (unlikely(err)) { - int i; - for (i=0; i<nx; i++) - xfrm_state_put(xfrm[i]); + new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig); + if (IS_ERR(new_xdst)) { + err = PTR_ERR(new_xdst); + if (err != -EAGAIN) goto error; + if (oldflo == NULL) + goto make_dummy_bundle; + dst_hold(&xdst->u.dst); + return oldflo; + } else if (new_xdst == NULL) { + num_xfrms = 0; + if (oldflo == NULL) + goto make_dummy_bundle; + xdst->num_xfrms = 0; + dst_hold(&xdst->u.dst); + return oldflo; + } + + /* Kill the previous bundle */ + if (xdst) { + /* The policies were stolen for newly generated bundle */ + xdst->num_pols = 0; + dst_free(&xdst->u.dst); + } + + /* Flow cache does not have reference, it dst_free()'s, + * but we do need to return one reference for original caller */ + dst_hold(&new_xdst->u.dst); + return &new_xdst->flo; + +make_dummy_bundle: + /* We found policies, but there's no bundles to instantiate: + * either because the policy blocks, has no transformations or + * we could not build template (no xfrm_states).*/ + xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family); + if (IS_ERR(xdst)) { + xfrm_pols_put(pols, num_pols); + return ERR_CAST(xdst); + } + xdst->num_pols = num_pols; + xdst->num_xfrms = num_xfrms; + memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); + + dst_hold(&xdst->u.dst); + return &xdst->flo; + +inc_error: + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); +error: + if (xdst != NULL) + dst_free(&xdst->u.dst); + else + xfrm_pols_put(pols, num_pols); + return ERR_PTR(err); +} + +static struct dst_entry *make_blackhole(struct net *net, u16 family, + struct dst_entry *dst_orig) +{ + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + struct dst_entry *ret; + + if (!afinfo) { + dst_release(dst_orig); + return ERR_PTR(-EINVAL); + } else { + ret = afinfo->blackhole_route(net, dst_orig); + } + xfrm_policy_put_afinfo(afinfo); + + return ret; +} + +/* Main function: finds/creates a bundle for given flow. + * + * At the moment we eat a raw IP route. Mostly to speed up lookups + * on interfaces with disabled IPsec. + */ +struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, + struct sock *sk, int flags) +{ + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + struct flow_cache_object *flo; + struct xfrm_dst *xdst; + struct dst_entry *dst, *route; + u16 family = dst_orig->ops->family; + u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); + int i, err, num_pols, num_xfrms = 0, drop_pols = 0; + + dst = NULL; + xdst = NULL; + route = NULL; + + if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { + num_pols = 1; + pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + err = xfrm_expand_policies(fl, family, pols, + &num_pols, &num_xfrms); + if (err < 0) + goto dropdst; + + if (num_pols) { + if (num_xfrms <= 0) { + drop_pols = num_pols; + goto no_transform; + } + + xdst = xfrm_resolve_and_create_bundle( + pols, num_pols, fl, + family, dst_orig); + if (IS_ERR(xdst)) { + xfrm_pols_put(pols, num_pols); + err = PTR_ERR(xdst); + goto dropdst; + } else if (xdst == NULL) { + num_xfrms = 0; + drop_pols = num_pols; + goto no_transform; + } + + dst_hold(&xdst->u.dst); + xdst->u.dst.flags |= DST_NOCACHE; + route = xdst->route; } + } - write_lock_bh(&policy->lock); - if (unlikely(policy->dead || stale_bundle(dst))) { - /* Wow! While we worked on resolving, this - * policy has gone. Retry. It is not paranoia, - * we just cannot enlist new bundle to dead object. - * We can't enlist stable bundles either. - */ - write_unlock_bh(&policy->lock); - - xfrm_pol_put(policy); - if (dst) - dst_free(dst); - goto restart; + if (xdst == NULL) { + /* To accelerate a bit... */ + if ((dst_orig->flags & DST_NOXFRM) || + !net->xfrm.policy_count[XFRM_POLICY_OUT]) + goto nopol; + + flo = flow_cache_lookup(net, fl, family, dir, + xfrm_bundle_lookup, dst_orig); + if (flo == NULL) + goto nopol; + if (IS_ERR(flo)) { + err = PTR_ERR(flo); + goto dropdst; } - dst->next = policy->bundles; - policy->bundles = dst; - dst_hold(dst); - write_unlock_bh(&policy->lock); + xdst = container_of(flo, struct xfrm_dst, flo); + + num_pols = xdst->num_pols; + num_xfrms = xdst->num_xfrms; + memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols); + route = xdst->route; } - *dst_p = dst; - dst_release(dst_orig); - xfrm_pol_put(policy); - return 0; + dst = &xdst->u.dst; + if (route == NULL && num_xfrms > 0) { + /* The only case when xfrm_bundle_lookup() returns a + * bundle with null route, is when the template could + * not be resolved. It means policies are there, but + * bundle could not be created, since we don't yet + * have the xfrm_state's. We need to wait for KM to + * negotiate new SA's or bail out with error.*/ + if (net->xfrm.sysctl_larval_drop) { + dst_release(dst); + xfrm_pols_put(pols, drop_pols); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); + + return make_blackhole(net, family, dst_orig); + } + + err = -EAGAIN; + + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); + goto error; + } + +no_transform: + if (num_pols == 0) + goto nopol; + + if ((flags & XFRM_LOOKUP_ICMP) && + !(pols[0]->flags & XFRM_POLICY_ICMP)) { + err = -ENOENT; + goto error; + } + + for (i = 0; i < num_pols; i++) + pols[i]->curlft.use_time = get_seconds(); + + if (num_xfrms < 0) { + /* Prohibit the flow */ + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); + err = -EPERM; + goto error; + } else if (num_xfrms > 0) { + /* Flow transformed */ + dst_release(dst_orig); + } else { + /* Flow passes untransformed */ + dst_release(dst); + dst = dst_orig; + } +ok: + xfrm_pols_put(pols, drop_pols); + if (dst && dst->xfrm && + dst->xfrm->props.mode == XFRM_MODE_TUNNEL) + dst->flags |= DST_XFRM_TUNNEL; + return dst; + +nopol: + if (!(flags & XFRM_LOOKUP_ICMP)) { + dst = dst_orig; + goto ok; + } + err = -ENOENT; error: + dst_release(dst); +dropdst: dst_release(dst_orig); - xfrm_pol_put(policy); - *dst_p = NULL; - return err; + xfrm_pols_put(pols, drop_pols); + return ERR_PTR(err); } EXPORT_SYMBOL(xfrm_lookup); +static inline int +xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) +{ + struct xfrm_state *x; + + if (!skb->sp || idx < 0 || idx >= skb->sp->len) + return 0; + x = skb->sp->xvec[idx]; + if (!x->type->reject) + return 0; + return x->type->reject(x, skb, fl); +} + /* When skb is transformed back to its "native" form, we have to * check policy restrictions. At the moment we make this in maximally * stupid way. Shame on me. :-) Of course, connected sockets must @@ -917,171 +2215,302 @@ EXPORT_SYMBOL(xfrm_lookup); */ static inline int -xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, +xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, unsigned short family) { if (xfrm_state_kern(x)) - return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, family); + return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family); return x->id.proto == tmpl->id.proto && (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && (x->props.reqid == tmpl->reqid || !tmpl->reqid) && x->props.mode == tmpl->mode && - (tmpl->aalgos & (1<<x->props.aalgo)) && - !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family)); + (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) || + !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) && + !(x->props.mode != XFRM_MODE_TRANSPORT && + xfrm_state_addr_cmp(tmpl, x, family)); } +/* + * 0 or more than 0 is returned when validation is succeeded (either bypass + * because of optional transport mode, or next index of the mathced secpath + * state with the template. + * -1 is returned when no matching template is found. + * Otherwise "-2 - errored_index" is returned. + */ static inline int -xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, +xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start, unsigned short family) { int idx = start; if (tmpl->optional) { - if (!tmpl->mode) + if (tmpl->mode == XFRM_MODE_TRANSPORT) return start; } else start = -1; for (; idx < sp->len; idx++) { - if (xfrm_state_ok(tmpl, sp->x[idx].xvec, family)) + if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) return ++idx; - if (sp->x[idx].xvec->props.mode) + if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) { + if (start == -1) + start = -2-idx; break; + } } return start; } -int -xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) +int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, + unsigned int family, int reverse) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + int err; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; - afinfo->decode_session(skb, fl); + afinfo->decode_session(skb, fl, reverse); + err = security_xfrm_decode_session(skb, &fl->flowi_secid); xfrm_policy_put_afinfo(afinfo); - return 0; + return err; } -EXPORT_SYMBOL(xfrm_decode_session); +EXPORT_SYMBOL(__xfrm_decode_session); -static inline int secpath_has_tunnel(struct sec_path *sp, int k) +static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp) { for (; k < sp->len; k++) { - if (sp->x[k].xvec->props.mode) + if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { + *idxp = k; return 1; + } } return 0; } -int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, +int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { + struct net *net = dev_net(skb->dev); struct xfrm_policy *pol; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + int npols = 0; + int xfrm_nr; + int pi; + int reverse; struct flowi fl; - u8 fl_dir = policy_to_flow_dir(dir); - u32 sk_sid; + u8 fl_dir; + int xerr_idx = -1; + + reverse = dir & ~XFRM_POLICY_MASK; + dir &= XFRM_POLICY_MASK; + fl_dir = policy_to_flow_dir(dir); - if (xfrm_decode_session(skb, &fl, family) < 0) + if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); return 0; - nf_nat_decode_session(skb, &fl, family); + } - sk_sid = security_sk_sid(sk, &fl, fl_dir); + nf_nat_decode_session(skb, &fl, family); /* First, check used SA against their selectors. */ if (skb->sp) { int i; - for (i=skb->sp->len-1; i>=0; i--) { - struct sec_decap_state *xvec = &(skb->sp->x[i]); - if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family)) - return 0; - - /* If there is a post_input processor, try running it */ - if (xvec->xvec->type->post_input && - (xvec->xvec->type->post_input)(xvec->xvec, - &(xvec->decap), - skb) != 0) + for (i = skb->sp->len-1; i >= 0; i--) { + struct xfrm_state *x = skb->sp->xvec[i]; + if (!xfrm_selector_match(&x->sel, &fl, family)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); return 0; + } } } pol = NULL; - if (sk && sk->sk_policy[dir]) - pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid); + if (sk && sk->sk_policy[dir]) { + pol = xfrm_sk_policy_lookup(sk, dir, &fl); + if (IS_ERR(pol)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); + return 0; + } + } + + if (!pol) { + struct flow_cache_object *flo; - if (!pol) - pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir, - xfrm_policy_lookup); + flo = flow_cache_lookup(net, &fl, family, fl_dir, + xfrm_policy_lookup, NULL); + if (IS_ERR_OR_NULL(flo)) + pol = ERR_CAST(flo); + else + pol = container_of(flo, struct xfrm_policy, flo); + } - if (!pol) - return !skb->sp || !secpath_has_tunnel(skb->sp, 0); + if (IS_ERR(pol)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); + return 0; + } + + if (!pol) { + if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { + xfrm_secpath_reject(xerr_idx, skb, &fl); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); + return 0; + } + return 1; + } - pol->curlft.use_time = (unsigned long)xtime.tv_sec; + pol->curlft.use_time = get_seconds(); + + pols[0] = pol; + npols++; +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, + &fl, family, + XFRM_POLICY_IN); + if (pols[1]) { + if (IS_ERR(pols[1])) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); + return 0; + } + pols[1]->curlft.use_time = get_seconds(); + npols++; + } + } +#endif if (pol->action == XFRM_POLICY_ALLOW) { struct sec_path *sp; static struct sec_path dummy; + struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; + struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; + struct xfrm_tmpl **tpp = tp; + int ti = 0; int i, k; if ((sp = skb->sp) == NULL) sp = &dummy; + for (pi = 0; pi < npols; pi++) { + if (pols[pi] != pol && + pols[pi]->action != XFRM_POLICY_ALLOW) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); + goto reject; + } + if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); + goto reject_error; + } + for (i = 0; i < pols[pi]->xfrm_nr; i++) + tpp[ti++] = &pols[pi]->xfrm_vec[i]; + } + xfrm_nr = ti; + if (npols > 1) { + xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net); + tpp = stp; + } + /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. * Order is _important_. Later we will implement * some barriers, but at the moment barriers * are implied between each two transformations. */ - for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) { - k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family); - if (k < 0) + for (i = xfrm_nr-1, k = 0; i >= 0; i--) { + k = xfrm_policy_ok(tpp[i], sp, k, family); + if (k < 0) { + if (k < -1) + /* "-2 - errored_index" returned */ + xerr_idx = -(2+k); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; + } } - if (secpath_has_tunnel(sp, k)) + if (secpath_has_nontransport(sp, k, &xerr_idx)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; + } - xfrm_pol_put(pol); + xfrm_pols_put(pols, npols); return 1; } + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); reject: - xfrm_pol_put(pol); + xfrm_secpath_reject(xerr_idx, skb, &fl); +reject_error: + xfrm_pols_put(pols, npols); return 0; } EXPORT_SYMBOL(__xfrm_policy_check); int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) { + struct net *net = dev_net(skb->dev); struct flowi fl; + struct dst_entry *dst; + int res = 1; - if (xfrm_decode_session(skb, &fl, family) < 0) + if (xfrm_decode_session(skb, &fl, family) < 0) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); return 0; + } - return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; + skb_dst_force(skb); + + dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0); + if (IS_ERR(dst)) { + res = 0; + dst = NULL; + } + skb_dst_set(skb, dst); + return res; } EXPORT_SYMBOL(__xfrm_route_forward); +/* Optimize later using cookies and generation ids. */ + static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) { - /* If it is marked obsolete, which is how we even get here, - * then we have purged it from the policy bundle list and we - * did that for a good reason. + /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete + * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to + * get validated by dst_ops->check on every use. We do this + * because when a normal route referenced by an XFRM dst is + * obsoleted we do not go looking around for all parent + * referencing XFRM dsts so that we can invalidate them. It + * is just too much work. Instead we make the checks here on + * every use. For example: + * + * XFRM dst A --> IPv4 dst X + * + * X is the "xdst->route" of A (X is also the "dst->path" of A + * in this example). If X is marked obsolete, "A" will not + * notice. That's what we are validating here via the + * stale_bundle() check. + * + * When a policy's bundle is pruned, we dst_free() the XFRM + * dst which causes it's ->obsolete field to be set to + * DST_OBSOLETE_DEAD. If an XFRM dst has been pruned like + * this, we want to force a new route lookup. */ + if (dst->obsolete < 0 && !stale_bundle(dst)) + return dst; + return NULL; } static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC); + return !xfrm_bundle_ok((struct xfrm_dst *)dst); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { - dst->dev = &loopback_dev; - dev_hold(&loopback_dev); + dst->dev = dev_net(dev)->loopback_dev; + dev_hold(dst->dev); dev_put(dev); } } @@ -1090,7 +2519,6 @@ EXPORT_SYMBOL(xfrm_dst_ifdown); static void xfrm_link_failure(struct sk_buff *skb) { /* Impossible. Such dst must be popped before reaches point of failure. */ - return; } static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) @@ -1104,65 +2532,18 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } -static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) -{ - int i; - struct xfrm_policy *pol; - struct dst_entry *dst, **dstp, *gc_list = NULL; - - read_lock_bh(&xfrm_policy_lock); - for (i=0; i<2*XFRM_POLICY_MAX; i++) { - for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { - write_lock(&pol->lock); - dstp = &pol->bundles; - while ((dst=*dstp) != NULL) { - if (func(dst)) { - *dstp = dst->next; - dst->next = gc_list; - gc_list = dst; - } else { - dstp = &dst->next; - } - } - write_unlock(&pol->lock); - } - } - read_unlock_bh(&xfrm_policy_lock); - - while (gc_list) { - dst = gc_list; - gc_list = dst->next; - dst_free(dst); - } -} - -static int unused_bundle(struct dst_entry *dst) -{ - return !atomic_read(&dst->__refcnt); -} - -static void __xfrm_garbage_collect(void) -{ - xfrm_prune_bundles(unused_bundle); -} - -int xfrm_flush_bundles(void) -{ - xfrm_prune_bundles(stale_bundle); - return 0; -} - -static int always_true(struct dst_entry *dst) +void xfrm_garbage_collect(struct net *net) { - return 1; + flow_cache_flush(net); } +EXPORT_SYMBOL(xfrm_garbage_collect); -void xfrm_flush_all_bundles(void) +static void xfrm_garbage_collect_deferred(struct net *net) { - xfrm_prune_bundles(always_true); + flow_cache_flush_deferred(net); } -void xfrm_init_pmtu(struct dst_entry *dst) +static void xfrm_init_pmtu(struct dst_entry *dst) { do { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -1179,17 +2560,15 @@ void xfrm_init_pmtu(struct dst_entry *dst) if (pmtu > route_mtu_cached) pmtu = route_mtu_cached; - dst->metrics[RTAX_MTU-1] = pmtu; + dst_metric_set(dst, RTAX_MTU, pmtu); } while ((dst = dst->next)); } -EXPORT_SYMBOL(xfrm_init_pmtu); - /* Check that the bundle accepts the flow and its components are * still valid. */ -int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) +static int xfrm_bundle_ok(struct xfrm_dst *first) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; @@ -1199,15 +2578,21 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) (dst->dev && !netif_running(dst->dev))) return 0; + if (dst->flags & DST_XFRM_QUEUE) + return 1; + last = NULL; do { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) - return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; + if (xdst->xfrm_genid != dst->xfrm->genid) + return 0; + if (xdst->num_pols > 0 && + xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) + return 0; mtu = dst_mtu(dst->child); if (xdst->child_mtu_cached != mtu) { @@ -1236,28 +2621,46 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) mtu = xfrm_state_mtu(dst->xfrm, mtu); if (mtu > last->route_mtu_cached) mtu = last->route_mtu_cached; - dst->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(dst, RTAX_MTU, mtu); if (last == first) break; - last = last->u.next; + last = (struct xfrm_dst *)last->u.dst.next; last->child_mtu_cached = mtu; } return 1; } -EXPORT_SYMBOL(xfrm_bundle_ok); +static unsigned int xfrm_default_advmss(const struct dst_entry *dst) +{ + return dst_metric_advmss(dst->path); +} + +static unsigned int xfrm_mtu(const struct dst_entry *dst) +{ + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst_mtu(dst->path); +} + +static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) +{ + return dst->path->ops->neigh_lookup(dst, skb, daddr); +} int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { + struct net *net; int err = 0; if (unlikely(afinfo == NULL)) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock(&xfrm_policy_afinfo_lock); + spin_lock(&xfrm_policy_afinfo_lock); if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; else { @@ -1266,15 +2669,42 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->kmem_cachep = xfrm_dst_cache; if (likely(dst_ops->check == NULL)) dst_ops->check = xfrm_dst_check; + if (likely(dst_ops->default_advmss == NULL)) + dst_ops->default_advmss = xfrm_default_advmss; + if (likely(dst_ops->mtu == NULL)) + dst_ops->mtu = xfrm_mtu; if (likely(dst_ops->negative_advice == NULL)) dst_ops->negative_advice = xfrm_negative_advice; if (likely(dst_ops->link_failure == NULL)) dst_ops->link_failure = xfrm_link_failure; + if (likely(dst_ops->neigh_lookup == NULL)) + dst_ops->neigh_lookup = xfrm_neigh_lookup; if (likely(afinfo->garbage_collect == NULL)) - afinfo->garbage_collect = __xfrm_garbage_collect; - xfrm_policy_afinfo[afinfo->family] = afinfo; + afinfo->garbage_collect = xfrm_garbage_collect_deferred; + rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo); + } + spin_unlock(&xfrm_policy_afinfo_lock); + + rtnl_lock(); + for_each_net(net) { + struct dst_ops *xfrm_dst_ops; + + switch (afinfo->family) { + case AF_INET: + xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops; + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops; + break; +#endif + default: + BUG(); + } + *xfrm_dst_ops = *afinfo->dst_ops; } - write_unlock(&xfrm_policy_afinfo_lock); + rtnl_unlock(); + return err; } EXPORT_SYMBOL(xfrm_policy_register_afinfo); @@ -1286,77 +2716,518 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock(&xfrm_policy_afinfo_lock); + spin_lock(&xfrm_policy_afinfo_lock); if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo)) err = -EINVAL; - else { - struct dst_ops *dst_ops = afinfo->dst_ops; - xfrm_policy_afinfo[afinfo->family] = NULL; - dst_ops->kmem_cachep = NULL; - dst_ops->check = NULL; - dst_ops->negative_advice = NULL; - dst_ops->link_failure = NULL; - afinfo->garbage_collect = NULL; - } + else + RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family], + NULL); + } + spin_unlock(&xfrm_policy_afinfo_lock); + if (!err) { + struct dst_ops *dst_ops = afinfo->dst_ops; + + synchronize_rcu(); + + dst_ops->kmem_cachep = NULL; + dst_ops->check = NULL; + dst_ops->negative_advice = NULL; + dst_ops->link_failure = NULL; + afinfo->garbage_collect = NULL; } - write_unlock(&xfrm_policy_afinfo_lock); return err; } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); -static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) +static void __net_init xfrm_dst_ops_init(struct net *net) { struct xfrm_policy_afinfo *afinfo; - if (unlikely(family >= NPROTO)) - return NULL; - read_lock(&xfrm_policy_afinfo_lock); - afinfo = xfrm_policy_afinfo[family]; - if (likely(afinfo != NULL)) - read_lock(&afinfo->lock); - read_unlock(&xfrm_policy_afinfo_lock); - return afinfo; -} -static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) -{ - if (unlikely(afinfo == NULL)) - return; - read_unlock(&afinfo->lock); + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]); + if (afinfo) + net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; +#if IS_ENABLED(CONFIG_IPV6) + afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]); + if (afinfo) + net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; +#endif + rcu_read_unlock(); } static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + switch (event) { case NETDEV_DOWN: - xfrm_flush_bundles(); + xfrm_garbage_collect(dev_net(dev)); } return NOTIFY_DONE; } static struct notifier_block xfrm_dev_notifier = { - xfrm_dev_event, - NULL, - 0 + .notifier_call = xfrm_dev_event, }; -static void __init xfrm_policy_init(void) +#ifdef CONFIG_XFRM_STATISTICS +static int __net_init xfrm_statistics_init(struct net *net) +{ + int rv; + net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib); + if (!net->mib.xfrm_statistics) + return -ENOMEM; + rv = xfrm_proc_init(net); + if (rv < 0) + free_percpu(net->mib.xfrm_statistics); + return rv; +} + +static void xfrm_statistics_fini(struct net *net) +{ + xfrm_proc_fini(net); + free_percpu(net->mib.xfrm_statistics); +} +#else +static int __net_init xfrm_statistics_init(struct net *net) +{ + return 0; +} + +static void xfrm_statistics_fini(struct net *net) +{ +} +#endif + +static int __net_init xfrm_policy_init(struct net *net) { - xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", + unsigned int hmask, sz; + int dir; + + if (net_eq(net, &init_net)) + xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct xfrm_dst), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (!xfrm_dst_cache) - panic("XFRM: failed to allocate xfrm_dst_cache\n"); + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, + NULL); + + hmask = 8 - 1; + sz = (hmask+1) * sizeof(struct hlist_head); + + net->xfrm.policy_byidx = xfrm_hash_alloc(sz); + if (!net->xfrm.policy_byidx) + goto out_byidx; + net->xfrm.policy_idx_hmask = hmask; + + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy_hash *htab; + + net->xfrm.policy_count[dir] = 0; + INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); + + htab = &net->xfrm.policy_bydst[dir]; + htab->table = xfrm_hash_alloc(sz); + if (!htab->table) + goto out_bydst; + htab->hmask = hmask; + } + + INIT_LIST_HEAD(&net->xfrm.policy_all); + INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); + if (net_eq(net, &init_net)) + register_netdevice_notifier(&xfrm_dev_notifier); + return 0; + +out_bydst: + for (dir--; dir >= 0; dir--) { + struct xfrm_policy_hash *htab; + + htab = &net->xfrm.policy_bydst[dir]; + xfrm_hash_free(htab->table, sz); + } + xfrm_hash_free(net->xfrm.policy_byidx, sz); +out_byidx: + return -ENOMEM; +} - INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL); - register_netdevice_notifier(&xfrm_dev_notifier); +static void xfrm_policy_fini(struct net *net) +{ + unsigned int sz; + int dir; + + flush_work(&net->xfrm.policy_hash_work); +#ifdef CONFIG_XFRM_SUB_POLICY + xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false); +#endif + xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false); + + WARN_ON(!list_empty(&net->xfrm.policy_all)); + + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy_hash *htab; + + WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); + + htab = &net->xfrm.policy_bydst[dir]; + sz = (htab->hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(htab->table)); + xfrm_hash_free(htab->table, sz); + } + + sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(net->xfrm.policy_byidx)); + xfrm_hash_free(net->xfrm.policy_byidx, sz); +} + +static int __net_init xfrm_net_init(struct net *net) +{ + int rv; + + rv = xfrm_statistics_init(net); + if (rv < 0) + goto out_statistics; + rv = xfrm_state_init(net); + if (rv < 0) + goto out_state; + rv = xfrm_policy_init(net); + if (rv < 0) + goto out_policy; + xfrm_dst_ops_init(net); + rv = xfrm_sysctl_init(net); + if (rv < 0) + goto out_sysctl; + rv = flow_cache_init(net); + if (rv < 0) + goto out; + + /* Initialize the per-net locks here */ + spin_lock_init(&net->xfrm.xfrm_state_lock); + rwlock_init(&net->xfrm.xfrm_policy_lock); + mutex_init(&net->xfrm.xfrm_cfg_mutex); + + return 0; + +out: + xfrm_sysctl_fini(net); +out_sysctl: + xfrm_policy_fini(net); +out_policy: + xfrm_state_fini(net); +out_state: + xfrm_statistics_fini(net); +out_statistics: + return rv; +} + +static void __net_exit xfrm_net_exit(struct net *net) +{ + flow_cache_fini(net); + xfrm_sysctl_fini(net); + xfrm_policy_fini(net); + xfrm_state_fini(net); + xfrm_statistics_fini(net); } +static struct pernet_operations __net_initdata xfrm_net_ops = { + .init = xfrm_net_init, + .exit = xfrm_net_exit, +}; + void __init xfrm_init(void) { - xfrm_state_init(); - xfrm_policy_init(); + register_pernet_subsys(&xfrm_net_ops); xfrm_input_init(); } +#ifdef CONFIG_AUDITSYSCALL +static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, + struct audit_buffer *audit_buf) +{ + struct xfrm_sec_ctx *ctx = xp->security; + struct xfrm_selector *sel = &xp->selector; + + if (ctx) + audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", + ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str); + + switch (sel->family) { + case AF_INET: + audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4); + if (sel->prefixlen_s != 32) + audit_log_format(audit_buf, " src_prefixlen=%d", + sel->prefixlen_s); + audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4); + if (sel->prefixlen_d != 32) + audit_log_format(audit_buf, " dst_prefixlen=%d", + sel->prefixlen_d); + break; + case AF_INET6: + audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6); + if (sel->prefixlen_s != 128) + audit_log_format(audit_buf, " src_prefixlen=%d", + sel->prefixlen_s); + audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6); + if (sel->prefixlen_d != 128) + audit_log_format(audit_buf, " dst_prefixlen=%d", + sel->prefixlen_d); + break; + } +} + +void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid) +{ + struct audit_buffer *audit_buf; + + audit_buf = xfrm_audit_start("SPD-add"); + if (audit_buf == NULL) + return; + xfrm_audit_helper_usrinfo(task_valid, audit_buf); + audit_log_format(audit_buf, " res=%u", result); + xfrm_audit_common_policyinfo(xp, audit_buf); + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); + +void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, + bool task_valid) +{ + struct audit_buffer *audit_buf; + + audit_buf = xfrm_audit_start("SPD-delete"); + if (audit_buf == NULL) + return; + xfrm_audit_helper_usrinfo(task_valid, audit_buf); + audit_log_format(audit_buf, " res=%u", result); + xfrm_audit_common_policyinfo(xp, audit_buf); + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); +#endif + +#ifdef CONFIG_XFRM_MIGRATE +static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, + const struct xfrm_selector *sel_tgt) +{ + if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { + if (sel_tgt->family == sel_cmp->family && + xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, + sel_cmp->family) && + xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, + sel_cmp->family) && + sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && + sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { + return true; + } + } else { + if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) { + return true; + } + } + return false; +} + +static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, + u8 dir, u8 type, struct net *net) +{ + struct xfrm_policy *pol, *ret = NULL; + struct hlist_head *chain; + u32 priority = ~0U; + + read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/ + chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); + hlist_for_each_entry(pol, chain, bydst) { + if (xfrm_migrate_selector_match(sel, &pol->selector) && + pol->type == type) { + ret = pol; + priority = ret->priority; + break; + } + } + chain = &net->xfrm.policy_inexact[dir]; + hlist_for_each_entry(pol, chain, bydst) { + if (xfrm_migrate_selector_match(sel, &pol->selector) && + pol->type == type && + pol->priority < priority) { + ret = pol; + break; + } + } + + if (ret) + xfrm_pol_hold(ret); + + read_unlock_bh(&net->xfrm.xfrm_policy_lock); + + return ret; +} + +static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t) +{ + int match = 0; + + if (t->mode == m->mode && t->id.proto == m->proto && + (m->reqid == 0 || t->reqid == m->reqid)) { + switch (t->mode) { + case XFRM_MODE_TUNNEL: + case XFRM_MODE_BEET: + if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr, + m->old_family) && + xfrm_addr_equal(&t->saddr, &m->old_saddr, + m->old_family)) { + match = 1; + } + break; + case XFRM_MODE_TRANSPORT: + /* in case of transport mode, template does not store + any IP addresses, hence we just compare mode and + protocol */ + match = 1; + break; + default: + break; + } + } + return match; +} + +/* update endpoint address(es) of template(s) */ +static int xfrm_policy_migrate(struct xfrm_policy *pol, + struct xfrm_migrate *m, int num_migrate) +{ + struct xfrm_migrate *mp; + int i, j, n = 0; + + write_lock_bh(&pol->lock); + if (unlikely(pol->walk.dead)) { + /* target policy has been deleted */ + write_unlock_bh(&pol->lock); + return -ENOENT; + } + + for (i = 0; i < pol->xfrm_nr; i++) { + for (j = 0, mp = m; j < num_migrate; j++, mp++) { + if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i])) + continue; + n++; + if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL && + pol->xfrm_vec[i].mode != XFRM_MODE_BEET) + continue; + /* update endpoints */ + memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr, + sizeof(pol->xfrm_vec[i].id.daddr)); + memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr, + sizeof(pol->xfrm_vec[i].saddr)); + pol->xfrm_vec[i].encap_family = mp->new_family; + /* flush bundles */ + atomic_inc(&pol->genid); + } + } + + write_unlock_bh(&pol->lock); + + if (!n) + return -ENODATA; + + return 0; +} + +static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) +{ + int i, j; + + if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH) + return -EINVAL; + + for (i = 0; i < num_migrate; i++) { + if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr, + m[i].old_family) && + xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr, + m[i].old_family)) + return -EINVAL; + if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) || + xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) + return -EINVAL; + + /* check if there is any duplicated entry */ + for (j = i + 1; j < num_migrate; j++) { + if (!memcmp(&m[i].old_daddr, &m[j].old_daddr, + sizeof(m[i].old_daddr)) && + !memcmp(&m[i].old_saddr, &m[j].old_saddr, + sizeof(m[i].old_saddr)) && + m[i].proto == m[j].proto && + m[i].mode == m[j].mode && + m[i].reqid == m[j].reqid && + m[i].old_family == m[j].old_family) + return -EINVAL; + } + } + + return 0; +} + +int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k, struct net *net) +{ + int i, err, nx_cur = 0, nx_new = 0; + struct xfrm_policy *pol = NULL; + struct xfrm_state *x, *xc; + struct xfrm_state *x_cur[XFRM_MAX_DEPTH]; + struct xfrm_state *x_new[XFRM_MAX_DEPTH]; + struct xfrm_migrate *mp; + + if ((err = xfrm_migrate_check(m, num_migrate)) < 0) + goto out; + + /* Stage 1 - find policy */ + if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) { + err = -ENOENT; + goto out; + } + + /* Stage 2 - find and update state(s) */ + for (i = 0, mp = m; i < num_migrate; i++, mp++) { + if ((x = xfrm_migrate_state_find(mp, net))) { + x_cur[nx_cur] = x; + nx_cur++; + if ((xc = xfrm_state_migrate(x, mp))) { + x_new[nx_new] = xc; + nx_new++; + } else { + err = -ENODATA; + goto restore_state; + } + } + } + + /* Stage 3 - update policy */ + if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0) + goto restore_state; + + /* Stage 4 - delete old state(s) */ + if (nx_cur) { + xfrm_states_put(x_cur, nx_cur); + xfrm_states_delete(x_cur, nx_cur); + } + + /* Stage 5 - announce */ + km_migrate(sel, dir, type, m, num_migrate, k); + + xfrm_pol_put(pol); + + return 0; +out: + return err; + +restore_state: + if (pol) + xfrm_pol_put(pol); + if (nx_cur) + xfrm_states_put(x_cur, nx_cur); + if (nx_new) + xfrm_states_delete(x_new, nx_new); + + return err; +} +EXPORT_SYMBOL(xfrm_migrate); +#endif diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c new file mode 100644 index 00000000000..9c4fbd8935f --- /dev/null +++ b/net/xfrm/xfrm_proc.c @@ -0,0 +1,86 @@ +/* + * xfrm_proc.c + * + * Copyright (C)2006-2007 USAGI/WIDE Project + * + * Authors: Masahide NAKAMURA <nakam@linux-ipv6.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/export.h> +#include <net/snmp.h> +#include <net/xfrm.h> + +static const struct snmp_mib xfrm_mib_list[] = { + SNMP_MIB_ITEM("XfrmInError", LINUX_MIB_XFRMINERROR), + SNMP_MIB_ITEM("XfrmInBufferError", LINUX_MIB_XFRMINBUFFERERROR), + SNMP_MIB_ITEM("XfrmInHdrError", LINUX_MIB_XFRMINHDRERROR), + SNMP_MIB_ITEM("XfrmInNoStates", LINUX_MIB_XFRMINNOSTATES), + SNMP_MIB_ITEM("XfrmInStateProtoError", LINUX_MIB_XFRMINSTATEPROTOERROR), + SNMP_MIB_ITEM("XfrmInStateModeError", LINUX_MIB_XFRMINSTATEMODEERROR), + SNMP_MIB_ITEM("XfrmInStateSeqError", LINUX_MIB_XFRMINSTATESEQERROR), + SNMP_MIB_ITEM("XfrmInStateExpired", LINUX_MIB_XFRMINSTATEEXPIRED), + SNMP_MIB_ITEM("XfrmInStateMismatch", LINUX_MIB_XFRMINSTATEMISMATCH), + SNMP_MIB_ITEM("XfrmInStateInvalid", LINUX_MIB_XFRMINSTATEINVALID), + SNMP_MIB_ITEM("XfrmInTmplMismatch", LINUX_MIB_XFRMINTMPLMISMATCH), + SNMP_MIB_ITEM("XfrmInNoPols", LINUX_MIB_XFRMINNOPOLS), + SNMP_MIB_ITEM("XfrmInPolBlock", LINUX_MIB_XFRMINPOLBLOCK), + SNMP_MIB_ITEM("XfrmInPolError", LINUX_MIB_XFRMINPOLERROR), + SNMP_MIB_ITEM("XfrmOutError", LINUX_MIB_XFRMOUTERROR), + SNMP_MIB_ITEM("XfrmOutBundleGenError", LINUX_MIB_XFRMOUTBUNDLEGENERROR), + SNMP_MIB_ITEM("XfrmOutBundleCheckError", LINUX_MIB_XFRMOUTBUNDLECHECKERROR), + SNMP_MIB_ITEM("XfrmOutNoStates", LINUX_MIB_XFRMOUTNOSTATES), + SNMP_MIB_ITEM("XfrmOutStateProtoError", LINUX_MIB_XFRMOUTSTATEPROTOERROR), + SNMP_MIB_ITEM("XfrmOutStateModeError", LINUX_MIB_XFRMOUTSTATEMODEERROR), + SNMP_MIB_ITEM("XfrmOutStateSeqError", LINUX_MIB_XFRMOUTSTATESEQERROR), + SNMP_MIB_ITEM("XfrmOutStateExpired", LINUX_MIB_XFRMOUTSTATEEXPIRED), + SNMP_MIB_ITEM("XfrmOutPolBlock", LINUX_MIB_XFRMOUTPOLBLOCK), + SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD), + SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR), + SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), + SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), + SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR), + SNMP_MIB_SENTINEL +}; + +static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) +{ + struct net *net = seq->private; + int i; + for (i = 0; xfrm_mib_list[i].name; i++) + seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, + snmp_fold_field(net->mib.xfrm_statistics, + xfrm_mib_list[i].entry)); + return 0; +} + +static int xfrm_statistics_seq_open(struct inode *inode, struct file *file) +{ + return single_open_net(inode, file, xfrm_statistics_seq_show); +} + +static const struct file_operations xfrm_statistics_seq_fops = { + .owner = THIS_MODULE, + .open = xfrm_statistics_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release_net, +}; + +int __net_init xfrm_proc_init(struct net *net) +{ + if (!proc_create("xfrm_stat", S_IRUGO, net->proc_net, + &xfrm_statistics_seq_fops)) + return -ENOMEM; + return 0; +} + +void xfrm_proc_fini(struct net *net) +{ + remove_proc_entry("xfrm_stat", net->proc_net); +} diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c new file mode 100644 index 00000000000..dab57daae40 --- /dev/null +++ b/net/xfrm/xfrm_replay.c @@ -0,0 +1,603 @@ +/* + * xfrm_replay.c - xfrm replay detection, derived from xfrm_state.c. + * + * Copyright (C) 2010 secunet Security Networks AG + * Copyright (C) 2010 Steffen Klassert <steffen.klassert@secunet.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/export.h> +#include <net/xfrm.h> + +u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq) +{ + u32 seq, seq_hi, bottom; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + + if (!(x->props.flags & XFRM_STATE_ESN)) + return 0; + + seq = ntohl(net_seq); + seq_hi = replay_esn->seq_hi; + bottom = replay_esn->seq - replay_esn->replay_window + 1; + + if (likely(replay_esn->seq >= replay_esn->replay_window - 1)) { + /* A. same subspace */ + if (unlikely(seq < bottom)) + seq_hi++; + } else { + /* B. window spans two subspaces */ + if (unlikely(seq >= bottom)) + seq_hi--; + } + + return seq_hi; +} + +static void xfrm_replay_notify(struct xfrm_state *x, int event) +{ + struct km_event c; + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! + */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (!x->replay_maxdiff || + ((x->replay.seq - x->preplay.seq < x->replay_maxdiff) && + (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff))) { + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + } + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(&x->replay, &x->preplay, + sizeof(struct xfrm_replay_state)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + +static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct net *net = xs_net(x); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq; + if (unlikely(x->replay.oseq == 0)) { + x->replay.oseq--; + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_check(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + u32 diff; + u32 seq = ntohl(net_seq); + + if (!x->props.replay_window) + return 0; + + if (unlikely(seq == 0)) + goto err; + + if (likely(seq > x->replay.seq)) + return 0; + + diff = x->replay.seq - seq; + if (diff >= x->props.replay_window) { + x->stats.replay_window++; + goto err; + } + + if (x->replay.bitmap & (1U << diff)) { + x->stats.replay++; + goto err; + } + return 0; + +err: + xfrm_audit_state_replay(x, skb, net_seq); + return -EINVAL; +} + +static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) +{ + u32 diff; + u32 seq = ntohl(net_seq); + + if (!x->props.replay_window) + return; + + if (seq > x->replay.seq) { + diff = seq - x->replay.seq; + if (diff < x->props.replay_window) + x->replay.bitmap = ((x->replay.bitmap) << diff) | 1; + else + x->replay.bitmap = 1; + x->replay.seq = seq; + } else { + diff = x->replay.seq - seq; + x->replay.bitmap |= (1U << diff); + } + + if (xfrm_aevent_is_on(xs_net(x))) + x->repl->notify(x, XFRM_REPLAY_UPDATE); +} + +static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct net *net = xs_net(x); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; + if (unlikely(replay_esn->oseq == 0)) { + replay_esn->oseq--; + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_check_bmp(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + unsigned int bitnr, nr; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 pos; + u32 seq = ntohl(net_seq); + u32 diff = replay_esn->seq - seq; + + if (!replay_esn->replay_window) + return 0; + + if (unlikely(seq == 0)) + goto err; + + if (likely(seq > replay_esn->seq)) + return 0; + + if (diff >= replay_esn->replay_window) { + x->stats.replay_window++; + goto err; + } + + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (pos >= diff) + bitnr = (pos - diff) % replay_esn->replay_window; + else + bitnr = replay_esn->replay_window - (diff - pos); + + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + + return 0; + +err_replay: + x->stats.replay++; +err: + xfrm_audit_state_replay(x, skb, net_seq); + return -EINVAL; +} + +static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) +{ + unsigned int bitnr, nr, i; + u32 diff; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 seq = ntohl(net_seq); + u32 pos; + + if (!replay_esn->replay_window) + return; + + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (seq > replay_esn->seq) { + diff = seq - replay_esn->seq; + + if (diff < replay_esn->replay_window) { + for (i = 1; i < diff; i++) { + bitnr = (pos + i) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] &= ~(1U << bitnr); + } + } else { + nr = (replay_esn->replay_window - 1) >> 5; + for (i = 0; i <= nr; i++) + replay_esn->bmp[i] = 0; + } + + bitnr = (pos + diff) % replay_esn->replay_window; + replay_esn->seq = seq; + } else { + diff = replay_esn->seq - seq; + + if (pos >= diff) + bitnr = (pos - diff) % replay_esn->replay_window; + else + bitnr = replay_esn->replay_window - (diff - pos); + } + + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + + if (xfrm_aevent_is_on(xs_net(x))) + x->repl->notify(x, XFRM_REPLAY_UPDATE); +} + +static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) +{ + struct km_event c; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; + + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! + */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (!x->replay_maxdiff || + ((replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) && + (replay_esn->oseq - preplay_esn->oseq + < x->replay_maxdiff))) { + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + } + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(x->replay_esn, x->preplay_esn, + xfrm_replay_state_esn_len(replay_esn)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(x->preplay_esn, x->replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + +static void xfrm_replay_notify_esn(struct xfrm_state *x, int event) +{ + u32 seq_diff, oseq_diff; + struct km_event c; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; + + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! + */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (x->replay_maxdiff) { + if (replay_esn->seq_hi == preplay_esn->seq_hi) + seq_diff = replay_esn->seq - preplay_esn->seq; + else + seq_diff = ~preplay_esn->seq + replay_esn->seq + + 1; + + if (replay_esn->oseq_hi == preplay_esn->oseq_hi) + oseq_diff = replay_esn->oseq + - preplay_esn->oseq; + else + oseq_diff = ~preplay_esn->oseq + + replay_esn->oseq + 1; + + if (seq_diff >= x->replay_maxdiff || + oseq_diff >= x->replay_maxdiff) + break; + } + + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(x->replay_esn, x->preplay_esn, + xfrm_replay_state_esn_len(replay_esn)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(x->preplay_esn, x->replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + +static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct net *net = xs_net(x); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; + XFRM_SKB_CB(skb)->seq.output.hi = replay_esn->oseq_hi; + + if (unlikely(replay_esn->oseq == 0)) { + XFRM_SKB_CB(skb)->seq.output.hi = ++replay_esn->oseq_hi; + + if (replay_esn->oseq_hi == 0) { + replay_esn->oseq--; + replay_esn->oseq_hi--; + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + } + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_check_esn(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + unsigned int bitnr, nr; + u32 diff; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 pos; + u32 seq = ntohl(net_seq); + u32 wsize = replay_esn->replay_window; + u32 top = replay_esn->seq; + u32 bottom = top - wsize + 1; + + if (!wsize) + return 0; + + if (unlikely(seq == 0 && replay_esn->seq_hi == 0 && + (replay_esn->seq < replay_esn->replay_window - 1))) + goto err; + + diff = top - seq; + + if (likely(top >= wsize - 1)) { + /* A. same subspace */ + if (likely(seq > top) || seq < bottom) + return 0; + } else { + /* B. window spans two subspaces */ + if (likely(seq > top && seq < bottom)) + return 0; + if (seq >= bottom) + diff = ~seq + top + 1; + } + + if (diff >= replay_esn->replay_window) { + x->stats.replay_window++; + goto err; + } + + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (pos >= diff) + bitnr = (pos - diff) % replay_esn->replay_window; + else + bitnr = replay_esn->replay_window - (diff - pos); + + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + + return 0; + +err_replay: + x->stats.replay++; +err: + xfrm_audit_state_replay(x, skb, net_seq); + return -EINVAL; +} + +static int xfrm_replay_recheck_esn(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + if (unlikely(XFRM_SKB_CB(skb)->seq.input.hi != + htonl(xfrm_replay_seqhi(x, net_seq)))) { + x->stats.replay_window++; + return -EINVAL; + } + + return xfrm_replay_check_esn(x, skb, net_seq); +} + +static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) +{ + unsigned int bitnr, nr, i; + int wrap; + u32 diff, pos, seq, seq_hi; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + + if (!replay_esn->replay_window) + return; + + seq = ntohl(net_seq); + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + seq_hi = xfrm_replay_seqhi(x, net_seq); + wrap = seq_hi - replay_esn->seq_hi; + + if ((!wrap && seq > replay_esn->seq) || wrap > 0) { + if (likely(!wrap)) + diff = seq - replay_esn->seq; + else + diff = ~replay_esn->seq + seq + 1; + + if (diff < replay_esn->replay_window) { + for (i = 1; i < diff; i++) { + bitnr = (pos + i) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] &= ~(1U << bitnr); + } + } else { + nr = (replay_esn->replay_window - 1) >> 5; + for (i = 0; i <= nr; i++) + replay_esn->bmp[i] = 0; + } + + bitnr = (pos + diff) % replay_esn->replay_window; + replay_esn->seq = seq; + + if (unlikely(wrap > 0)) + replay_esn->seq_hi++; + } else { + diff = replay_esn->seq - seq; + + if (pos >= diff) + bitnr = (pos - diff) % replay_esn->replay_window; + else + bitnr = replay_esn->replay_window - (diff - pos); + } + + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + + if (xfrm_aevent_is_on(xs_net(x))) + x->repl->notify(x, XFRM_REPLAY_UPDATE); +} + +static struct xfrm_replay xfrm_replay_legacy = { + .advance = xfrm_replay_advance, + .check = xfrm_replay_check, + .recheck = xfrm_replay_check, + .notify = xfrm_replay_notify, + .overflow = xfrm_replay_overflow, +}; + +static struct xfrm_replay xfrm_replay_bmp = { + .advance = xfrm_replay_advance_bmp, + .check = xfrm_replay_check_bmp, + .recheck = xfrm_replay_check_bmp, + .notify = xfrm_replay_notify_bmp, + .overflow = xfrm_replay_overflow_bmp, +}; + +static struct xfrm_replay xfrm_replay_esn = { + .advance = xfrm_replay_advance_esn, + .check = xfrm_replay_check_esn, + .recheck = xfrm_replay_recheck_esn, + .notify = xfrm_replay_notify_esn, + .overflow = xfrm_replay_overflow_esn, +}; + +int xfrm_init_replay(struct xfrm_state *x) +{ + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + + if (replay_esn) { + if (replay_esn->replay_window > + replay_esn->bmp_len * sizeof(__u32) * 8) + return -EINVAL; + + if (x->props.flags & XFRM_STATE_ESN) { + if (replay_esn->replay_window == 0) + return -EINVAL; + x->repl = &xfrm_replay_esn; + } else + x->repl = &xfrm_replay_bmp; + } else + x->repl = &xfrm_replay_legacy; + + return 0; +} +EXPORT_SYMBOL(xfrm_init_replay); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index e12d0be5f97..0ab54134bb4 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -18,54 +18,340 @@ #include <linux/pfkeyv2.h> #include <linux/ipsec.h> #include <linux/module.h> +#include <linux/cache.h> +#include <linux/audit.h> #include <asm/uaccess.h> +#include <linux/ktime.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> + +#include "xfrm_hash.h" /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) - 2. Hash table by daddr to find what SAs exist for given + 2. Hash table by (daddr,family,reqid) to find what SAs exist for given destination/tunnel endpoint. (output) */ -static DEFINE_SPINLOCK(xfrm_state_lock); +static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; -/* Hash table to find appropriate SA towards given target (endpoint - * of tunnel or destination of transport mode) allowed by selector. - * - * Main use is finding SA after policy selected tunnel or transport mode. - * Also, it can be used by ah/esp icmp error handler to find offending SA. - */ -static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; -static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; +static inline unsigned int xfrm_dst_hash(struct net *net, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + u32 reqid, + unsigned short family) +{ + return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask); +} + +static inline unsigned int xfrm_src_hash(struct net *net, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + unsigned short family) +{ + return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask); +} + +static inline unsigned int +xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr, + __be32 spi, u8 proto, unsigned short family) +{ + return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask); +} + +static void xfrm_hash_transfer(struct hlist_head *list, + struct hlist_head *ndsttable, + struct hlist_head *nsrctable, + struct hlist_head *nspitable, + unsigned int nhashmask) +{ + struct hlist_node *tmp; + struct xfrm_state *x; + + hlist_for_each_entry_safe(x, tmp, list, bydst) { + unsigned int h; + + h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, + x->props.reqid, x->props.family, + nhashmask); + hlist_add_head(&x->bydst, ndsttable+h); + + h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr, + x->props.family, + nhashmask); + hlist_add_head(&x->bysrc, nsrctable+h); + + if (x->id.spi) { + h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, + x->id.proto, x->props.family, + nhashmask); + hlist_add_head(&x->byspi, nspitable+h); + } + } +} + +static unsigned long xfrm_hash_new_size(unsigned int state_hmask) +{ + return ((state_hmask + 1) << 1) * sizeof(struct hlist_head); +} + +static DEFINE_MUTEX(hash_resize_mutex); + +static void xfrm_hash_resize(struct work_struct *work) +{ + struct net *net = container_of(work, struct net, xfrm.state_hash_work); + struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; + unsigned long nsize, osize; + unsigned int nhashmask, ohashmask; + int i; + + mutex_lock(&hash_resize_mutex); + + nsize = xfrm_hash_new_size(net->xfrm.state_hmask); + ndst = xfrm_hash_alloc(nsize); + if (!ndst) + goto out_unlock; + nsrc = xfrm_hash_alloc(nsize); + if (!nsrc) { + xfrm_hash_free(ndst, nsize); + goto out_unlock; + } + nspi = xfrm_hash_alloc(nsize); + if (!nspi) { + xfrm_hash_free(ndst, nsize); + xfrm_hash_free(nsrc, nsize); + goto out_unlock; + } + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + + nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; + for (i = net->xfrm.state_hmask; i >= 0; i--) + xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi, + nhashmask); + + odst = net->xfrm.state_bydst; + osrc = net->xfrm.state_bysrc; + ospi = net->xfrm.state_byspi; + ohashmask = net->xfrm.state_hmask; + + net->xfrm.state_bydst = ndst; + net->xfrm.state_bysrc = nsrc; + net->xfrm.state_byspi = nspi; + net->xfrm.state_hmask = nhashmask; + + spin_unlock_bh(&net->xfrm.xfrm_state_lock); -DECLARE_WAIT_QUEUE_HEAD(km_waitq); -EXPORT_SYMBOL(km_waitq); + osize = (ohashmask + 1) * sizeof(struct hlist_head); + xfrm_hash_free(odst, osize); + xfrm_hash_free(osrc, osize); + xfrm_hash_free(ospi, osize); -static DEFINE_RWLOCK(xfrm_state_afinfo_lock); -static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; +out_unlock: + mutex_unlock(&hash_resize_mutex); +} + +static DEFINE_SPINLOCK(xfrm_state_afinfo_lock); +static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO]; -static struct work_struct xfrm_state_gc_work; -static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); -static int xfrm_state_gc_flush_bundles; +int __xfrm_state_delete(struct xfrm_state *x); + +int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); +bool km_is_alive(const struct km_event *c); +void km_state_expired(struct xfrm_state *x, int hard, u32 portid); + +static DEFINE_SPINLOCK(xfrm_type_lock); +int xfrm_register_type(const struct xfrm_type *type, unsigned short family) +{ + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + const struct xfrm_type **typemap; + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + typemap = afinfo->type_map; + spin_lock_bh(&xfrm_type_lock); + + if (likely(typemap[type->proto] == NULL)) + typemap[type->proto] = type; + else + err = -EEXIST; + spin_unlock_bh(&xfrm_type_lock); + xfrm_state_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_register_type); + +int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family) +{ + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + const struct xfrm_type **typemap; + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + typemap = afinfo->type_map; + spin_lock_bh(&xfrm_type_lock); -static int __xfrm_state_delete(struct xfrm_state *x); + if (unlikely(typemap[type->proto] != type)) + err = -ENOENT; + else + typemap[type->proto] = NULL; + spin_unlock_bh(&xfrm_type_lock); + xfrm_state_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_unregister_type); -static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); -static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); +static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) +{ + struct xfrm_state_afinfo *afinfo; + const struct xfrm_type **typemap; + const struct xfrm_type *type; + int modload_attempted = 0; -static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); -static void km_state_expired(struct xfrm_state *x, int hard); +retry: + afinfo = xfrm_state_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return NULL; + typemap = afinfo->type_map; + + type = typemap[proto]; + if (unlikely(type && !try_module_get(type->owner))) + type = NULL; + if (!type && !modload_attempted) { + xfrm_state_put_afinfo(afinfo); + request_module("xfrm-type-%d-%d", family, proto); + modload_attempted = 1; + goto retry; + } + + xfrm_state_put_afinfo(afinfo); + return type; +} + +static void xfrm_put_type(const struct xfrm_type *type) +{ + module_put(type->owner); +} + +static DEFINE_SPINLOCK(xfrm_mode_lock); +int xfrm_register_mode(struct xfrm_mode *mode, int family) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_mode **modemap; + int err; + + if (unlikely(mode->encap >= XFRM_MODE_MAX)) + return -EINVAL; + + afinfo = xfrm_state_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + err = -EEXIST; + modemap = afinfo->mode_map; + spin_lock_bh(&xfrm_mode_lock); + if (modemap[mode->encap]) + goto out; + + err = -ENOENT; + if (!try_module_get(afinfo->owner)) + goto out; + + mode->afinfo = afinfo; + modemap[mode->encap] = mode; + err = 0; + +out: + spin_unlock_bh(&xfrm_mode_lock); + xfrm_state_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_register_mode); + +int xfrm_unregister_mode(struct xfrm_mode *mode, int family) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_mode **modemap; + int err; + + if (unlikely(mode->encap >= XFRM_MODE_MAX)) + return -EINVAL; + + afinfo = xfrm_state_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + err = -ENOENT; + modemap = afinfo->mode_map; + spin_lock_bh(&xfrm_mode_lock); + if (likely(modemap[mode->encap] == mode)) { + modemap[mode->encap] = NULL; + module_put(mode->afinfo->owner); + err = 0; + } + + spin_unlock_bh(&xfrm_mode_lock); + xfrm_state_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_unregister_mode); + +static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_mode *mode; + int modload_attempted = 0; + + if (unlikely(encap >= XFRM_MODE_MAX)) + return NULL; + +retry: + afinfo = xfrm_state_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return NULL; + + mode = afinfo->mode_map[encap]; + if (unlikely(mode && !try_module_get(mode->owner))) + mode = NULL; + if (!mode && !modload_attempted) { + xfrm_state_put_afinfo(afinfo); + request_module("xfrm-mode-%d-%d", family, encap); + modload_attempted = 1; + goto retry; + } + + xfrm_state_put_afinfo(afinfo); + return mode; +} + +static void xfrm_put_mode(struct xfrm_mode *mode) +{ + module_put(mode->owner); +} static void xfrm_state_gc_destroy(struct xfrm_state *x) { - if (del_timer(&x->timer)) - BUG(); + tasklet_hrtimer_cancel(&x->mtimer); + del_timer_sync(&x->rtimer); kfree(x->aalg); kfree(x->ealg); kfree(x->calg); kfree(x->encap); + kfree(x->coaddr); + kfree(x->replay_esn); + kfree(x->preplay_esn); + if (x->inner_mode) + xfrm_put_mode(x->inner_mode); + if (x->inner_mode_iaf) + xfrm_put_mode(x->inner_mode_iaf); + if (x->outer_mode) + xfrm_put_mode(x->outer_mode); if (x->type) { x->type->destructor(x); xfrm_put_type(x->type); @@ -74,26 +360,19 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x); } -static void xfrm_state_gc_task(void *data) +static void xfrm_state_gc_task(struct work_struct *work) { + struct net *net = container_of(work, struct net, xfrm.state_gc_work); struct xfrm_state *x; - struct list_head *entry, *tmp; - struct list_head gc_list = LIST_HEAD_INIT(gc_list); - - if (xfrm_state_gc_flush_bundles) { - xfrm_state_gc_flush_bundles = 0; - xfrm_flush_bundles(); - } + struct hlist_node *tmp; + struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); - list_splice_init(&xfrm_state_gc_list, &gc_list); + hlist_move_list(&net->xfrm.state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - list_for_each_safe(entry, tmp, &gc_list) { - x = list_entry(entry, struct xfrm_state, bydst); + hlist_for_each_entry_safe(x, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); - } - wake_up(&km_waitq); } static inline unsigned long make_jiffies(long secs) @@ -101,15 +380,17 @@ static inline unsigned long make_jiffies(long secs) if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) return MAX_SCHEDULE_TIMEOUT-1; else - return secs*HZ; + return secs*HZ; } -static void xfrm_timer_handler(unsigned long data) +static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) { - struct xfrm_state *x = (struct xfrm_state*)data; - unsigned long now = (unsigned long)xtime.tv_sec; + struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer); + struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer); + unsigned long now = get_seconds(); long next = LONG_MAX; int warn = 0; + int err = 0; spin_lock(&x->lock); if (x->km.state == XFRM_STATE_DEAD) @@ -119,8 +400,17 @@ static void xfrm_timer_handler(unsigned long data) if (x->lft.hard_add_expires_seconds) { long tmo = x->lft.hard_add_expires_seconds + x->curlft.add_time - now; - if (tmo <= 0) - goto expired; + if (tmo <= 0) { + if (x->xflags & XFRM_SOFT_EXPIRE) { + /* enter hard expire without soft expire first?! + * setting a new date could trigger this. + * workarbound: fix x->curflt.add_time by below: + */ + x->curlft.add_time = now - x->saved_tmo - 1; + tmo = x->lft.hard_add_expires_seconds - x->saved_tmo; + } else + goto expired; + } if (tmo < next) next = tmo; } @@ -137,10 +427,14 @@ static void xfrm_timer_handler(unsigned long data) if (x->lft.soft_add_expires_seconds) { long tmo = x->lft.soft_add_expires_seconds + x->curlft.add_time - now; - if (tmo <= 0) + if (tmo <= 0) { warn = 1; - else if (tmo < next) + x->xflags &= ~XFRM_SOFT_EXPIRE; + } else if (tmo < next) { next = tmo; + x->xflags |= XFRM_SOFT_EXPIRE; + x->saved_tmo = tmo; + } } if (x->lft.soft_use_expires_seconds) { long tmo = x->lft.soft_use_expires_seconds + @@ -153,48 +447,58 @@ static void xfrm_timer_handler(unsigned long data) x->km.dying = warn; if (warn) - km_state_expired(x, 0); + km_state_expired(x, 0, 0); resched: - if (next != LONG_MAX && - !mod_timer(&x->timer, jiffies + make_jiffies(next))) - xfrm_state_hold(x); + if (next != LONG_MAX) { + tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL); + } + goto out; expired: - if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) { + if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) x->km.state = XFRM_STATE_EXPIRED; - wake_up(&km_waitq); - next = 2; - goto resched; - } - if (!__xfrm_state_delete(x) && x->id.spi) - km_state_expired(x, 1); + + err = __xfrm_state_delete(x); + if (!err) + km_state_expired(x, 1, 0); + + xfrm_audit_state_delete(x, err ? 0 : 1, true); out: spin_unlock(&x->lock); - xfrm_state_put(x); + return HRTIMER_NORESTART; } -struct xfrm_state *xfrm_state_alloc(void) +static void xfrm_replay_timer_handler(unsigned long data); + +struct xfrm_state *xfrm_state_alloc(struct net *net) { struct xfrm_state *x; - x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC); + x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC); if (x) { - memset(x, 0, sizeof(struct xfrm_state)); + write_pnet(&x->xs_net, net); atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); - INIT_LIST_HEAD(&x->bydst); - INIT_LIST_HEAD(&x->byspi); - init_timer(&x->timer); - x->timer.function = xfrm_timer_handler; - x->timer.data = (unsigned long)x; - x->curlft.add_time = (unsigned long)xtime.tv_sec; + INIT_LIST_HEAD(&x->km.all); + INIT_HLIST_NODE(&x->bydst); + INIT_HLIST_NODE(&x->bysrc); + INIT_HLIST_NODE(&x->byspi); + tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, + CLOCK_BOOTTIME, HRTIMER_MODE_ABS); + setup_timer(&x->rtimer, xfrm_replay_timer_handler, + (unsigned long)x); + x->curlft.add_time = get_seconds(); x->lft.soft_byte_limit = XFRM_INF; x->lft.soft_packet_limit = XFRM_INF; x->lft.hard_byte_limit = XFRM_INF; x->lft.hard_packet_limit = XFRM_INF; + x->replay_maxage = 0; + x->replay_maxdiff = 0; + x->inner_mode = NULL; + x->inner_mode_iaf = NULL; spin_lock_init(&x->lock); } return x; @@ -203,52 +507,44 @@ EXPORT_SYMBOL(xfrm_state_alloc); void __xfrm_state_destroy(struct xfrm_state *x) { - BUG_TRAP(x->km.state == XFRM_STATE_DEAD); + struct net *net = xs_net(x); + + WARN_ON(x->km.state != XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - list_add(&x->bydst, &xfrm_state_gc_list); + hlist_add_head(&x->gclist, &net->xfrm.state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - schedule_work(&xfrm_state_gc_work); + schedule_work(&net->xfrm.state_gc_work); } EXPORT_SYMBOL(__xfrm_state_destroy); -static int __xfrm_state_delete(struct xfrm_state *x) +int __xfrm_state_delete(struct xfrm_state *x) { + struct net *net = xs_net(x); int err = -ESRCH; if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; - spin_lock(&xfrm_state_lock); - list_del(&x->bydst); - atomic_dec(&x->refcnt); - if (x->id.spi) { - list_del(&x->byspi); - atomic_dec(&x->refcnt); - } - spin_unlock(&xfrm_state_lock); - if (del_timer(&x->timer)) - atomic_dec(&x->refcnt); - - /* The number two in this test is the reference - * mentioned in the comment below plus the reference - * our caller holds. A larger value means that - * there are DSTs attached to this xfrm_state. - */ - if (atomic_read(&x->refcnt) > 2) { - xfrm_state_gc_flush_bundles = 1; - schedule_work(&xfrm_state_gc_work); - } + spin_lock(&net->xfrm.xfrm_state_lock); + list_del(&x->km.all); + hlist_del(&x->bydst); + hlist_del(&x->bysrc); + if (x->id.spi) + hlist_del(&x->byspi); + net->xfrm.state_num--; + spin_unlock(&net->xfrm.xfrm_state_lock); /* All xfrm_state objects are created by xfrm_state_alloc. * The xfrm_state_alloc call gives a reference, and that * is what we are dropping here. */ - atomic_dec(&x->refcnt); + xfrm_state_put(x); err = 0; } return err; } +EXPORT_SYMBOL(__xfrm_state_delete); int xfrm_state_delete(struct xfrm_state *x) { @@ -262,141 +558,321 @@ int xfrm_state_delete(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete); -void xfrm_state_flush(u8 proto) +#ifdef CONFIG_SECURITY_NETWORK_XFRM +static inline int +xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) { - int i; - struct xfrm_state *x; + int i, err = 0; + + for (i = 0; i <= net->xfrm.state_hmask; i++) { + struct xfrm_state *x; + + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + if (xfrm_id_proto_match(x->id.proto, proto) && + (err = security_xfrm_state_delete(x)) != 0) { + xfrm_audit_state_delete(x, 0, task_valid); + return err; + } + } + } + + return err; +} +#else +static inline int +xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) +{ + return 0; +} +#endif + +int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) +{ + int i, err = 0, cnt = 0; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + err = xfrm_state_flush_secctx_check(net, proto, task_valid); + if (err) + goto out; - spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < XFRM_DST_HSIZE; i++) { + err = -ESRCH; + for (i = 0; i <= net->xfrm.state_hmask; i++) { + struct xfrm_state *x; restart: - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (!xfrm_state_kern(x) && - (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) { + xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); - xfrm_state_delete(x); + err = xfrm_state_delete(x); + xfrm_audit_state_delete(x, err ? 0 : 1, + task_valid); xfrm_state_put(x); + if (!err) + cnt++; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); goto restart; } } } - spin_unlock_bh(&xfrm_state_lock); - wake_up(&km_waitq); + if (cnt) + err = 0; + +out: + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + return err; } EXPORT_SYMBOL(xfrm_state_flush); +void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) +{ + spin_lock_bh(&net->xfrm.xfrm_state_lock); + si->sadcnt = net->xfrm.state_num; + si->sadhcnt = net->xfrm.state_hmask; + si->sadhmcnt = xfrm_state_hashmax; + spin_unlock_bh(&net->xfrm.xfrm_state_lock); +} +EXPORT_SYMBOL(xfrm_sad_getinfo); + static int -xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr, - unsigned short family) +xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl, + const struct xfrm_tmpl *tmpl, + const xfrm_address_t *daddr, const xfrm_address_t *saddr, + unsigned short family) { struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); if (!afinfo) return -1; - afinfo->init_tempsel(x, fl, tmpl, daddr, saddr); + afinfo->init_tempsel(&x->sel, fl); + + if (family != tmpl->encap_family) { + xfrm_state_put_afinfo(afinfo); + afinfo = xfrm_state_get_afinfo(tmpl->encap_family); + if (!afinfo) + return -1; + } + afinfo->init_temprop(x, tmpl, daddr, saddr); xfrm_state_put_afinfo(afinfo); return 0; } +static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, + const xfrm_address_t *daddr, + __be32 spi, u8 proto, + unsigned short family) +{ + unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); + struct xfrm_state *x; + + hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) { + if (x->props.family != family || + x->id.spi != spi || + x->id.proto != proto || + !xfrm_addr_equal(&x->id.daddr, daddr, family)) + continue; + + if ((mark & x->mark.m) != x->mark.v) + continue; + xfrm_state_hold(x); + return x; + } + + return NULL; +} + +static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + u8 proto, unsigned short family) +{ + unsigned int h = xfrm_src_hash(net, daddr, saddr, family); + struct xfrm_state *x; + + hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { + if (x->props.family != family || + x->id.proto != proto || + !xfrm_addr_equal(&x->id.daddr, daddr, family) || + !xfrm_addr_equal(&x->props.saddr, saddr, family)) + continue; + + if ((mark & x->mark.m) != x->mark.v) + continue; + xfrm_state_hold(x); + return x; + } + + return NULL; +} + +static inline struct xfrm_state * +__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) +{ + struct net *net = xs_net(x); + u32 mark = x->mark.v & x->mark.m; + + if (use_spi) + return __xfrm_state_lookup(net, mark, &x->id.daddr, + x->id.spi, x->id.proto, family); + else + return __xfrm_state_lookup_byaddr(net, mark, + &x->id.daddr, + &x->props.saddr, + x->id.proto, family); +} + +static void xfrm_hash_grow_check(struct net *net, int have_hash_collision) +{ + if (have_hash_collision && + (net->xfrm.state_hmask + 1) < xfrm_state_hashmax && + net->xfrm.state_num > net->xfrm.state_hmask) + schedule_work(&net->xfrm.state_hash_work); +} + +static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, + const struct flowi *fl, unsigned short family, + struct xfrm_state **best, int *acq_in_progress, + int *error) +{ + /* Resolution logic: + * 1. There is a valid state with matching selector. Done. + * 2. Valid state with inappropriate selector. Skip. + * + * Entering area of "sysdeps". + * + * 3. If state is not valid, selector is temporary, it selects + * only session which triggered previous resolution. Key + * manager will do something to install a state with proper + * selector. + */ + if (x->km.state == XFRM_STATE_VALID) { + if ((x->sel.family && + !xfrm_selector_match(&x->sel, fl, x->sel.family)) || + !security_xfrm_state_pol_flow_match(x, pol, fl)) + return; + + if (!*best || + (*best)->km.dying > x->km.dying || + ((*best)->km.dying == x->km.dying && + (*best)->curlft.add_time < x->curlft.add_time)) + *best = x; + } else if (x->km.state == XFRM_STATE_ACQ) { + *acq_in_progress = 1; + } else if (x->km.state == XFRM_STATE_ERROR || + x->km.state == XFRM_STATE_EXPIRED) { + if (xfrm_selector_match(&x->sel, fl, x->sel.family) && + security_xfrm_state_pol_flow_match(x, pol, fl)) + *error = -ESRCH; + } +} + struct xfrm_state * -xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, - struct flowi *fl, struct xfrm_tmpl *tmpl, +xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, + const struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsigned short family) { - unsigned h = xfrm_dst_hash(daddr, family); - struct xfrm_state *x, *x0; + static xfrm_address_t saddr_wildcard = { }; + struct net *net = xp_net(pol); + unsigned int h, h_wildcard; + struct xfrm_state *x, *x0, *to_put; int acquire_in_progress = 0; int error = 0; struct xfrm_state *best = NULL; - struct xfrm_state_afinfo *afinfo; - - afinfo = xfrm_state_get_afinfo(family); - if (afinfo == NULL) { - *err = -EAFNOSUPPORT; - return NULL; + u32 mark = pol->mark.v & pol->mark.m; + unsigned short encap_family = tmpl->encap_family; + struct km_event c; + + to_put = NULL; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + if (x->props.family == encap_family && + x->props.reqid == tmpl->reqid && + (mark & x->mark.m) == x->mark.v && + !(x->props.flags & XFRM_STATE_WILDRECV) && + xfrm_state_addr_check(x, daddr, saddr, encap_family) && + tmpl->mode == x->props.mode && + tmpl->id.proto == x->id.proto && + (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) + xfrm_state_look_at(pol, x, fl, encap_family, + &best, &acquire_in_progress, &error); } + if (best || acquire_in_progress) + goto found; - spin_lock_bh(&xfrm_state_lock); - list_for_each_entry(x, xfrm_state_bydst+h, bydst) { - if (x->props.family == family && + h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); + hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) { + if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && - xfrm_state_addr_check(x, daddr, saddr, family) && + (mark & x->mark.m) == x->mark.v && + !(x->props.flags & XFRM_STATE_WILDRECV) && + xfrm_addr_equal(&x->id.daddr, daddr, encap_family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && - (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) { - /* Resolution logic: - 1. There is a valid state with matching selector. - Done. - 2. Valid state with inappropriate selector. Skip. - - Entering area of "sysdeps". - - 3. If state is not valid, selector is temporary, - it selects only session which triggered - previous resolution. Key manager will do - something to install a state with proper - selector. - */ - if (x->km.state == XFRM_STATE_VALID) { - if (!xfrm_selector_match(&x->sel, fl, family) || - !xfrm_sec_ctx_match(pol->security, x->security)) - continue; - if (!best || - best->km.dying > x->km.dying || - (best->km.dying == x->km.dying && - best->curlft.add_time < x->curlft.add_time)) - best = x; - } else if (x->km.state == XFRM_STATE_ACQ) { - acquire_in_progress = 1; - } else if (x->km.state == XFRM_STATE_ERROR || - x->km.state == XFRM_STATE_EXPIRED) { - if (xfrm_selector_match(&x->sel, fl, family) && - xfrm_sec_ctx_match(pol->security, x->security)) - error = -ESRCH; - } - } + (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) + xfrm_state_look_at(pol, x, fl, encap_family, + &best, &acquire_in_progress, &error); } +found: x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = afinfo->state_lookup(daddr, tmpl->id.spi, - tmpl->id.proto)) != NULL) { - xfrm_state_put(x0); + (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi, + tmpl->id.proto, encap_family)) != NULL) { + to_put = x0; error = -EEXIST; goto out; } - x = xfrm_state_alloc(); + + c.net = net; + /* If the KMs have no listeners (yet...), avoid allocating an SA + * for each and every packet - garbage collection might not + * handle the flood. + */ + if (!km_is_alive(&c)) { + error = -ESRCH; + goto out; + } + + x = xfrm_state_alloc(net); if (x == NULL) { error = -ENOMEM; goto out; } - /* Initialize temporary selector matching only + /* Initialize temporary state matching only * to current session. */ - xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family); + memcpy(&x->mark, &pol->mark, sizeof(x->mark)); + + error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid); + if (error) { + x->km.state = XFRM_STATE_DEAD; + to_put = x; + x = NULL; + goto out; + } if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - list_add_tail(&x->bydst, xfrm_state_bydst+h); - xfrm_state_hold(x); + list_add(&x->km.all, &net->xfrm.state_all); + hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + h = xfrm_src_hash(net, daddr, saddr, encap_family); + hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); if (x->id.spi) { - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); - list_add(&x->byspi, xfrm_state_byspi+h); - xfrm_state_hold(x); + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); + hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } - x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - xfrm_state_hold(x); - x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; - add_timer(&x->timer); + x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; + tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); + net->xfrm.state_num++; + xfrm_hash_grow_check(net, x->bydst.next != NULL); } else { x->km.state = XFRM_STATE_DEAD; - xfrm_state_put(x); + to_put = x; x = NULL; error = -ESRCH; } @@ -406,114 +882,443 @@ out: xfrm_state_hold(x); else *err = acquire_in_progress ? -EAGAIN : error; - spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + if (to_put) + xfrm_state_put(to_put); return x; } -static void __xfrm_state_insert(struct xfrm_state *x) +struct xfrm_state * +xfrm_stateonly_find(struct net *net, u32 mark, + xfrm_address_t *daddr, xfrm_address_t *saddr, + unsigned short family, u8 mode, u8 proto, u32 reqid) { - unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); + unsigned int h; + struct xfrm_state *rx = NULL, *x = NULL; - list_add(&x->bydst, xfrm_state_bydst+h); - xfrm_state_hold(x); + spin_lock_bh(&net->xfrm.xfrm_state_lock); + h = xfrm_dst_hash(net, daddr, saddr, reqid, family); + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + if (x->props.family == family && + x->props.reqid == reqid && + (mark & x->mark.m) == x->mark.v && + !(x->props.flags & XFRM_STATE_WILDRECV) && + xfrm_state_addr_check(x, daddr, saddr, family) && + mode == x->props.mode && + proto == x->id.proto && + x->km.state == XFRM_STATE_VALID) { + rx = x; + break; + } + } + + if (rx) + xfrm_state_hold(rx); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - list_add(&x->byspi, xfrm_state_byspi+h); - xfrm_state_hold(x); + return rx; +} +EXPORT_SYMBOL(xfrm_stateonly_find); - if (!mod_timer(&x->timer, jiffies + HZ)) +struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi, + unsigned short family) +{ + struct xfrm_state *x; + struct xfrm_state_walk *w; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + list_for_each_entry(w, &net->xfrm.state_all, all) { + x = container_of(w, struct xfrm_state, km); + if (x->props.family != family || + x->id.spi != spi) + continue; + + spin_unlock_bh(&net->xfrm.xfrm_state_lock); xfrm_state_hold(x); + return x; + } + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + return NULL; +} +EXPORT_SYMBOL(xfrm_state_lookup_byspi); + +static void __xfrm_state_insert(struct xfrm_state *x) +{ + struct net *net = xs_net(x); + unsigned int h; + + list_add(&x->km.all, &net->xfrm.state_all); - wake_up(&km_waitq); + h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, + x->props.reqid, x->props.family); + hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + + h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family); + hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); + + if (x->id.spi) { + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, + x->props.family); + + hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); + } + + tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); + if (x->replay_maxage) + mod_timer(&x->rtimer, jiffies + x->replay_maxage); + + net->xfrm.state_num++; + + xfrm_hash_grow_check(net, x->bydst.next != NULL); +} + +/* net->xfrm.xfrm_state_lock is held */ +static void __xfrm_state_bump_genids(struct xfrm_state *xnew) +{ + struct net *net = xs_net(xnew); + unsigned short family = xnew->props.family; + u32 reqid = xnew->props.reqid; + struct xfrm_state *x; + unsigned int h; + u32 mark = xnew->mark.v & xnew->mark.m; + + h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + if (x->props.family == family && + x->props.reqid == reqid && + (mark & x->mark.m) == x->mark.v && + xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) && + xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family)) + x->genid++; + } } void xfrm_state_insert(struct xfrm_state *x) { - spin_lock_bh(&xfrm_state_lock); - __xfrm_state_insert(x); - spin_unlock_bh(&xfrm_state_lock); + struct net *net = xs_net(x); - xfrm_flush_all_bundles(); + spin_lock_bh(&net->xfrm.xfrm_state_lock); + __xfrm_state_bump_genids(x); + __xfrm_state_insert(x); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); } EXPORT_SYMBOL(xfrm_state_insert); -static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); +/* net->xfrm.xfrm_state_lock is held */ +static struct xfrm_state *__find_acq_core(struct net *net, + const struct xfrm_mark *m, + unsigned short family, u8 mode, + u32 reqid, u8 proto, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + int create) +{ + unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); + struct xfrm_state *x; + u32 mark = m->v & m->m; + + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + if (x->props.reqid != reqid || + x->props.mode != mode || + x->props.family != family || + x->km.state != XFRM_STATE_ACQ || + x->id.spi != 0 || + x->id.proto != proto || + (mark & x->mark.m) != x->mark.v || + !xfrm_addr_equal(&x->id.daddr, daddr, family) || + !xfrm_addr_equal(&x->props.saddr, saddr, family)) + continue; + + xfrm_state_hold(x); + return x; + } + + if (!create) + return NULL; + + x = xfrm_state_alloc(net); + if (likely(x)) { + switch (family) { + case AF_INET: + x->sel.daddr.a4 = daddr->a4; + x->sel.saddr.a4 = saddr->a4; + x->sel.prefixlen_d = 32; + x->sel.prefixlen_s = 32; + x->props.saddr.a4 = saddr->a4; + x->id.daddr.a4 = daddr->a4; + break; + + case AF_INET6: + *(struct in6_addr *)x->sel.daddr.a6 = *(struct in6_addr *)daddr; + *(struct in6_addr *)x->sel.saddr.a6 = *(struct in6_addr *)saddr; + x->sel.prefixlen_d = 128; + x->sel.prefixlen_s = 128; + *(struct in6_addr *)x->props.saddr.a6 = *(struct in6_addr *)saddr; + *(struct in6_addr *)x->id.daddr.a6 = *(struct in6_addr *)daddr; + break; + } + + x->km.state = XFRM_STATE_ACQ; + x->id.proto = proto; + x->props.family = family; + x->props.mode = mode; + x->props.reqid = reqid; + x->mark.v = m->v; + x->mark.m = m->m; + x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; + xfrm_state_hold(x); + tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); + list_add(&x->km.all, &net->xfrm.state_all); + hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + h = xfrm_src_hash(net, daddr, saddr, family); + hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); + + net->xfrm.state_num++; + + xfrm_hash_grow_check(net, x->bydst.next != NULL); + } + + return x; +} + +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); int xfrm_state_add(struct xfrm_state *x) { - struct xfrm_state_afinfo *afinfo; - struct xfrm_state *x1; + struct net *net = xs_net(x); + struct xfrm_state *x1, *to_put; int family; int err; + u32 mark = x->mark.v & x->mark.m; + int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; - afinfo = xfrm_state_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - spin_lock_bh(&xfrm_state_lock); + to_put = NULL; - x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + spin_lock_bh(&net->xfrm.xfrm_state_lock); + + x1 = __xfrm_state_locate(x, use_spi, family); if (x1) { - xfrm_state_put(x1); + to_put = x1; x1 = NULL; err = -EEXIST; goto out; } - if (x->km.seq) { - x1 = __xfrm_find_acq_byseq(x->km.seq); - if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) { - xfrm_state_put(x1); + if (use_spi && x->km.seq) { + x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq); + if (x1 && ((x1->id.proto != x->id.proto) || + !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) { + to_put = x1; x1 = NULL; } } - if (!x1) - x1 = afinfo->find_acq( - x->props.mode, x->props.reqid, x->id.proto, - &x->id.daddr, &x->props.saddr, 0); + if (use_spi && !x1) + x1 = __find_acq_core(net, &x->mark, family, x->props.mode, + x->props.reqid, x->id.proto, + &x->id.daddr, &x->props.saddr, 0); + __xfrm_state_bump_genids(x); __xfrm_state_insert(x); err = 0; out: - spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); - - if (!err) - xfrm_flush_all_bundles(); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (x1) { xfrm_state_delete(x1); xfrm_state_put(x1); } + if (to_put) + xfrm_state_put(to_put); + return err; } EXPORT_SYMBOL(xfrm_state_add); +#ifdef CONFIG_XFRM_MIGRATE +static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig) +{ + struct net *net = xs_net(orig); + struct xfrm_state *x = xfrm_state_alloc(net); + if (!x) + goto out; + + memcpy(&x->id, &orig->id, sizeof(x->id)); + memcpy(&x->sel, &orig->sel, sizeof(x->sel)); + memcpy(&x->lft, &orig->lft, sizeof(x->lft)); + x->props.mode = orig->props.mode; + x->props.replay_window = orig->props.replay_window; + x->props.reqid = orig->props.reqid; + x->props.family = orig->props.family; + x->props.saddr = orig->props.saddr; + + if (orig->aalg) { + x->aalg = xfrm_algo_auth_clone(orig->aalg); + if (!x->aalg) + goto error; + } + x->props.aalgo = orig->props.aalgo; + + if (orig->aead) { + x->aead = xfrm_algo_aead_clone(orig->aead); + if (!x->aead) + goto error; + } + if (orig->ealg) { + x->ealg = xfrm_algo_clone(orig->ealg); + if (!x->ealg) + goto error; + } + x->props.ealgo = orig->props.ealgo; + + if (orig->calg) { + x->calg = xfrm_algo_clone(orig->calg); + if (!x->calg) + goto error; + } + x->props.calgo = orig->props.calgo; + + if (orig->encap) { + x->encap = kmemdup(orig->encap, sizeof(*x->encap), GFP_KERNEL); + if (!x->encap) + goto error; + } + + if (orig->coaddr) { + x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr), + GFP_KERNEL); + if (!x->coaddr) + goto error; + } + + if (orig->replay_esn) { + if (xfrm_replay_clone(x, orig)) + goto error; + } + + memcpy(&x->mark, &orig->mark, sizeof(x->mark)); + + if (xfrm_init_state(x) < 0) + goto error; + + x->props.flags = orig->props.flags; + x->props.extra_flags = orig->props.extra_flags; + + x->tfcpad = orig->tfcpad; + x->replay_maxdiff = orig->replay_maxdiff; + x->replay_maxage = orig->replay_maxage; + x->curlft.add_time = orig->curlft.add_time; + x->km.state = orig->km.state; + x->km.seq = orig->km.seq; + + return x; + + error: + xfrm_state_put(x); +out: + return NULL; +} + +struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net) +{ + unsigned int h; + struct xfrm_state *x = NULL; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + + if (m->reqid) { + h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr, + m->reqid, m->old_family); + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + if (x->props.mode != m->mode || + x->id.proto != m->proto) + continue; + if (m->reqid && x->props.reqid != m->reqid) + continue; + if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, + m->old_family) || + !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, + m->old_family)) + continue; + xfrm_state_hold(x); + break; + } + } else { + h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr, + m->old_family); + hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { + if (x->props.mode != m->mode || + x->id.proto != m->proto) + continue; + if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, + m->old_family) || + !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, + m->old_family)) + continue; + xfrm_state_hold(x); + break; + } + } + + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + + return x; +} +EXPORT_SYMBOL(xfrm_migrate_state_find); + +struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, + struct xfrm_migrate *m) +{ + struct xfrm_state *xc; + + xc = xfrm_state_clone(x); + if (!xc) + return NULL; + + memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr)); + memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr)); + + /* add state */ + if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) { + /* a care is needed when the destination address of the + state is to be updated as it is a part of triplet */ + xfrm_state_insert(xc); + } else { + if (xfrm_state_add(xc) < 0) + goto error; + } + + return xc; +error: + xfrm_state_put(xc); + return NULL; +} +EXPORT_SYMBOL(xfrm_state_migrate); +#endif + int xfrm_state_update(struct xfrm_state *x) { - struct xfrm_state_afinfo *afinfo; - struct xfrm_state *x1; + struct xfrm_state *x1, *to_put; int err; + int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); + struct net *net = xs_net(x); - afinfo = xfrm_state_get_afinfo(x->props.family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; + to_put = NULL; - spin_lock_bh(&xfrm_state_lock); - x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + spin_lock_bh(&net->xfrm.xfrm_state_lock); + x1 = __xfrm_state_locate(x, use_spi, x->props.family); err = -ESRCH; if (!x1) goto out; if (xfrm_state_kern(x1)) { - xfrm_state_put(x1); + to_put = x1; err = -EEXIST; goto out; } @@ -525,8 +1330,10 @@ int xfrm_state_update(struct xfrm_state *x) err = 0; out: - spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + + if (to_put) + xfrm_state_put(to_put); if (err) return err; @@ -542,15 +1349,21 @@ out: if (likely(x1->km.state == XFRM_STATE_VALID)) { if (x->encap && x1->encap) memcpy(x1->encap, x->encap, sizeof(*x1->encap)); + if (x->coaddr && x1->coaddr) { + memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr)); + } + if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel))) + memcpy(&x1->sel, &x->sel, sizeof(x1->sel)); memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); x1->km.dying = 0; - if (!mod_timer(&x1->timer, jiffies + HZ)) - xfrm_state_hold(x1); + tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); if (x1->curlft.use_time) xfrm_state_check_expire(x1); err = 0; + x->km.state = XFRM_STATE_DEAD; + __xfrm_state_put(x); } spin_unlock_bh(&x1->lock); @@ -563,16 +1376,12 @@ EXPORT_SYMBOL(xfrm_state_update); int xfrm_state_check_expire(struct xfrm_state *x) { if (!x->curlft.use_time) - x->curlft.use_time = (unsigned long)xtime.tv_sec; - - if (x->km.state != XFRM_STATE_VALID) - return -EINVAL; + x->curlft.use_time = get_seconds(); if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; - if (!mod_timer(&x->timer, jiffies)) - xfrm_state_hold(x); + tasklet_hrtimer_start(&x->mtimer, ktime_set(0, 0), HRTIMER_MODE_REL); return -EINVAL; } @@ -580,80 +1389,107 @@ int xfrm_state_check_expire(struct xfrm_state *x) (x->curlft.bytes >= x->lft.soft_byte_limit || x->curlft.packets >= x->lft.soft_packet_limit)) { x->km.dying = 1; - km_state_expired(x, 0); + km_state_expired(x, 0, 0); } return 0; } EXPORT_SYMBOL(xfrm_state_check_expire); -static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) +struct xfrm_state * +xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi, + u8 proto, unsigned short family) { - int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev) - - skb_headroom(skb); - - if (nhead > 0) - return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); + struct xfrm_state *x; - /* Check tail too... */ - return 0; + spin_lock_bh(&net->xfrm.xfrm_state_lock); + x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + return x; } +EXPORT_SYMBOL(xfrm_state_lookup); -int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb) +struct xfrm_state * +xfrm_state_lookup_byaddr(struct net *net, u32 mark, + const xfrm_address_t *daddr, const xfrm_address_t *saddr, + u8 proto, unsigned short family) { - int err = xfrm_state_check_expire(x); - if (err < 0) - goto err; - err = xfrm_state_check_space(x, skb); -err: - return err; + struct xfrm_state *x; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + return x; } -EXPORT_SYMBOL(xfrm_state_check); +EXPORT_SYMBOL(xfrm_state_lookup_byaddr); struct xfrm_state * -xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, - unsigned short family) +xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid, + u8 proto, const xfrm_address_t *daddr, + const xfrm_address_t *saddr, int create, unsigned short family) { struct xfrm_state *x; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + x = __find_acq_core(net, mark, family, mode, reqid, proto, daddr, saddr, create); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + + return x; +} +EXPORT_SYMBOL(xfrm_find_acq); + +#ifdef CONFIG_XFRM_SUB_POLICY +int +xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, + unsigned short family, struct net *net) +{ + int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); if (!afinfo) - return NULL; + return -EAFNOSUPPORT; - spin_lock_bh(&xfrm_state_lock); - x = afinfo->state_lookup(daddr, spi, proto); - spin_unlock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/ + if (afinfo->tmpl_sort) + err = afinfo->tmpl_sort(dst, src, n); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); xfrm_state_put_afinfo(afinfo); - return x; + return err; } -EXPORT_SYMBOL(xfrm_state_lookup); +EXPORT_SYMBOL(xfrm_tmpl_sort); -struct xfrm_state * -xfrm_find_acq(u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, - int create, unsigned short family) +int +xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, + unsigned short family) { - struct xfrm_state *x; + int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + struct net *net = xs_net(*src); + if (!afinfo) - return NULL; + return -EAFNOSUPPORT; - spin_lock_bh(&xfrm_state_lock); - x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create); - spin_unlock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); + if (afinfo->state_sort) + err = afinfo->state_sort(dst, src, n); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); xfrm_state_put_afinfo(afinfo); - return x; + return err; } -EXPORT_SYMBOL(xfrm_find_acq); +EXPORT_SYMBOL(xfrm_state_sort); +#endif /* Silly enough, but I'm lazy to build resolution list */ -static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq) { int i; - struct xfrm_state *x; - for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) { + for (i = 0; i <= net->xfrm.state_hmask; i++) { + struct xfrm_state *x; + + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + if (x->km.seq == seq && + (mark & x->mark.m) == x->mark.v && + x->km.state == XFRM_STATE_ACQ) { xfrm_state_hold(x); return x; } @@ -662,13 +1498,13 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) return NULL; } -struct xfrm_state *xfrm_find_acq_byseq(u32 seq) +struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq) { struct xfrm_state *x; - spin_lock_bh(&xfrm_state_lock); - x = __xfrm_find_acq_byseq(seq); - spin_unlock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); + x = __xfrm_find_acq_byseq(net, mark, seq); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_find_acq_byseq); @@ -676,39 +1512,72 @@ EXPORT_SYMBOL(xfrm_find_acq_byseq); u32 xfrm_get_acqseq(void) { u32 res; - static u32 acqseq; - static DEFINE_SPINLOCK(acqseq_lock); + static atomic_t acqseq; + + do { + res = atomic_inc_return(&acqseq); + } while (!res); - spin_lock_bh(&acqseq_lock); - res = (++acqseq ? : ++acqseq); - spin_unlock_bh(&acqseq_lock); return res; } EXPORT_SYMBOL(xfrm_get_acqseq); -void -xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) +int verify_spi_info(u8 proto, u32 min, u32 max) +{ + switch (proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + break; + + case IPPROTO_COMP: + /* IPCOMP spi is 16-bits. */ + if (max >= 0x10000) + return -EINVAL; + break; + + default: + return -EINVAL; + } + + if (min > max) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(verify_spi_info); + +int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) { - u32 h; + struct net *net = xs_net(x); + unsigned int h; struct xfrm_state *x0; + int err = -ENOENT; + __be32 minspi = htonl(low); + __be32 maxspi = htonl(high); + u32 mark = x->mark.v & x->mark.m; + spin_lock_bh(&x->lock); + if (x->km.state == XFRM_STATE_DEAD) + goto unlock; + + err = 0; if (x->id.spi) - return; + goto unlock; + + err = -ENOENT; if (minspi == maxspi) { - x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family); + x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family); if (x0) { xfrm_state_put(x0); - return; + goto unlock; } x->id.spi = minspi; } else { u32 spi = 0; - minspi = ntohl(minspi); - maxspi = ntohl(maxspi); - for (h=0; h<maxspi-minspi+1; h++) { - spi = minspi + net_random()%(maxspi-minspi+1); - x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family); + for (h = 0; h < high-low+1; h++) { + spi = low + prandom_u32()%(high-low+1); + x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family); if (x0 == NULL) { x->id.spi = htonl(spi); break; @@ -717,183 +1586,264 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) } } if (x->id.spi) { - spin_lock_bh(&xfrm_state_lock); - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - list_add(&x->byspi, xfrm_state_byspi+h); - xfrm_state_hold(x); - spin_unlock_bh(&xfrm_state_lock); - wake_up(&km_waitq); + spin_lock_bh(&net->xfrm.xfrm_state_lock); + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); + hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + + err = 0; } + +unlock: + spin_unlock_bh(&x->lock); + + return err; } EXPORT_SYMBOL(xfrm_alloc_spi); -int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), +static bool __xfrm_state_filter_match(struct xfrm_state *x, + struct xfrm_address_filter *filter) +{ + if (filter) { + if ((filter->family == AF_INET || + filter->family == AF_INET6) && + x->props.family != filter->family) + return false; + + return addr_match(&x->props.saddr, &filter->saddr, + filter->splen) && + addr_match(&x->id.daddr, &filter->daddr, + filter->dplen); + } + return true; +} + +int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, + int (*func)(struct xfrm_state *, int, void*), void *data) { - int i; - struct xfrm_state *x; - int count = 0; + struct xfrm_state *state; + struct xfrm_state_walk *x; int err = 0; - spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) - count++; + if (walk->seq != 0 && list_empty(&walk->all)) + return 0; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + if (list_empty(&walk->all)) + x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all); + else + x = list_entry(&walk->all, struct xfrm_state_walk, all); + list_for_each_entry_from(x, &net->xfrm.state_all, all) { + if (x->state == XFRM_STATE_DEAD) + continue; + state = container_of(x, struct xfrm_state, km); + if (!xfrm_id_proto_match(state->id.proto, walk->proto)) + continue; + if (!__xfrm_state_filter_match(state, walk->filter)) + continue; + err = func(state, walk->seq, data); + if (err) { + list_move_tail(&walk->all, &x->all); + goto out; } + walk->seq++; } - if (count == 0) { + if (walk->seq == 0) { err = -ENOENT; goto out; } - - for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (proto != IPSEC_PROTO_ANY && x->id.proto != proto) - continue; - err = func(x, --count, data); - if (err) - goto out; - } - } + list_del_init(&walk->all); out: - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return err; } EXPORT_SYMBOL(xfrm_state_walk); -int xfrm_replay_check(struct xfrm_state *x, u32 seq) +void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto, + struct xfrm_address_filter *filter) { - u32 diff; - - seq = ntohl(seq); - - if (unlikely(seq == 0)) - return -EINVAL; + INIT_LIST_HEAD(&walk->all); + walk->proto = proto; + walk->state = XFRM_STATE_DEAD; + walk->seq = 0; + walk->filter = filter; +} +EXPORT_SYMBOL(xfrm_state_walk_init); - if (likely(seq > x->replay.seq)) - return 0; +void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net) +{ + kfree(walk->filter); - diff = x->replay.seq - seq; - if (diff >= x->props.replay_window) { - x->stats.replay_window++; - return -EINVAL; - } + if (list_empty(&walk->all)) + return; - if (x->replay.bitmap & (1U << diff)) { - x->stats.replay++; - return -EINVAL; - } - return 0; + spin_lock_bh(&net->xfrm.xfrm_state_lock); + list_del(&walk->all); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); } -EXPORT_SYMBOL(xfrm_replay_check); +EXPORT_SYMBOL(xfrm_state_walk_done); -void xfrm_replay_advance(struct xfrm_state *x, u32 seq) +static void xfrm_replay_timer_handler(unsigned long data) { - u32 diff; + struct xfrm_state *x = (struct xfrm_state *)data; - seq = ntohl(seq); + spin_lock(&x->lock); - if (seq > x->replay.seq) { - diff = seq - x->replay.seq; - if (diff < x->props.replay_window) - x->replay.bitmap = ((x->replay.bitmap) << diff) | 1; + if (x->km.state == XFRM_STATE_VALID) { + if (xfrm_aevent_is_on(xs_net(x))) + x->repl->notify(x, XFRM_REPLAY_TIMEOUT); else - x->replay.bitmap = 1; - x->replay.seq = seq; - } else { - diff = x->replay.seq - seq; - x->replay.bitmap |= (1U << diff); + x->xflags |= XFRM_TIME_DEFER; } + + spin_unlock(&x->lock); } -EXPORT_SYMBOL(xfrm_replay_advance); -static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list); -static DEFINE_RWLOCK(xfrm_km_lock); +static LIST_HEAD(xfrm_km_list); -void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) +void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) if (km->notify_policy) km->notify_policy(xp, dir, c); - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); } -void km_state_notify(struct xfrm_state *x, struct km_event *c) +void km_state_notify(struct xfrm_state *x, const struct km_event *c) { struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) if (km->notify) km->notify(x, c); - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); } EXPORT_SYMBOL(km_policy_notify); EXPORT_SYMBOL(km_state_notify); -static void km_state_expired(struct xfrm_state *x, int hard) +void km_state_expired(struct xfrm_state *x, int hard, u32 portid) { struct km_event c; c.data.hard = hard; + c.portid = portid; c.event = XFRM_MSG_EXPIRE; km_state_notify(x, &c); - - if (hard) - wake_up(&km_waitq); } +EXPORT_SYMBOL(km_state_expired); /* * We send to all registered managers regardless of failure * We are happy with one success */ -static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) +int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) { int err = -EINVAL, acqret; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { - acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT); + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { + acqret = km->acquire(x, t, pol); if (!acqret) err = acqret; } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } +EXPORT_SYMBOL(km_query); -int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport) +int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) { int err = -EINVAL; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { if (km->new_mapping) err = km->new_mapping(x, ipaddr, sport); if (!err) break; } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_new_mapping); -void km_policy_expired(struct xfrm_policy *pol, int dir, int hard) +void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid) { struct km_event c; c.data.hard = hard; + c.portid = portid; c.event = XFRM_MSG_POLEXPIRE; km_policy_notify(pol, dir, &c); +} +EXPORT_SYMBOL(km_policy_expired); - if (hard) - wake_up(&km_waitq); +#ifdef CONFIG_XFRM_MIGRATE +int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + const struct xfrm_migrate *m, int num_migrate, + const struct xfrm_kmaddress *k) +{ + int err = -EINVAL; + int ret; + struct xfrm_mgr *km; + + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { + if (km->migrate) { + ret = km->migrate(sel, dir, type, m, num_migrate, k); + if (!ret) + err = ret; + } + } + rcu_read_unlock(); + return err; } +EXPORT_SYMBOL(km_migrate); +#endif + +int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) +{ + int err = -EINVAL; + int ret; + struct xfrm_mgr *km; + + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { + if (km->report) { + ret = km->report(net, proto, sel, addr); + if (!ret) + err = ret; + } + } + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(km_report); + +bool km_is_alive(const struct km_event *c) +{ + struct xfrm_mgr *km; + bool is_alive = false; + + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { + if (km->is_alive && km->is_alive(c)) { + is_alive = true; + break; + } + } + rcu_read_unlock(); + + return is_alive; +} +EXPORT_SYMBOL(km_is_alive); int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) { @@ -914,14 +1864,14 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen goto out; err = -EINVAL; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { - pol = km->compile_policy(sk->sk_family, optname, data, + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { + pol = km->compile_policy(sk, optname, data, optlen, &err); if (err >= 0) break; } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); if (err >= 0) { xfrm_sk_policy_insert(sk, err, pol); @@ -935,20 +1885,23 @@ out: } EXPORT_SYMBOL(xfrm_user_policy); +static DEFINE_SPINLOCK(xfrm_km_lock); + int xfrm_register_km(struct xfrm_mgr *km) { - write_lock_bh(&xfrm_km_lock); - list_add_tail(&km->list, &xfrm_km_list); - write_unlock_bh(&xfrm_km_lock); + spin_lock_bh(&xfrm_km_lock); + list_add_tail_rcu(&km->list, &xfrm_km_list); + spin_unlock_bh(&xfrm_km_lock); return 0; } EXPORT_SYMBOL(xfrm_register_km); int xfrm_unregister_km(struct xfrm_mgr *km) { - write_lock_bh(&xfrm_km_lock); - list_del(&km->list); - write_unlock_bh(&xfrm_km_lock); + spin_lock_bh(&xfrm_km_lock); + list_del_rcu(&km->list); + spin_unlock_bh(&xfrm_km_lock); + synchronize_rcu(); return 0; } EXPORT_SYMBOL(xfrm_unregister_km); @@ -960,15 +1913,12 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock(&xfrm_state_afinfo_lock); + spin_lock_bh(&xfrm_state_afinfo_lock); if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; - else { - afinfo->state_bydst = xfrm_state_bydst; - afinfo->state_byspi = xfrm_state_byspi; - xfrm_state_afinfo[afinfo->family] = afinfo; - } - write_unlock(&xfrm_state_afinfo_lock); + else + rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo); + spin_unlock_bh(&xfrm_state_afinfo_lock); return err; } EXPORT_SYMBOL(xfrm_state_register_afinfo); @@ -980,39 +1930,34 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock(&xfrm_state_afinfo_lock); + spin_lock_bh(&xfrm_state_afinfo_lock); if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) err = -EINVAL; - else { - xfrm_state_afinfo[afinfo->family] = NULL; - afinfo->state_byspi = NULL; - afinfo->state_bydst = NULL; - } + else + RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL); } - write_unlock(&xfrm_state_afinfo_lock); + spin_unlock_bh(&xfrm_state_afinfo_lock); + synchronize_rcu(); return err; } EXPORT_SYMBOL(xfrm_state_unregister_afinfo); -static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) +struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) { struct xfrm_state_afinfo *afinfo; if (unlikely(family >= NPROTO)) return NULL; - read_lock(&xfrm_state_afinfo_lock); - afinfo = xfrm_state_afinfo[family]; - if (likely(afinfo != NULL)) - read_lock(&afinfo->lock); - read_unlock(&xfrm_state_afinfo_lock); + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_state_afinfo[family]); + if (unlikely(!afinfo)) + rcu_read_unlock(); return afinfo; } -static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) +void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) { - if (unlikely(afinfo == NULL)) - return; - read_unlock(&afinfo->lock); + rcu_read_unlock(); } /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ @@ -1030,45 +1975,24 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete_tunnel); -/* - * This function is NOT optimal. For example, with ESP it will give an - * MTU that's usually two bytes short of being optimal. However, it will - * usually give an answer that's a multiple of 4 provided the input is - * also a multiple of 4. - */ int xfrm_state_mtu(struct xfrm_state *x, int mtu) { - int res = mtu; - - res -= x->props.header_len; - - for (;;) { - int m = res; - - if (m < 68) - return 68; - - spin_lock_bh(&x->lock); - if (x->km.state == XFRM_STATE_VALID && - x->type && x->type->get_max_size) - m = x->type->get_max_size(x, m); - else - m += x->props.header_len; - spin_unlock_bh(&x->lock); - - if (m <= mtu) - break; - res -= (m - mtu); - } + int res; + spin_lock_bh(&x->lock); + if (x->km.state == XFRM_STATE_VALID && + x->type && x->type->get_mtu) + res = x->type->get_mtu(x, mtu); + else + res = mtu - x->props.header_len; + spin_unlock_bh(&x->lock); return res; } -EXPORT_SYMBOL(xfrm_state_mtu); - -int xfrm_init_state(struct xfrm_state *x) +int __xfrm_init_state(struct xfrm_state *x, bool init_replay) { struct xfrm_state_afinfo *afinfo; + struct xfrm_mode *inner_mode; int family = x->props.family; int err; @@ -1087,6 +2011,45 @@ int xfrm_init_state(struct xfrm_state *x) goto error; err = -EPROTONOSUPPORT; + + if (x->sel.family != AF_UNSPEC) { + inner_mode = xfrm_get_mode(x->props.mode, x->sel.family); + if (inner_mode == NULL) + goto error; + + if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && + family != x->sel.family) { + xfrm_put_mode(inner_mode); + goto error; + } + + x->inner_mode = inner_mode; + } else { + struct xfrm_mode *inner_mode_iaf; + int iafamily = AF_INET; + + inner_mode = xfrm_get_mode(x->props.mode, x->props.family); + if (inner_mode == NULL) + goto error; + + if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) { + xfrm_put_mode(inner_mode); + goto error; + } + x->inner_mode = inner_mode; + + if (x->props.family == AF_INET) + iafamily = AF_INET6; + + inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily); + if (inner_mode_iaf) { + if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL) + x->inner_mode_iaf = inner_mode_iaf; + else + xfrm_put_mode(inner_mode_iaf); + } + } + x->type = xfrm_get_type(x->id.proto, family); if (x->type == NULL) goto error; @@ -1095,22 +2058,244 @@ int xfrm_init_state(struct xfrm_state *x) if (err) goto error; + x->outer_mode = xfrm_get_mode(x->props.mode, family); + if (x->outer_mode == NULL) { + err = -EPROTONOSUPPORT; + goto error; + } + + if (init_replay) { + err = xfrm_init_replay(x); + if (err) + goto error; + } + x->km.state = XFRM_STATE_VALID; error: return err; } +EXPORT_SYMBOL(__xfrm_init_state); + +int xfrm_init_state(struct xfrm_state *x) +{ + return __xfrm_init_state(x, true); +} + EXPORT_SYMBOL(xfrm_init_state); - -void __init xfrm_state_init(void) + +int __net_init xfrm_state_init(struct net *net) { - int i; + unsigned int sz; + + INIT_LIST_HEAD(&net->xfrm.state_all); + + sz = sizeof(struct hlist_head) * 8; + + net->xfrm.state_bydst = xfrm_hash_alloc(sz); + if (!net->xfrm.state_bydst) + goto out_bydst; + net->xfrm.state_bysrc = xfrm_hash_alloc(sz); + if (!net->xfrm.state_bysrc) + goto out_bysrc; + net->xfrm.state_byspi = xfrm_hash_alloc(sz); + if (!net->xfrm.state_byspi) + goto out_byspi; + net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1); + + net->xfrm.state_num = 0; + INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); + INIT_HLIST_HEAD(&net->xfrm.state_gc_list); + INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task); + spin_lock_init(&net->xfrm.xfrm_state_lock); + return 0; - for (i=0; i<XFRM_DST_HSIZE; i++) { - INIT_LIST_HEAD(&xfrm_state_bydst[i]); - INIT_LIST_HEAD(&xfrm_state_byspi[i]); +out_byspi: + xfrm_hash_free(net->xfrm.state_bysrc, sz); +out_bysrc: + xfrm_hash_free(net->xfrm.state_bydst, sz); +out_bydst: + return -ENOMEM; +} + +void xfrm_state_fini(struct net *net) +{ + unsigned int sz; + + flush_work(&net->xfrm.state_hash_work); + xfrm_state_flush(net, IPSEC_PROTO_ANY, false); + flush_work(&net->xfrm.state_gc_work); + + WARN_ON(!list_empty(&net->xfrm.state_all)); + + sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(net->xfrm.state_byspi)); + xfrm_hash_free(net->xfrm.state_byspi, sz); + WARN_ON(!hlist_empty(net->xfrm.state_bysrc)); + xfrm_hash_free(net->xfrm.state_bysrc, sz); + WARN_ON(!hlist_empty(net->xfrm.state_bydst)); + xfrm_hash_free(net->xfrm.state_bydst, sz); +} + +#ifdef CONFIG_AUDITSYSCALL +static void xfrm_audit_helper_sainfo(struct xfrm_state *x, + struct audit_buffer *audit_buf) +{ + struct xfrm_sec_ctx *ctx = x->security; + u32 spi = ntohl(x->id.spi); + + if (ctx) + audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", + ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str); + + switch (x->props.family) { + case AF_INET: + audit_log_format(audit_buf, " src=%pI4 dst=%pI4", + &x->props.saddr.a4, &x->id.daddr.a4); + break; + case AF_INET6: + audit_log_format(audit_buf, " src=%pI6 dst=%pI6", + x->props.saddr.a6, x->id.daddr.a6); + break; } - INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL); + + audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi); } +static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family, + struct audit_buffer *audit_buf) +{ + const struct iphdr *iph4; + const struct ipv6hdr *iph6; + + switch (family) { + case AF_INET: + iph4 = ip_hdr(skb); + audit_log_format(audit_buf, " src=%pI4 dst=%pI4", + &iph4->saddr, &iph4->daddr); + break; + case AF_INET6: + iph6 = ipv6_hdr(skb); + audit_log_format(audit_buf, + " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x", + &iph6->saddr, &iph6->daddr, + iph6->flow_lbl[0] & 0x0f, + iph6->flow_lbl[1], + iph6->flow_lbl[2]); + break; + } +} + +void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid) +{ + struct audit_buffer *audit_buf; + + audit_buf = xfrm_audit_start("SAD-add"); + if (audit_buf == NULL) + return; + xfrm_audit_helper_usrinfo(task_valid, audit_buf); + xfrm_audit_helper_sainfo(x, audit_buf); + audit_log_format(audit_buf, " res=%u", result); + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_state_add); + +void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid) +{ + struct audit_buffer *audit_buf; + + audit_buf = xfrm_audit_start("SAD-delete"); + if (audit_buf == NULL) + return; + xfrm_audit_helper_usrinfo(task_valid, audit_buf); + xfrm_audit_helper_sainfo(x, audit_buf); + audit_log_format(audit_buf, " res=%u", result); + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_state_delete); + +void xfrm_audit_state_replay_overflow(struct xfrm_state *x, + struct sk_buff *skb) +{ + struct audit_buffer *audit_buf; + u32 spi; + + audit_buf = xfrm_audit_start("SA-replay-overflow"); + if (audit_buf == NULL) + return; + xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf); + /* don't record the sequence number because it's inherent in this kind + * of audit message */ + spi = ntohl(x->id.spi); + audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi); + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow); + +void xfrm_audit_state_replay(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + struct audit_buffer *audit_buf; + u32 spi; + + audit_buf = xfrm_audit_start("SA-replayed-pkt"); + if (audit_buf == NULL) + return; + xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf); + spi = ntohl(x->id.spi); + audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u", + spi, spi, ntohl(net_seq)); + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_state_replay); + +void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family) +{ + struct audit_buffer *audit_buf; + + audit_buf = xfrm_audit_start("SA-notfound"); + if (audit_buf == NULL) + return; + xfrm_audit_helper_pktinfo(skb, family, audit_buf); + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound_simple); + +void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family, + __be32 net_spi, __be32 net_seq) +{ + struct audit_buffer *audit_buf; + u32 spi; + + audit_buf = xfrm_audit_start("SA-notfound"); + if (audit_buf == NULL) + return; + xfrm_audit_helper_pktinfo(skb, family, audit_buf); + spi = ntohl(net_spi); + audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u", + spi, spi, ntohl(net_seq)); + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound); + +void xfrm_audit_state_icvfail(struct xfrm_state *x, + struct sk_buff *skb, u8 proto) +{ + struct audit_buffer *audit_buf; + __be32 net_spi; + __be32 net_seq; + + audit_buf = xfrm_audit_start("SA-icv-failure"); + if (audit_buf == NULL) + return; + xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf); + if (xfrm_parse_spi(skb, proto, &net_spi, &net_seq) == 0) { + u32 spi = ntohl(net_spi); + audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u", + spi, spi, ntohl(net_seq)); + } + audit_log_end(audit_buf); +} +EXPORT_SYMBOL_GPL(xfrm_audit_state_icvfail); +#endif /* CONFIG_AUDITSYSCALL */ diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c new file mode 100644 index 00000000000..05a6e3d9c25 --- /dev/null +++ b/net/xfrm/xfrm_sysctl.c @@ -0,0 +1,86 @@ +#include <linux/sysctl.h> +#include <linux/slab.h> +#include <net/net_namespace.h> +#include <net/xfrm.h> + +static void __net_init __xfrm_sysctl_init(struct net *net) +{ + net->xfrm.sysctl_aevent_etime = XFRM_AE_ETIME; + net->xfrm.sysctl_aevent_rseqth = XFRM_AE_SEQT_SIZE; + net->xfrm.sysctl_larval_drop = 1; + net->xfrm.sysctl_acq_expires = 30; +} + +#ifdef CONFIG_SYSCTL +static struct ctl_table xfrm_table[] = { + { + .procname = "xfrm_aevent_etime", + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "xfrm_aevent_rseqth", + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "xfrm_larval_drop", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "xfrm_acq_expires", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + {} +}; + +int __net_init xfrm_sysctl_init(struct net *net) +{ + struct ctl_table *table; + + __xfrm_sysctl_init(net); + + table = kmemdup(xfrm_table, sizeof(xfrm_table), GFP_KERNEL); + if (!table) + goto out_kmemdup; + table[0].data = &net->xfrm.sysctl_aevent_etime; + table[1].data = &net->xfrm.sysctl_aevent_rseqth; + table[2].data = &net->xfrm.sysctl_larval_drop; + table[3].data = &net->xfrm.sysctl_acq_expires; + + /* Don't export sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) + table[0].procname = NULL; + + net->xfrm.sysctl_hdr = register_net_sysctl(net, "net/core", table); + if (!net->xfrm.sysctl_hdr) + goto out_register; + return 0; + +out_register: + kfree(table); +out_kmemdup: + return -ENOMEM; +} + +void __net_exit xfrm_sysctl_fini(struct net *net) +{ + struct ctl_table *table; + + table = net->xfrm.sysctl_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->xfrm.sysctl_hdr); + kfree(table); +} +#else +int __net_init xfrm_sysctl_init(struct net *net) +{ + __xfrm_sysctl_init(net); + return 0; +} +#endif diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ac87a09ba83..d4db6ebb089 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -10,6 +10,7 @@ * */ +#include <linux/crypto.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/types.h> @@ -18,7 +19,6 @@ #include <linux/string.h> #include <linux/net.h> #include <linux/skbuff.h> -#include <linux/rtnetlink.h> #include <linux/pfkeyv2.h> #include <linux/ipsec.h> #include <linux/init.h> @@ -26,98 +26,129 @@ #include <net/sock.h> #include <net/xfrm.h> #include <net/netlink.h> +#include <net/ah.h> #include <asm/uaccess.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <linux/in6.h> +#endif -static struct sock *xfrm_nl; - -static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) +static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) { - struct rtattr *rt = xfrma[type - 1]; + struct nlattr *rt = attrs[type]; struct xfrm_algo *algp; - int len; if (!rt) return 0; - len = (rt->rta_len - sizeof(*rt)) - sizeof(*algp); - if (len < 0) - return -EINVAL; - - algp = RTA_DATA(rt); - - len -= (algp->alg_key_len + 7U) / 8; - if (len < 0) + algp = nla_data(rt); + if (nla_len(rt) < xfrm_alg_len(algp)) return -EINVAL; switch (type) { case XFRMA_ALG_AUTH: - if (!algp->alg_key_len && - strcmp(algp->alg_name, "digest_null") != 0) - return -EINVAL; - break; - case XFRMA_ALG_CRYPT: - if (!algp->alg_key_len && - strcmp(algp->alg_name, "cipher_null") != 0) - return -EINVAL; - break; - case XFRMA_ALG_COMP: - /* Zero length keys are legal. */ break; default: return -EINVAL; - }; + } algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0'; return 0; } -static int verify_encap_tmpl(struct rtattr **xfrma) +static int verify_auth_trunc(struct nlattr **attrs) { - struct rtattr *rt = xfrma[XFRMA_ENCAP - 1]; - struct xfrm_encap_tmpl *encap; + struct nlattr *rt = attrs[XFRMA_ALG_AUTH_TRUNC]; + struct xfrm_algo_auth *algp; if (!rt) return 0; - if ((rt->rta_len - sizeof(*rt)) < sizeof(*encap)) + algp = nla_data(rt); + if (nla_len(rt) < xfrm_alg_auth_len(algp)) return -EINVAL; + algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0'; return 0; } +static int verify_aead(struct nlattr **attrs) +{ + struct nlattr *rt = attrs[XFRMA_ALG_AEAD]; + struct xfrm_algo_aead *algp; + + if (!rt) + return 0; + + algp = nla_data(rt); + if (nla_len(rt) < aead_len(algp)) + return -EINVAL; + + algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0'; + return 0; +} + +static void verify_one_addr(struct nlattr **attrs, enum xfrm_attr_type_t type, + xfrm_address_t **addrp) +{ + struct nlattr *rt = attrs[type]; + + if (rt && addrp) + *addrp = nla_data(rt); +} -static inline int verify_sec_ctx_len(struct rtattr **xfrma) +static inline int verify_sec_ctx_len(struct nlattr **attrs) { - struct rtattr *rt = xfrma[XFRMA_SEC_CTX - 1]; + struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_user_sec_ctx *uctx; - int len = 0; if (!rt) return 0; - if (rt->rta_len < sizeof(*uctx)) + uctx = nla_data(rt); + if (uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len)) return -EINVAL; - uctx = RTA_DATA(rt); + return 0; +} - if (uctx->ctx_len > PAGE_SIZE) - return -EINVAL; +static inline int verify_replay(struct xfrm_usersa_info *p, + struct nlattr **attrs) +{ + struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; + struct xfrm_replay_state_esn *rs; + + if (p->flags & XFRM_STATE_ESN) { + if (!rt) + return -EINVAL; + + rs = nla_data(rt); - len += sizeof(struct xfrm_user_sec_ctx); - len += uctx->ctx_len; + if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) + return -EINVAL; - if (uctx->len != len) + if (nla_len(rt) < xfrm_replay_state_esn_len(rs) && + nla_len(rt) != sizeof(*rs)) + return -EINVAL; + } + + if (!rt) + return 0; + + /* As only ESP and AH support ESN feature. */ + if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH)) + return -EINVAL; + + if (p->replay_window != 0) return -EINVAL; return 0; } - static int verify_newsa_info(struct xfrm_usersa_info *p, - struct rtattr **xfrma) + struct nlattr **attrs) { int err; @@ -127,7 +158,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, break; case AF_INET6: -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) break; #else err = -EAFNOSUPPORT; @@ -136,55 +167,95 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, default: goto out; - }; + } err = -EINVAL; switch (p->id.proto) { case IPPROTO_AH: - if (!xfrma[XFRMA_ALG_AUTH-1] || - xfrma[XFRMA_ALG_CRYPT-1] || - xfrma[XFRMA_ALG_COMP-1]) + if ((!attrs[XFRMA_ALG_AUTH] && + !attrs[XFRMA_ALG_AUTH_TRUNC]) || + attrs[XFRMA_ALG_AEAD] || + attrs[XFRMA_ALG_CRYPT] || + attrs[XFRMA_ALG_COMP] || + attrs[XFRMA_TFCPAD]) goto out; break; case IPPROTO_ESP: - if ((!xfrma[XFRMA_ALG_AUTH-1] && - !xfrma[XFRMA_ALG_CRYPT-1]) || - xfrma[XFRMA_ALG_COMP-1]) + if (attrs[XFRMA_ALG_COMP]) + goto out; + if (!attrs[XFRMA_ALG_AUTH] && + !attrs[XFRMA_ALG_AUTH_TRUNC] && + !attrs[XFRMA_ALG_CRYPT] && + !attrs[XFRMA_ALG_AEAD]) + goto out; + if ((attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_AUTH_TRUNC] || + attrs[XFRMA_ALG_CRYPT]) && + attrs[XFRMA_ALG_AEAD]) + goto out; + if (attrs[XFRMA_TFCPAD] && + p->mode != XFRM_MODE_TUNNEL) goto out; break; case IPPROTO_COMP: - if (!xfrma[XFRMA_ALG_COMP-1] || - xfrma[XFRMA_ALG_AUTH-1] || - xfrma[XFRMA_ALG_CRYPT-1]) + if (!attrs[XFRMA_ALG_COMP] || + attrs[XFRMA_ALG_AEAD] || + attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_AUTH_TRUNC] || + attrs[XFRMA_ALG_CRYPT] || + attrs[XFRMA_TFCPAD] || + (ntohl(p->id.spi) >= 0x10000)) goto out; break; +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + if (attrs[XFRMA_ALG_COMP] || + attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_AUTH_TRUNC] || + attrs[XFRMA_ALG_AEAD] || + attrs[XFRMA_ALG_CRYPT] || + attrs[XFRMA_ENCAP] || + attrs[XFRMA_SEC_CTX] || + attrs[XFRMA_TFCPAD] || + !attrs[XFRMA_COADDR]) + goto out; + break; +#endif + default: goto out; - }; + } - if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH))) + if ((err = verify_aead(attrs))) + goto out; + if ((err = verify_auth_trunc(attrs))) goto out; - if ((err = verify_one_alg(xfrma, XFRMA_ALG_CRYPT))) + if ((err = verify_one_alg(attrs, XFRMA_ALG_AUTH))) goto out; - if ((err = verify_one_alg(xfrma, XFRMA_ALG_COMP))) + if ((err = verify_one_alg(attrs, XFRMA_ALG_CRYPT))) goto out; - if ((err = verify_encap_tmpl(xfrma))) + if ((err = verify_one_alg(attrs, XFRMA_ALG_COMP))) goto out; - if ((err = verify_sec_ctx_len(xfrma))) + if ((err = verify_sec_ctx_len(attrs))) + goto out; + if ((err = verify_replay(p, attrs))) goto out; err = -EINVAL; switch (p->mode) { - case 0: - case 1: + case XFRM_MODE_TRANSPORT: + case XFRM_MODE_TUNNEL: + case XFRM_MODE_ROUTEOPTIMIZATION: + case XFRM_MODE_BEET: break; default: goto out; - }; + } err = 0; @@ -193,74 +264,177 @@ out: } static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, - struct xfrm_algo_desc *(*get_byname)(char *, int), - struct rtattr *u_arg) + struct xfrm_algo_desc *(*get_byname)(const char *, int), + struct nlattr *rta) { - struct rtattr *rta = u_arg; struct xfrm_algo *p, *ualg; struct xfrm_algo_desc *algo; - int len; if (!rta) return 0; - ualg = RTA_DATA(rta); + ualg = nla_data(rta); algo = get_byname(ualg->alg_name, 1); if (!algo) return -ENOSYS; *props = algo->desc.sadb_alg_id; - len = sizeof(*ualg) + (ualg->alg_key_len + 7U) / 8; - p = kmalloc(len, GFP_KERNEL); + p = kmemdup(ualg, xfrm_alg_len(ualg), GFP_KERNEL); if (!p) return -ENOMEM; - memcpy(p, ualg, len); + strcpy(p->alg_name, algo->name); *algpp = p; return 0; } -static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_arg) +static int attach_auth(struct xfrm_algo_auth **algpp, u8 *props, + struct nlattr *rta) { - struct rtattr *rta = u_arg; - struct xfrm_encap_tmpl *p, *uencap; + struct xfrm_algo *ualg; + struct xfrm_algo_auth *p; + struct xfrm_algo_desc *algo; if (!rta) return 0; - uencap = RTA_DATA(rta); - p = kmalloc(sizeof(*p), GFP_KERNEL); + ualg = nla_data(rta); + + algo = xfrm_aalg_get_byname(ualg->alg_name, 1); + if (!algo) + return -ENOSYS; + *props = algo->desc.sadb_alg_id; + + p = kmalloc(sizeof(*p) + (ualg->alg_key_len + 7) / 8, GFP_KERNEL); if (!p) return -ENOMEM; - memcpy(p, uencap, sizeof(*p)); - *encapp = p; + strcpy(p->alg_name, algo->name); + p->alg_key_len = ualg->alg_key_len; + p->alg_trunc_len = algo->uinfo.auth.icv_truncbits; + memcpy(p->alg_key, ualg->alg_key, (ualg->alg_key_len + 7) / 8); + + *algpp = p; return 0; } +static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props, + struct nlattr *rta) +{ + struct xfrm_algo_auth *p, *ualg; + struct xfrm_algo_desc *algo; + + if (!rta) + return 0; + + ualg = nla_data(rta); -static inline int xfrm_user_sec_ctx_size(struct xfrm_policy *xp) + algo = xfrm_aalg_get_byname(ualg->alg_name, 1); + if (!algo) + return -ENOSYS; + if ((ualg->alg_trunc_len / 8) > MAX_AH_AUTH_LEN || + ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits) + return -EINVAL; + *props = algo->desc.sadb_alg_id; + + p = kmemdup(ualg, xfrm_alg_auth_len(ualg), GFP_KERNEL); + if (!p) + return -ENOMEM; + + strcpy(p->alg_name, algo->name); + if (!p->alg_trunc_len) + p->alg_trunc_len = algo->uinfo.auth.icv_truncbits; + + *algpp = p; + return 0; +} + +static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props, + struct nlattr *rta) { - struct xfrm_sec_ctx *xfrm_ctx = xp->security; - int len = 0; + struct xfrm_algo_aead *p, *ualg; + struct xfrm_algo_desc *algo; - if (xfrm_ctx) { - len += sizeof(struct xfrm_user_sec_ctx); - len += xfrm_ctx->ctx_len; - } - return len; + if (!rta) + return 0; + + ualg = nla_data(rta); + + algo = xfrm_aead_get_byname(ualg->alg_name, ualg->alg_icv_len, 1); + if (!algo) + return -ENOSYS; + *props = algo->desc.sadb_alg_id; + + p = kmemdup(ualg, aead_len(ualg), GFP_KERNEL); + if (!p) + return -ENOMEM; + + strcpy(p->alg_name, algo->name); + *algpp = p; + return 0; } -static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg) +static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_esn, + struct nlattr *rp) { - struct xfrm_user_sec_ctx *uctx; + struct xfrm_replay_state_esn *up; + int ulen; - if (!u_arg) + if (!replay_esn || !rp) return 0; - uctx = RTA_DATA(u_arg); - return security_xfrm_state_alloc(x, uctx); + up = nla_data(rp); + ulen = xfrm_replay_state_esn_len(up); + + if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) + return -EINVAL; + + return 0; +} + +static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn, + struct xfrm_replay_state_esn **preplay_esn, + struct nlattr *rta) +{ + struct xfrm_replay_state_esn *p, *pp, *up; + int klen, ulen; + + if (!rta) + return 0; + + up = nla_data(rta); + klen = xfrm_replay_state_esn_len(up); + ulen = nla_len(rta) >= klen ? klen : sizeof(*up); + + p = kzalloc(klen, GFP_KERNEL); + if (!p) + return -ENOMEM; + + pp = kzalloc(klen, GFP_KERNEL); + if (!pp) { + kfree(p); + return -ENOMEM; + } + + memcpy(p, up, ulen); + memcpy(pp, up, ulen); + + *replay_esn = p; + *preplay_esn = pp; + + return 0; +} + +static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) +{ + int len = 0; + + if (xfrm_ctx) { + len += sizeof(struct xfrm_user_sec_ctx); + len += xfrm_ctx->ctx_len; + } + return len; } static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) @@ -269,18 +443,69 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * memcpy(&x->sel, &p->sel, sizeof(x->sel)); memcpy(&x->lft, &p->lft, sizeof(x->lft)); x->props.mode = p->mode; - x->props.replay_window = p->replay_window; + x->props.replay_window = min_t(unsigned int, p->replay_window, + sizeof(x->replay.bitmap) * 8); x->props.reqid = p->reqid; x->props.family = p->family; - x->props.saddr = p->saddr; + memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); x->props.flags = p->flags; + + if (!x->sel.family && !(p->flags & XFRM_STATE_AF_UNSPEC)) + x->sel.family = p->family; +} + +/* + * someday when pfkey also has support, we could have the code + * somehow made shareable and move it to xfrm_state.c - JHS + * +*/ +static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, + int update_esn) +{ + struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; + struct nlattr *re = update_esn ? attrs[XFRMA_REPLAY_ESN_VAL] : NULL; + struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; + struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; + struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; + + if (re) { + struct xfrm_replay_state_esn *replay_esn; + replay_esn = nla_data(re); + memcpy(x->replay_esn, replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + memcpy(x->preplay_esn, replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + } + + if (rp) { + struct xfrm_replay_state *replay; + replay = nla_data(rp); + memcpy(&x->replay, replay, sizeof(*replay)); + memcpy(&x->preplay, replay, sizeof(*replay)); + } + + if (lt) { + struct xfrm_lifetime_cur *ltime; + ltime = nla_data(lt); + x->curlft.bytes = ltime->bytes; + x->curlft.packets = ltime->packets; + x->curlft.add_time = ltime->add_time; + x->curlft.use_time = ltime->use_time; + } + + if (et) + x->replay_maxage = nla_get_u32(et); + + if (rt) + x->replay_maxdiff = nla_get_u32(rt); } -static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, - struct rtattr **xfrma, +static struct xfrm_state *xfrm_state_construct(struct net *net, + struct xfrm_usersa_info *p, + struct nlattr **attrs, int *errp) { - struct xfrm_state *x = xfrm_state_alloc(); + struct xfrm_state *x = xfrm_state_alloc(net); int err = -ENOMEM; if (!x) @@ -288,29 +513,70 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, copy_from_user_state(x, p); - if ((err = attach_one_algo(&x->aalg, &x->props.aalgo, - xfrm_aalg_get_byname, - xfrma[XFRMA_ALG_AUTH-1]))) + if (attrs[XFRMA_SA_EXTRA_FLAGS]) + x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]); + + if ((err = attach_aead(&x->aead, &x->props.ealgo, + attrs[XFRMA_ALG_AEAD]))) + goto error; + if ((err = attach_auth_trunc(&x->aalg, &x->props.aalgo, + attrs[XFRMA_ALG_AUTH_TRUNC]))) goto error; + if (!x->props.aalgo) { + if ((err = attach_auth(&x->aalg, &x->props.aalgo, + attrs[XFRMA_ALG_AUTH]))) + goto error; + } if ((err = attach_one_algo(&x->ealg, &x->props.ealgo, xfrm_ealg_get_byname, - xfrma[XFRMA_ALG_CRYPT-1]))) + attrs[XFRMA_ALG_CRYPT]))) goto error; if ((err = attach_one_algo(&x->calg, &x->props.calgo, xfrm_calg_get_byname, - xfrma[XFRMA_ALG_COMP-1]))) - goto error; - if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) + attrs[XFRMA_ALG_COMP]))) goto error; - err = xfrm_init_state(x); + if (attrs[XFRMA_ENCAP]) { + x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]), + sizeof(*x->encap), GFP_KERNEL); + if (x->encap == NULL) + goto error; + } + + if (attrs[XFRMA_TFCPAD]) + x->tfcpad = nla_get_u32(attrs[XFRMA_TFCPAD]); + + if (attrs[XFRMA_COADDR]) { + x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]), + sizeof(*x->coaddr), GFP_KERNEL); + if (x->coaddr == NULL) + goto error; + } + + xfrm_mark_get(attrs, &x->mark); + + err = __xfrm_init_state(x, false); if (err) goto error; - if ((err = attach_sec_ctx(x, xfrma[XFRMA_SEC_CTX-1]))) + if (attrs[XFRMA_SEC_CTX] && + security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX]))) + goto error; + + if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, + attrs[XFRMA_REPLAY_ESN_VAL]))) goto error; x->km.seq = p->seq; + x->replay_maxdiff = net->xfrm.sysctl_aevent_rseqth; + /* sysctl_xfrm_aevent_etime is in 100ms units */ + x->replay_maxage = (net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M; + + if ((err = xfrm_init_replay(x))) + goto error; + + /* override default values from above */ + xfrm_update_ae_params(x, attrs, 0); return x; @@ -322,18 +588,20 @@ error_no_put: return NULL; } -static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) { - struct xfrm_usersa_info *p = NLMSG_DATA(nlh); + struct net *net = sock_net(skb->sk); + struct xfrm_usersa_info *p = nlmsg_data(nlh); struct xfrm_state *x; int err; struct km_event c; - err = verify_newsa_info(p, (struct rtattr **)xfrma); + err = verify_newsa_info(p, attrs); if (err) return err; - x = xfrm_state_construct(p, (struct rtattr **)xfrma, &err); + x = xfrm_state_construct(net, p, attrs, &err); if (!x) return err; @@ -343,14 +611,16 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) else err = xfrm_state_update(x); + xfrm_audit_state_add(x, err ? 0 : 1, true); + if (err < 0) { x->km.state = XFRM_STATE_DEAD; - xfrm_state_put(x); + __xfrm_state_put(x); goto out; } c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); @@ -359,45 +629,86 @@ out: return err; } -static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static struct xfrm_state *xfrm_user_state_lookup(struct net *net, + struct xfrm_usersa_id *p, + struct nlattr **attrs, + int *errp) { - struct xfrm_state *x; + struct xfrm_state *x = NULL; + struct xfrm_mark m; int err; + u32 mark = xfrm_mark_get(attrs, &m); + + if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { + err = -ESRCH; + x = xfrm_state_lookup(net, mark, &p->daddr, p->spi, p->proto, p->family); + } else { + xfrm_address_t *saddr = NULL; + + verify_one_addr(attrs, XFRMA_SRCADDR, &saddr); + if (!saddr) { + err = -EINVAL; + goto out; + } + + err = -ESRCH; + x = xfrm_state_lookup_byaddr(net, mark, + &p->daddr, saddr, + p->proto, p->family); + } + + out: + if (!x && errp) + *errp = err; + return x; +} + +static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + struct net *net = sock_net(skb->sk); + struct xfrm_state *x; + int err = -ESRCH; struct km_event c; - struct xfrm_usersa_id *p = NLMSG_DATA(nlh); + struct xfrm_usersa_id *p = nlmsg_data(nlh); - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + x = xfrm_user_state_lookup(net, p, attrs, &err); if (x == NULL) - return -ESRCH; + return err; + + if ((err = security_xfrm_state_delete(x)) != 0) + goto out; if (xfrm_state_kern(x)) { - xfrm_state_put(x); - return -EPERM; + err = -EPERM; + goto out; } err = xfrm_state_delete(x); - if (err < 0) { - xfrm_state_put(x); - return err; - } + + if (err < 0) + goto out; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); - xfrm_state_put(x); +out: + xfrm_audit_state_delete(x, err ? 0 : 1, true); + xfrm_state_put(x); return err; } static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { + memset(p, 0, sizeof(*p)); memcpy(&p->id, &x->id, sizeof(p->id)); memcpy(&p->sel, &x->sel, sizeof(p->sel)); memcpy(&p->lft, &x->lft, sizeof(p->lft)); memcpy(&p->curlft, &x->curlft, sizeof(p->curlft)); memcpy(&p->stats, &x->stats, sizeof(p->stats)); - p->saddr = x->props.saddr; + memcpy(&p->saddr, &x->props.saddr, sizeof(p->saddr)); p->mode = x->props.mode; p->replay_window = x->props.replay_window; p->reqid = x->props.reqid; @@ -411,10 +722,122 @@ struct xfrm_dump_info { struct sk_buff *out_skb; u32 nlmsg_seq; u16 nlmsg_flags; - int start_idx; - int this_idx; }; +static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) +{ + struct xfrm_user_sec_ctx *uctx; + struct nlattr *attr; + int ctx_size = sizeof(*uctx) + s->ctx_len; + + attr = nla_reserve(skb, XFRMA_SEC_CTX, ctx_size); + if (attr == NULL) + return -EMSGSIZE; + + uctx = nla_data(attr); + uctx->exttype = XFRMA_SEC_CTX; + uctx->len = ctx_size; + uctx->ctx_doi = s->ctx_doi; + uctx->ctx_alg = s->ctx_alg; + uctx->ctx_len = s->ctx_len; + memcpy(uctx + 1, s->ctx_str, s->ctx_len); + + return 0; +} + +static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb) +{ + struct xfrm_algo *algo; + struct nlattr *nla; + + nla = nla_reserve(skb, XFRMA_ALG_AUTH, + sizeof(*algo) + (auth->alg_key_len + 7) / 8); + if (!nla) + return -EMSGSIZE; + + algo = nla_data(nla); + strncpy(algo->alg_name, auth->alg_name, sizeof(algo->alg_name)); + memcpy(algo->alg_key, auth->alg_key, (auth->alg_key_len + 7) / 8); + algo->alg_key_len = auth->alg_key_len; + + return 0; +} + +/* Don't change this without updating xfrm_sa_len! */ +static int copy_to_user_state_extra(struct xfrm_state *x, + struct xfrm_usersa_info *p, + struct sk_buff *skb) +{ + int ret = 0; + + copy_to_user_state(x, p); + + if (x->props.extra_flags) { + ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS, + x->props.extra_flags); + if (ret) + goto out; + } + + if (x->coaddr) { + ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + if (ret) + goto out; + } + if (x->lastused) { + ret = nla_put_u64(skb, XFRMA_LASTUSED, x->lastused); + if (ret) + goto out; + } + if (x->aead) { + ret = nla_put(skb, XFRMA_ALG_AEAD, aead_len(x->aead), x->aead); + if (ret) + goto out; + } + if (x->aalg) { + ret = copy_to_user_auth(x->aalg, skb); + if (!ret) + ret = nla_put(skb, XFRMA_ALG_AUTH_TRUNC, + xfrm_alg_auth_len(x->aalg), x->aalg); + if (ret) + goto out; + } + if (x->ealg) { + ret = nla_put(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(x->ealg), x->ealg); + if (ret) + goto out; + } + if (x->calg) { + ret = nla_put(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + if (ret) + goto out; + } + if (x->encap) { + ret = nla_put(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + if (ret) + goto out; + } + if (x->tfcpad) { + ret = nla_put_u32(skb, XFRMA_TFCPAD, x->tfcpad); + if (ret) + goto out; + } + ret = xfrm_mark_put(skb, &x->mark); + if (ret) + goto out; + if (x->replay_esn) { + ret = nla_put(skb, XFRMA_REPLAY_ESN_VAL, + xfrm_replay_state_esn_len(x->replay_esn), + x->replay_esn); + if (ret) + goto out; + } + if (x->security) + ret = copy_sec_ctx(x->security, skb); +out: + return ret; +} + static int dump_one_state(struct xfrm_state *x, int count, void *ptr) { struct xfrm_dump_info *sp = ptr; @@ -422,67 +845,78 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) struct sk_buff *skb = sp->out_skb; struct xfrm_usersa_info *p; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - - if (sp->this_idx < sp->start_idx) - goto out; + int err; - nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, - sp->nlmsg_seq, - XFRM_MSG_NEWSA, sizeof(*p)); - nlh->nlmsg_flags = sp->nlmsg_flags; + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq, + XFRM_MSG_NEWSA, sizeof(*p), sp->nlmsg_flags); + if (nlh == NULL) + return -EMSGSIZE; - p = NLMSG_DATA(nlh); - copy_to_user_state(x, p); + p = nlmsg_data(nlh); - if (x->aalg) - RTA_PUT(skb, XFRMA_ALG_AUTH, - sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg); - if (x->ealg) - RTA_PUT(skb, XFRMA_ALG_CRYPT, - sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg); - if (x->calg) - RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + err = copy_to_user_state_extra(x, p, skb); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } + nlmsg_end(skb, nlh); + return 0; +} - if (x->encap) - RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); +static int xfrm_dump_sa_done(struct netlink_callback *cb) +{ + struct xfrm_state_walk *walk = (struct xfrm_state_walk *) &cb->args[1]; + struct sock *sk = cb->skb->sk; + struct net *net = sock_net(sk); - if (x->security) { - int ctx_size = sizeof(struct xfrm_sec_ctx) + - x->security->ctx_len; - struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); - - uctx->exttype = XFRMA_SEC_CTX; - uctx->len = ctx_size; - uctx->ctx_doi = x->security->ctx_doi; - uctx->ctx_alg = x->security->ctx_alg; - uctx->ctx_len = x->security->ctx_len; - memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len); - } - nlh->nlmsg_len = skb->tail - b; -out: - sp->this_idx++; + xfrm_state_walk_done(walk, net); return 0; - -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; } +static const struct nla_policy xfrma_policy[XFRMA_MAX+1]; static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); + struct xfrm_state_walk *walk = (struct xfrm_state_walk *) &cb->args[1]; struct xfrm_dump_info info; + BUILD_BUG_ON(sizeof(struct xfrm_state_walk) > + sizeof(cb->args) - sizeof(cb->args[0])); + info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; - info.this_idx = 0; - info.start_idx = cb->args[0]; - (void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info); - cb->args[0] = info.this_idx; + + if (!cb->args[0]) { + struct nlattr *attrs[XFRMA_MAX+1]; + struct xfrm_address_filter *filter = NULL; + u8 proto = 0; + int err; + + cb->args[0] = 1; + + err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, + xfrma_policy); + if (err < 0) + return err; + + if (attrs[XFRMA_ADDRESS_FILTER]) { + filter = kmalloc(sizeof(*filter), GFP_KERNEL); + if (filter == NULL) + return -ENOMEM; + + memcpy(filter, nla_data(attrs[XFRMA_ADDRESS_FILTER]), + sizeof(*filter)); + } + + if (attrs[XFRMA_PROTO]) + proto = nla_get_u8(attrs[XFRMA_PROTO]); + + xfrm_state_walk_init(walk, proto, filter); + } + + (void) xfrm_state_walk(net, walk, dump_one_state, &info); return skb->len; } @@ -492,35 +926,170 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, { struct xfrm_dump_info info; struct sk_buff *skb; + int err; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return ERR_PTR(-ENOMEM); - NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; info.in_skb = in_skb; info.out_skb = skb; info.nlmsg_seq = seq; info.nlmsg_flags = 0; - info.this_idx = info.start_idx = 0; - if (dump_one_state(x, 0, &info)) { + err = dump_one_state(x, 0, &info); + if (err) { kfree_skb(skb); - return NULL; + return ERR_PTR(err); } return skb; } -static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +/* A wrapper for nlmsg_multicast() checking that nlsk is still available. + * Must be called with RCU read lock. + */ +static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, + u32 pid, unsigned int group) +{ + struct sock *nlsk = rcu_dereference(net->xfrm.nlsk); + + if (nlsk) + return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); + else + return -1; +} + +static inline size_t xfrm_spdinfo_msgsize(void) +{ + return NLMSG_ALIGN(4) + + nla_total_size(sizeof(struct xfrmu_spdinfo)) + + nla_total_size(sizeof(struct xfrmu_spdhinfo)); +} + +static int build_spdinfo(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq, u32 flags) +{ + struct xfrmk_spdinfo si; + struct xfrmu_spdinfo spc; + struct xfrmu_spdhinfo sph; + struct nlmsghdr *nlh; + int err; + u32 *f; + + nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); + if (nlh == NULL) /* shouldn't really happen ... */ + return -EMSGSIZE; + + f = nlmsg_data(nlh); + *f = flags; + xfrm_spd_getinfo(net, &si); + spc.incnt = si.incnt; + spc.outcnt = si.outcnt; + spc.fwdcnt = si.fwdcnt; + spc.inscnt = si.inscnt; + spc.outscnt = si.outscnt; + spc.fwdscnt = si.fwdscnt; + sph.spdhcnt = si.spdhcnt; + sph.spdhmcnt = si.spdhmcnt; + + err = nla_put(skb, XFRMA_SPD_INFO, sizeof(spc), &spc); + if (!err) + err = nla_put(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } + + return nlmsg_end(skb, nlh); +} + +static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) { - struct xfrm_usersa_id *p = NLMSG_DATA(nlh); + struct net *net = sock_net(skb->sk); + struct sk_buff *r_skb; + u32 *flags = nlmsg_data(nlh); + u32 sportid = NETLINK_CB(skb).portid; + u32 seq = nlh->nlmsg_seq; + + r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC); + if (r_skb == NULL) + return -ENOMEM; + + if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0) + BUG(); + + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); +} + +static inline size_t xfrm_sadinfo_msgsize(void) +{ + return NLMSG_ALIGN(4) + + nla_total_size(sizeof(struct xfrmu_sadhinfo)) + + nla_total_size(4); /* XFRMA_SAD_CNT */ +} + +static int build_sadinfo(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq, u32 flags) +{ + struct xfrmk_sadinfo si; + struct xfrmu_sadhinfo sh; + struct nlmsghdr *nlh; + int err; + u32 *f; + + nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); + if (nlh == NULL) /* shouldn't really happen ... */ + return -EMSGSIZE; + + f = nlmsg_data(nlh); + *f = flags; + xfrm_sad_getinfo(net, &si); + + sh.sadhmcnt = si.sadhmcnt; + sh.sadhcnt = si.sadhcnt; + + err = nla_put_u32(skb, XFRMA_SAD_CNT, si.sadcnt); + if (!err) + err = nla_put(skb, XFRMA_SAD_HINFO, sizeof(sh), &sh); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } + + return nlmsg_end(skb, nlh); +} + +static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + struct net *net = sock_net(skb->sk); + struct sk_buff *r_skb; + u32 *flags = nlmsg_data(nlh); + u32 sportid = NETLINK_CB(skb).portid; + u32 seq = nlh->nlmsg_seq; + + r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC); + if (r_skb == NULL) + return -ENOMEM; + + if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0) + BUG(); + + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); +} + +static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + struct net *net = sock_net(skb->sk); + struct xfrm_usersa_id *p = nlmsg_data(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; - int err; + int err = -ESRCH; - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); - err = -ESRCH; + x = xfrm_user_state_lookup(net, p, attrs, &err); if (x == NULL) goto out_noput; @@ -528,48 +1097,28 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = netlink_unicast(xfrm_nl, resp_skb, - NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: return err; } -static int verify_userspi_info(struct xfrm_userspi_info *p) -{ - switch (p->info.id.proto) { - case IPPROTO_AH: - case IPPROTO_ESP: - break; - - case IPPROTO_COMP: - /* IPCOMP spi is 16-bits. */ - if (p->max >= 0x10000) - return -EINVAL; - break; - - default: - return -EINVAL; - }; - - if (p->min > p->max) - return -EINVAL; - - return 0; -} - -static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_state *x; struct xfrm_userspi_info *p; struct sk_buff *resp_skb; xfrm_address_t *daddr; int family; int err; + u32 mark; + struct xfrm_mark m; - p = NLMSG_DATA(nlh); - err = verify_userspi_info(p); + p = nlmsg_data(nlh); + err = verify_spi_info(p->info.id.proto, p->min, p->max); if (err) goto out_noput; @@ -577,16 +1126,18 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, void ** daddr = &p->info.id.daddr; x = NULL; + + mark = xfrm_mark_get(attrs, &m); if (p->info.seq) { - x = xfrm_find_acq_byseq(p->info.seq); - if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { + x = xfrm_find_acq_byseq(net, mark, p->info.seq); + if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; } } if (!x) - x = xfrm_find_acq(p->info.mode, p->info.reqid, + x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid, p->info.id.proto, daddr, &p->info.saddr, 1, family); @@ -594,23 +1145,17 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, void ** if (x == NULL) goto out_noput; - resp_skb = ERR_PTR(-ENOENT); - - spin_lock_bh(&x->lock); - if (x->km.state != XFRM_STATE_DEAD) { - xfrm_alloc_spi(x, htonl(p->min), htonl(p->max)); - if (x->id.spi) - resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq); - } - spin_unlock_bh(&x->lock); + err = xfrm_alloc_spi(x, p->min, p->max); + if (err) + goto out; + resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq); if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); goto out; } - err = netlink_unicast(xfrm_nl, resp_skb, - NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); @@ -618,7 +1163,7 @@ out_noput: return err; } -static int verify_policy_dir(__u8 dir) +static int verify_policy_dir(u8 dir) { switch (dir) { case XFRM_POLICY_IN: @@ -628,13 +1173,31 @@ static int verify_policy_dir(__u8 dir) default: return -EINVAL; - }; + } + + return 0; +} + +static int verify_policy_type(u8 type) +{ + switch (type) { + case XFRM_POLICY_TYPE_MAIN: +#ifdef CONFIG_XFRM_SUB_POLICY + case XFRM_POLICY_TYPE_SUB: +#endif + break; + + default: + return -EINVAL; + } return 0; } static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) { + int ret; + switch (p->share) { case XFRM_SHARE_ANY: case XFRM_SHARE_SESSION: @@ -644,7 +1207,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) default: return -EINVAL; - }; + } switch (p->action) { case XFRM_POLICY_ALLOW: @@ -653,14 +1216,14 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) default: return -EINVAL; - }; + } switch (p->sel.family) { case AF_INET: break; case AF_INET6: -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) break; #else return -EAFNOSUPPORT; @@ -668,21 +1231,27 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) default: return -EINVAL; - }; + } + + ret = verify_policy_dir(p->dir); + if (ret) + return ret; + if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir)) + return -EINVAL; - return verify_policy_dir(p->dir); + return 0; } -static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct rtattr **xfrma) +static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct nlattr **attrs) { - struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; + struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_user_sec_ctx *uctx; if (!rt) return 0; - uctx = RTA_DATA(rt); - return security_xfrm_policy_alloc(pol, uctx); + uctx = nla_data(rt); + return security_xfrm_policy_alloc(&pol->security, uctx, GFP_KERNEL); } static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, @@ -704,28 +1273,85 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, t->aalgos = ut->aalgos; t->ealgos = ut->ealgos; t->calgos = ut->calgos; + /* If all masks are ~0, then we allow all algorithms. */ + t->allalgs = !~(t->aalgos & t->ealgos & t->calgos); + t->encap_family = ut->family; } } -static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma) +static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) { - struct rtattr *rt = xfrma[XFRMA_TMPL-1]; - struct xfrm_user_tmpl *utmpl; - int nr; + int i; + + if (nr > XFRM_MAX_DEPTH) + return -EINVAL; + + for (i = 0; i < nr; i++) { + /* We never validated the ut->family value, so many + * applications simply leave it at zero. The check was + * never made and ut->family was ignored because all + * templates could be assumed to have the same family as + * the policy itself. Now that we will have ipv4-in-ipv6 + * and ipv6-in-ipv4 tunnels, this is no longer true. + */ + if (!ut[i].family) + ut[i].family = family; + + switch (ut[i].family) { + case AF_INET: + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + break; +#endif + default: + return -EINVAL; + } + } + + return 0; +} + +static int copy_from_user_tmpl(struct xfrm_policy *pol, struct nlattr **attrs) +{ + struct nlattr *rt = attrs[XFRMA_TMPL]; if (!rt) { pol->xfrm_nr = 0; } else { - nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl); + struct xfrm_user_tmpl *utmpl = nla_data(rt); + int nr = nla_len(rt) / sizeof(*utmpl); + int err; - if (nr > XFRM_MAX_DEPTH) - return -EINVAL; + err = validate_tmpl(nr, utmpl, pol->family); + if (err) + return err; - copy_templates(pol, RTA_DATA(rt), nr); + copy_templates(pol, utmpl, nr); } return 0; } +static int copy_from_user_policy_type(u8 *tp, struct nlattr **attrs) +{ + struct nlattr *rt = attrs[XFRMA_POLICY_TYPE]; + struct xfrm_userpolicy_type *upt; + u8 type = XFRM_POLICY_TYPE_MAIN; + int err; + + if (rt) { + upt = nla_data(rt); + type = upt->type; + } + + err = verify_policy_type(type); + if (err) + return err; + + *tp = type; + return 0; +} + static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p) { xp->priority = p->priority; @@ -740,6 +1366,7 @@ static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir) { + memset(p, 0, sizeof(*p)); memcpy(&p->sel, &xp->selector, sizeof(p->sel)); memcpy(&p->lft, &xp->lft, sizeof(p->lft)); memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft)); @@ -752,9 +1379,9 @@ static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_i p->share = XFRM_SHARE_ANY; /* XXX xp->share */ } -static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct rtattr **xfrma, int *errp) +static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_userpolicy_info *p, struct nlattr **attrs, int *errp) { - struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL); + struct xfrm_policy *xp = xfrm_policy_alloc(net, GFP_KERNEL); int err; if (!xp) { @@ -764,21 +1391,30 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, copy_from_user_policy(xp, p); - if (!(err = copy_from_user_tmpl(xp, xfrma))) - err = copy_from_user_sec_ctx(xp, xfrma); + err = copy_from_user_policy_type(&xp->type, attrs); + if (err) + goto error; - if (err) { - *errp = err; - kfree(xp); - xp = NULL; - } + if (!(err = copy_from_user_tmpl(xp, attrs))) + err = copy_from_user_sec_ctx(xp, attrs); + if (err) + goto error; + + xfrm_mark_get(attrs, &xp->mark); return xp; + error: + *errp = err; + xp->walk.dead = 1; + xfrm_policy_destroy(xp); + return NULL; } -static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) { - struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh); + struct net *net = sock_net(skb->sk); + struct xfrm_userpolicy_info *p = nlmsg_data(nlh); struct xfrm_policy *xp; struct km_event c; int err; @@ -787,29 +1423,31 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr err = verify_newpolicy_info(p); if (err) return err; - err = verify_sec_ctx_len((struct rtattr **)xfrma); + err = verify_sec_ctx_len(attrs); if (err) return err; - xp = xfrm_policy_construct(p, (struct rtattr **)xfrma, &err); + xp = xfrm_policy_construct(net, p, attrs, &err); if (!xp) return err; - /* shouldnt excl be based on nlh flags?? + /* shouldn't excl be based on nlh flags?? * Aha! this is anti-netlink really i.e more pfkey derived * in netlink excl is a flag and you wouldnt need * a type XFRM_MSG_UPDPOLICY - JHS */ excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); + xfrm_audit_policy_add(xp, err ? 0 : 1, true); + if (err) { - security_xfrm_policy_free(xp); + security_xfrm_policy_free(xp->security); kfree(xp); return err; } c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; km_policy_notify(xp, p->dir, &c); xfrm_pol_put(xp); @@ -829,8 +1467,9 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) struct xfrm_user_tmpl *up = &vec[i]; struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; + memset(up, 0, sizeof(*up)); memcpy(&up->id, &kp->id, sizeof(up->id)); - up->family = xp->family; + up->family = kp->encap_family; memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr)); up->reqid = kp->reqid; up->mode = kp->mode; @@ -840,36 +1479,50 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) up->ealgos = kp->ealgos; up->calgos = kp->calgos; } - RTA_PUT(skb, XFRMA_TMPL, - (sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr), - vec); + return nla_put(skb, XFRMA_TMPL, + sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr, vec); +} + +static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buff *skb) +{ + if (x->security) { + return copy_sec_ctx(x->security, skb); + } return 0; +} -rtattr_failure: - return -1; +static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) +{ + if (xp->security) + return copy_sec_ctx(xp->security, skb); + return 0; +} +static inline size_t userpolicy_type_attrsize(void) +{ +#ifdef CONFIG_XFRM_SUB_POLICY + return nla_total_size(sizeof(struct xfrm_userpolicy_type)); +#else + return 0; +#endif } -static int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) +#ifdef CONFIG_XFRM_SUB_POLICY +static int copy_to_user_policy_type(u8 type, struct sk_buff *skb) { - if (xp->security) { - int ctx_size = sizeof(struct xfrm_sec_ctx) + - xp->security->ctx_len; - struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + struct xfrm_userpolicy_type upt = { + .type = type, + }; - uctx->exttype = XFRMA_SEC_CTX; - uctx->len = ctx_size; - uctx->ctx_doi = xp->security->ctx_doi; - uctx->ctx_alg = xp->security->ctx_alg; - uctx->ctx_len = xp->security->ctx_len; - memcpy(uctx + 1, xp->security->ctx_str, xp->security->ctx_len); - } - return 0; + return nla_put(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); +} - rtattr_failure: - return -1; +#else +static inline int copy_to_user_policy_type(u8 type, struct sk_buff *skb) +{ + return 0; } +#endif static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) { @@ -878,45 +1531,59 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr struct sk_buff *in_skb = sp->in_skb; struct sk_buff *skb = sp->out_skb; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - - if (sp->this_idx < sp->start_idx) - goto out; + int err; - nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, - sp->nlmsg_seq, - XFRM_MSG_NEWPOLICY, sizeof(*p)); - p = NLMSG_DATA(nlh); - nlh->nlmsg_flags = sp->nlmsg_flags; + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq, + XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags); + if (nlh == NULL) + return -EMSGSIZE; + p = nlmsg_data(nlh); copy_to_user_policy(xp, p, dir); - if (copy_to_user_tmpl(xp, skb) < 0) - goto nlmsg_failure; - if (copy_to_user_sec_ctx(xp, skb)) - goto nlmsg_failure; - - nlh->nlmsg_len = skb->tail - b; -out: - sp->this_idx++; + err = copy_to_user_tmpl(xp, skb); + if (!err) + err = copy_to_user_sec_ctx(xp, skb); + if (!err) + err = copy_to_user_policy_type(xp->type, skb); + if (!err) + err = xfrm_mark_put(skb, &xp->mark); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } + nlmsg_end(skb, nlh); return 0; +} -nlmsg_failure: - skb_trim(skb, b - skb->data); - return -1; +static int xfrm_dump_policy_done(struct netlink_callback *cb) +{ + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; + struct net *net = sock_net(cb->skb->sk); + + xfrm_policy_walk_done(walk, net); + return 0; } static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; struct xfrm_dump_info info; + BUILD_BUG_ON(sizeof(struct xfrm_policy_walk) > + sizeof(cb->args) - sizeof(cb->args[0])); + info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; - info.this_idx = 0; - info.start_idx = cb->args[0]; - (void) xfrm_policy_walk(dump_one_policy, &info); - cb->args[0] = info.this_idx; + + if (!cb->args[0]) { + cb->args[0] = 1; + xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY); + } + + (void) xfrm_policy_walk(net, walk, dump_one_policy, &info); return skb->len; } @@ -927,61 +1594,71 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, { struct xfrm_dump_info info; struct sk_buff *skb; + int err; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); - NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; info.in_skb = in_skb; info.out_skb = skb; info.nlmsg_seq = seq; info.nlmsg_flags = 0; - info.this_idx = info.start_idx = 0; - if (dump_one_policy(xp, dir, 0, &info) < 0) { + err = dump_one_policy(xp, dir, 0, &info); + if (err) { kfree_skb(skb); - return NULL; + return ERR_PTR(err); } return skb; } -static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; + u8 type = XFRM_POLICY_TYPE_MAIN; int err; struct km_event c; int delete; + struct xfrm_mark m; + u32 mark = xfrm_mark_get(attrs, &m); - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; + err = copy_from_user_policy_type(&type, attrs); + if (err) + return err; + err = verify_policy_dir(p->dir); if (err) return err; if (p->index) - xp = xfrm_policy_byid(p->dir, p->index, delete); + xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, delete, &err); else { - struct rtattr **rtattrs = (struct rtattr **)xfrma; - struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; - struct xfrm_policy tmp; + struct nlattr *rt = attrs[XFRMA_SEC_CTX]; + struct xfrm_sec_ctx *ctx; - err = verify_sec_ctx_len(rtattrs); + err = verify_sec_ctx_len(attrs); if (err) return err; - memset(&tmp, 0, sizeof(struct xfrm_policy)); + ctx = NULL; if (rt) { - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + struct xfrm_user_sec_ctx *uctx = nla_data(rt); - if ((err = security_xfrm_policy_alloc(&tmp, uctx))) + err = security_xfrm_policy_alloc(&ctx, uctx, GFP_KERNEL); + if (err) return err; } - xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete); - security_xfrm_policy_free(&tmp); + xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir, &p->sel, + ctx, delete, &err); + security_xfrm_policy_free(ctx); } if (xp == NULL) return -ENOENT; @@ -993,51 +1670,589 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = netlink_unicast(xfrm_nl, resp_skb, - NETLINK_CB(skb).pid, - MSG_DONTWAIT); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, + NETLINK_CB(skb).portid); } } else { + xfrm_audit_policy_delete(xp, err ? 0 : 1, true); + + if (err != 0) + goto out; + c.data.byid = p->index; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; km_policy_notify(xp, p->dir, &c); } +out: xfrm_pol_put(xp); - + if (delete && err == 0) + xfrm_garbage_collect(net); return err; } -static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct km_event c; - struct xfrm_usersa_flush *p = NLMSG_DATA(nlh); + struct xfrm_usersa_flush *p = nlmsg_data(nlh); + int err; - xfrm_state_flush(p->proto); + err = xfrm_state_flush(net, p->proto, true); + if (err) { + if (err == -ESRCH) /* empty table */ + return 0; + return err; + } c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; + c.net = net; km_state_notify(NULL, &c); return 0; } -static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x) +{ + size_t replay_size = x->replay_esn ? + xfrm_replay_state_esn_len(x->replay_esn) : + sizeof(struct xfrm_replay_state); + + return NLMSG_ALIGN(sizeof(struct xfrm_aevent_id)) + + nla_total_size(replay_size) + + nla_total_size(sizeof(struct xfrm_lifetime_cur)) + + nla_total_size(sizeof(struct xfrm_mark)) + + nla_total_size(4) /* XFRM_AE_RTHR */ + + nla_total_size(4); /* XFRM_AE_ETHR */ +} + +static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c) { + struct xfrm_aevent_id *id; + struct nlmsghdr *nlh; + int err; + + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); + if (nlh == NULL) + return -EMSGSIZE; + + id = nlmsg_data(nlh); + memcpy(&id->sa_id.daddr, &x->id.daddr, sizeof(x->id.daddr)); + id->sa_id.spi = x->id.spi; + id->sa_id.family = x->props.family; + id->sa_id.proto = x->id.proto; + memcpy(&id->saddr, &x->props.saddr, sizeof(x->props.saddr)); + id->reqid = x->props.reqid; + id->flags = c->data.aevent; + + if (x->replay_esn) { + err = nla_put(skb, XFRMA_REPLAY_ESN_VAL, + xfrm_replay_state_esn_len(x->replay_esn), + x->replay_esn); + } else { + err = nla_put(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), + &x->replay); + } + if (err) + goto out_cancel; + err = nla_put(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft); + if (err) + goto out_cancel; + + if (id->flags & XFRM_AE_RTHR) { + err = nla_put_u32(skb, XFRMA_REPLAY_THRESH, x->replay_maxdiff); + if (err) + goto out_cancel; + } + if (id->flags & XFRM_AE_ETHR) { + err = nla_put_u32(skb, XFRMA_ETIMER_THRESH, + x->replay_maxage * 10 / HZ); + if (err) + goto out_cancel; + } + err = xfrm_mark_put(skb, &x->mark); + if (err) + goto out_cancel; + + return nlmsg_end(skb, nlh); + +out_cancel: + nlmsg_cancel(skb, nlh); + return err; +} + +static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + struct net *net = sock_net(skb->sk); + struct xfrm_state *x; + struct sk_buff *r_skb; + int err; struct km_event c; + u32 mark; + struct xfrm_mark m; + struct xfrm_aevent_id *p = nlmsg_data(nlh); + struct xfrm_usersa_id *id = &p->sa_id; + + mark = xfrm_mark_get(attrs, &m); + + x = xfrm_state_lookup(net, mark, &id->daddr, id->spi, id->proto, id->family); + if (x == NULL) + return -ESRCH; + + r_skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC); + if (r_skb == NULL) { + xfrm_state_put(x); + return -ENOMEM; + } + + /* + * XXX: is this lock really needed - none of the other + * gets lock (the concern is things getting updated + * while we are still reading) - jhs + */ + spin_lock_bh(&x->lock); + c.data.aevent = p->flags; + c.seq = nlh->nlmsg_seq; + c.portid = nlh->nlmsg_pid; + + if (build_aevent(r_skb, x, &c) < 0) + BUG(); + err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); + spin_unlock_bh(&x->lock); + xfrm_state_put(x); + return err; +} + +static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + struct net *net = sock_net(skb->sk); + struct xfrm_state *x; + struct km_event c; + int err = -EINVAL; + u32 mark = 0; + struct xfrm_mark m; + struct xfrm_aevent_id *p = nlmsg_data(nlh); + struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; + struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; + struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; + + if (!lt && !rp && !re) + return err; + + /* pedantic mode - thou shalt sayeth replaceth */ + if (!(nlh->nlmsg_flags&NLM_F_REPLACE)) + return err; + + mark = xfrm_mark_get(attrs, &m); + + x = xfrm_state_lookup(net, mark, &p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); + if (x == NULL) + return -ESRCH; + + if (x->km.state != XFRM_STATE_VALID) + goto out; + + err = xfrm_replay_verify_len(x->replay_esn, re); + if (err) + goto out; + + spin_lock_bh(&x->lock); + xfrm_update_ae_params(x, attrs, 1); + spin_unlock_bh(&x->lock); - xfrm_policy_flush(); c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; + c.data.aevent = XFRM_AE_CU; + km_state_notify(x, &c); + err = 0; +out: + xfrm_state_put(x); + return err; +} + +static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + struct net *net = sock_net(skb->sk); + struct km_event c; + u8 type = XFRM_POLICY_TYPE_MAIN; + int err; + + err = copy_from_user_policy_type(&type, attrs); + if (err) + return err; + + err = xfrm_policy_flush(net, type, true); + if (err) { + if (err == -ESRCH) /* empty table */ + return 0; + return err; + } + + c.data.type = type; + c.event = nlh->nlmsg_type; + c.seq = nlh->nlmsg_seq; + c.portid = nlh->nlmsg_pid; + c.net = net; km_policy_notify(NULL, 0, &c); return 0; } -#define XMSGSIZE(type) NLMSG_LENGTH(sizeof(struct type)) +static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + struct net *net = sock_net(skb->sk); + struct xfrm_policy *xp; + struct xfrm_user_polexpire *up = nlmsg_data(nlh); + struct xfrm_userpolicy_info *p = &up->pol; + u8 type = XFRM_POLICY_TYPE_MAIN; + int err = -ENOENT; + struct xfrm_mark m; + u32 mark = xfrm_mark_get(attrs, &m); + + err = copy_from_user_policy_type(&type, attrs); + if (err) + return err; + + err = verify_policy_dir(p->dir); + if (err) + return err; + + if (p->index) + xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, 0, &err); + else { + struct nlattr *rt = attrs[XFRMA_SEC_CTX]; + struct xfrm_sec_ctx *ctx; + + err = verify_sec_ctx_len(attrs); + if (err) + return err; + + ctx = NULL; + if (rt) { + struct xfrm_user_sec_ctx *uctx = nla_data(rt); + + err = security_xfrm_policy_alloc(&ctx, uctx, GFP_KERNEL); + if (err) + return err; + } + xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir, + &p->sel, ctx, 0, &err); + security_xfrm_policy_free(ctx); + } + if (xp == NULL) + return -ENOENT; + + if (unlikely(xp->walk.dead)) + goto out; + + err = 0; + if (up->hard) { + xfrm_policy_delete(xp, p->dir); + xfrm_audit_policy_delete(xp, 1, true); + } else { + // reset the timers here? + WARN(1, "Dont know what to do with soft policy expire\n"); + } + km_policy_expired(xp, p->dir, up->hard, nlh->nlmsg_pid); + +out: + xfrm_pol_put(xp); + return err; +} + +static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + struct net *net = sock_net(skb->sk); + struct xfrm_state *x; + int err; + struct xfrm_user_expire *ue = nlmsg_data(nlh); + struct xfrm_usersa_info *p = &ue->state; + struct xfrm_mark m; + u32 mark = xfrm_mark_get(attrs, &m); + + x = xfrm_state_lookup(net, mark, &p->id.daddr, p->id.spi, p->id.proto, p->family); + + err = -ENOENT; + if (x == NULL) + return err; + + spin_lock_bh(&x->lock); + err = -EINVAL; + if (x->km.state != XFRM_STATE_VALID) + goto out; + km_state_expired(x, ue->hard, nlh->nlmsg_pid); + + if (ue->hard) { + __xfrm_state_delete(x); + xfrm_audit_state_delete(x, 1, true); + } + err = 0; +out: + spin_unlock_bh(&x->lock); + xfrm_state_put(x); + return err; +} + +static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + struct net *net = sock_net(skb->sk); + struct xfrm_policy *xp; + struct xfrm_user_tmpl *ut; + int i; + struct nlattr *rt = attrs[XFRMA_TMPL]; + struct xfrm_mark mark; + + struct xfrm_user_acquire *ua = nlmsg_data(nlh); + struct xfrm_state *x = xfrm_state_alloc(net); + int err = -ENOMEM; + + if (!x) + goto nomem; + + xfrm_mark_get(attrs, &mark); + + err = verify_newpolicy_info(&ua->policy); + if (err) + goto bad_policy; + + /* build an XP */ + xp = xfrm_policy_construct(net, &ua->policy, attrs, &err); + if (!xp) + goto free_state; + + memcpy(&x->id, &ua->id, sizeof(ua->id)); + memcpy(&x->props.saddr, &ua->saddr, sizeof(ua->saddr)); + memcpy(&x->sel, &ua->sel, sizeof(ua->sel)); + xp->mark.m = x->mark.m = mark.m; + xp->mark.v = x->mark.v = mark.v; + ut = nla_data(rt); + /* extract the templates and for each call km_key */ + for (i = 0; i < xp->xfrm_nr; i++, ut++) { + struct xfrm_tmpl *t = &xp->xfrm_vec[i]; + memcpy(&x->id, &t->id, sizeof(x->id)); + x->props.mode = t->mode; + x->props.reqid = t->reqid; + x->props.family = ut->family; + t->aalgos = ua->aalgos; + t->ealgos = ua->ealgos; + t->calgos = ua->calgos; + err = km_query(x, t, xp); + + } + + kfree(x); + kfree(xp); + + return 0; + +bad_policy: + WARN(1, "BAD policy passed\n"); +free_state: + kfree(x); +nomem: + return err; +} + +#ifdef CONFIG_XFRM_MIGRATE +static int copy_from_user_migrate(struct xfrm_migrate *ma, + struct xfrm_kmaddress *k, + struct nlattr **attrs, int *num) +{ + struct nlattr *rt = attrs[XFRMA_MIGRATE]; + struct xfrm_user_migrate *um; + int i, num_migrate; + + if (k != NULL) { + struct xfrm_user_kmaddress *uk; + + uk = nla_data(attrs[XFRMA_KMADDRESS]); + memcpy(&k->local, &uk->local, sizeof(k->local)); + memcpy(&k->remote, &uk->remote, sizeof(k->remote)); + k->family = uk->family; + k->reserved = uk->reserved; + } + + um = nla_data(rt); + num_migrate = nla_len(rt) / sizeof(*um); + + if (num_migrate <= 0 || num_migrate > XFRM_MAX_DEPTH) + return -EINVAL; + + for (i = 0; i < num_migrate; i++, um++, ma++) { + memcpy(&ma->old_daddr, &um->old_daddr, sizeof(ma->old_daddr)); + memcpy(&ma->old_saddr, &um->old_saddr, sizeof(ma->old_saddr)); + memcpy(&ma->new_daddr, &um->new_daddr, sizeof(ma->new_daddr)); + memcpy(&ma->new_saddr, &um->new_saddr, sizeof(ma->new_saddr)); + + ma->proto = um->proto; + ma->mode = um->mode; + ma->reqid = um->reqid; + + ma->old_family = um->old_family; + ma->new_family = um->new_family; + } + + *num = i; + return 0; +} + +static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + struct xfrm_userpolicy_id *pi = nlmsg_data(nlh); + struct xfrm_migrate m[XFRM_MAX_DEPTH]; + struct xfrm_kmaddress km, *kmp; + u8 type; + int err; + int n = 0; + struct net *net = sock_net(skb->sk); + + if (attrs[XFRMA_MIGRATE] == NULL) + return -EINVAL; + + kmp = attrs[XFRMA_KMADDRESS] ? &km : NULL; + + err = copy_from_user_policy_type(&type, attrs); + if (err) + return err; + + err = copy_from_user_migrate((struct xfrm_migrate *)m, kmp, attrs, &n); + if (err) + return err; + + if (!n) + return 0; + + xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net); + + return 0; +} +#else +static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + return -ENOPROTOOPT; +} +#endif + +#ifdef CONFIG_XFRM_MIGRATE +static int copy_to_user_migrate(const struct xfrm_migrate *m, struct sk_buff *skb) +{ + struct xfrm_user_migrate um; + + memset(&um, 0, sizeof(um)); + um.proto = m->proto; + um.mode = m->mode; + um.reqid = m->reqid; + um.old_family = m->old_family; + memcpy(&um.old_daddr, &m->old_daddr, sizeof(um.old_daddr)); + memcpy(&um.old_saddr, &m->old_saddr, sizeof(um.old_saddr)); + um.new_family = m->new_family; + memcpy(&um.new_daddr, &m->new_daddr, sizeof(um.new_daddr)); + memcpy(&um.new_saddr, &m->new_saddr, sizeof(um.new_saddr)); + + return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um); +} + +static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff *skb) +{ + struct xfrm_user_kmaddress uk; + + memset(&uk, 0, sizeof(uk)); + uk.family = k->family; + uk.reserved = k->reserved; + memcpy(&uk.local, &k->local, sizeof(uk.local)); + memcpy(&uk.remote, &k->remote, sizeof(uk.remote)); + + return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); +} + +static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)) + + (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0) + + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) + + userpolicy_type_attrsize(); +} + +static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m, + int num_migrate, const struct xfrm_kmaddress *k, + const struct xfrm_selector *sel, u8 dir, u8 type) +{ + const struct xfrm_migrate *mp; + struct xfrm_userpolicy_id *pol_id; + struct nlmsghdr *nlh; + int i, err; + + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id), 0); + if (nlh == NULL) + return -EMSGSIZE; + + pol_id = nlmsg_data(nlh); + /* copy data from selector, dir, and type to the pol_id */ + memset(pol_id, 0, sizeof(*pol_id)); + memcpy(&pol_id->sel, sel, sizeof(pol_id->sel)); + pol_id->dir = dir; + + if (k != NULL) { + err = copy_to_user_kmaddress(k, skb); + if (err) + goto out_cancel; + } + err = copy_to_user_policy_type(type, skb); + if (err) + goto out_cancel; + for (i = 0, mp = m ; i < num_migrate; i++, mp++) { + err = copy_to_user_migrate(mp, skb); + if (err) + goto out_cancel; + } + + return nlmsg_end(skb, nlh); + +out_cancel: + nlmsg_cancel(skb, nlh); + return err; +} + +static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + const struct xfrm_migrate *m, int num_migrate, + const struct xfrm_kmaddress *k) +{ + struct net *net = &init_net; + struct sk_buff *skb; + + skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k), GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + /* build migrate */ + if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0) + BUG(); + + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE); +} +#else +static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + const struct xfrm_migrate *m, int num_migrate, + const struct xfrm_kmaddress *k) +{ + return -ENOPROTOOPT; +} +#endif + +#define XMSGSIZE(type) sizeof(struct type) static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), @@ -1053,266 +2268,313 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), [XFRM_MSG_POLEXPIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_polexpire), [XFRM_MSG_FLUSHSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_flush), - [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0), + [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = 0, + [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), + [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), + [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), + [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), + [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32), + [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32), }; #undef XMSGSIZE -static struct xfrm_link { - int (*doit)(struct sk_buff *, struct nlmsghdr *, void **); +static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { + [XFRMA_SA] = { .len = sizeof(struct xfrm_usersa_info)}, + [XFRMA_POLICY] = { .len = sizeof(struct xfrm_userpolicy_info)}, + [XFRMA_LASTUSED] = { .type = NLA_U64}, + [XFRMA_ALG_AUTH_TRUNC] = { .len = sizeof(struct xfrm_algo_auth)}, + [XFRMA_ALG_AEAD] = { .len = sizeof(struct xfrm_algo_aead) }, + [XFRMA_ALG_AUTH] = { .len = sizeof(struct xfrm_algo) }, + [XFRMA_ALG_CRYPT] = { .len = sizeof(struct xfrm_algo) }, + [XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) }, + [XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) }, + [XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) }, + [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_sec_ctx) }, + [XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) }, + [XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) }, + [XFRMA_REPLAY_THRESH] = { .type = NLA_U32 }, + [XFRMA_ETIMER_THRESH] = { .type = NLA_U32 }, + [XFRMA_SRCADDR] = { .len = sizeof(xfrm_address_t) }, + [XFRMA_COADDR] = { .len = sizeof(xfrm_address_t) }, + [XFRMA_POLICY_TYPE] = { .len = sizeof(struct xfrm_userpolicy_type)}, + [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, + [XFRMA_KMADDRESS] = { .len = sizeof(struct xfrm_user_kmaddress) }, + [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) }, + [XFRMA_TFCPAD] = { .type = NLA_U32 }, + [XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) }, + [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 }, + [XFRMA_PROTO] = { .type = NLA_U8 }, + [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, +}; + +static const struct xfrm_link { + int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); int (*dump)(struct sk_buff *, struct netlink_callback *); + int (*done)(struct netlink_callback *); } xfrm_dispatch[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa }, [XFRM_MSG_DELSA - XFRM_MSG_BASE] = { .doit = xfrm_del_sa }, [XFRM_MSG_GETSA - XFRM_MSG_BASE] = { .doit = xfrm_get_sa, - .dump = xfrm_dump_sa }, + .dump = xfrm_dump_sa, + .done = xfrm_dump_sa_done }, [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy }, [XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy }, [XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy, - .dump = xfrm_dump_policy }, + .dump = xfrm_dump_policy, + .done = xfrm_dump_policy_done }, [XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi }, + [XFRM_MSG_ACQUIRE - XFRM_MSG_BASE] = { .doit = xfrm_add_acquire }, + [XFRM_MSG_EXPIRE - XFRM_MSG_BASE] = { .doit = xfrm_add_sa_expire }, [XFRM_MSG_UPDPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy }, [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa }, + [XFRM_MSG_POLEXPIRE - XFRM_MSG_BASE] = { .doit = xfrm_add_pol_expire}, [XFRM_MSG_FLUSHSA - XFRM_MSG_BASE] = { .doit = xfrm_flush_sa }, [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_flush_policy }, + [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = { .doit = xfrm_new_ae }, + [XFRM_MSG_GETAE - XFRM_MSG_BASE] = { .doit = xfrm_get_ae }, + [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate }, + [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_sadinfo }, + [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo }, }; -static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) +static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct rtattr *xfrma[XFRMA_MAX]; - struct xfrm_link *link; - int type, min_len; - - if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) - return 0; + struct net *net = sock_net(skb->sk); + struct nlattr *attrs[XFRMA_MAX+1]; + const struct xfrm_link *link; + int type, err; type = nlh->nlmsg_type; - - /* A control message: ignore them */ - if (type < XFRM_MSG_BASE) - return 0; - - /* Unknown message: reply with EINVAL */ if (type > XFRM_MSG_MAX) - goto err_einval; + return -EINVAL; type -= XFRM_MSG_BASE; link = &xfrm_dispatch[type]; /* All operations require privileges, even GET */ - if (security_netlink_recv(skb)) { - *errp = -EPERM; - return -1; - } + if (!netlink_net_capable(skb, CAP_NET_ADMIN)) + return -EPERM; if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) && (nlh->nlmsg_flags & NLM_F_DUMP)) { if (link->dump == NULL) - goto err_einval; + return -EINVAL; - if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh, - link->dump, NULL)) != 0) { - return -1; + { + struct netlink_dump_control c = { + .dump = link->dump, + .done = link->done, + }; + return netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c); } - - netlink_queue_skip(nlh, skb); - return -1; } - memset(xfrma, 0, sizeof(xfrma)); - - if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type])) - goto err_einval; - - if (nlh->nlmsg_len > min_len) { - int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - struct rtattr *attr = (void *) nlh + NLMSG_ALIGN(min_len); - - while (RTA_OK(attr, attrlen)) { - unsigned short flavor = attr->rta_type; - if (flavor) { - if (flavor > XFRMA_MAX) - goto err_einval; - xfrma[flavor - 1] = attr; - } - attr = RTA_NEXT(attr, attrlen); - } - } + err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX, + xfrma_policy); + if (err < 0) + return err; if (link->doit == NULL) - goto err_einval; - *errp = link->doit(skb, nlh, (void **) &xfrma); - - return *errp; + return -EINVAL; -err_einval: - *errp = -EINVAL; - return -1; + return link->doit(skb, nlh, attrs); } -static void xfrm_netlink_rcv(struct sock *sk, int len) +static void xfrm_netlink_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; + struct net *net = sock_net(skb->sk); - do { - down(&xfrm_cfg_sem); - netlink_run_queue(sk, &qlen, &xfrm_user_rcv_msg); - up(&xfrm_cfg_sem); + mutex_lock(&net->xfrm.xfrm_cfg_mutex); + netlink_rcv_skb(skb, &xfrm_user_rcv_msg); + mutex_unlock(&net->xfrm.xfrm_cfg_mutex); +} - } while (qlen); +static inline size_t xfrm_expire_msgsize(void) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) + + nla_total_size(sizeof(struct xfrm_mark)); } -static int build_expire(struct sk_buff *skb, struct xfrm_state *x, int hard) +static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c) { struct xfrm_user_expire *ue; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + int err; - nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_EXPIRE, - sizeof(*ue)); - ue = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); + if (nlh == NULL) + return -EMSGSIZE; + ue = nlmsg_data(nlh); copy_to_user_state(x, &ue->state); - ue->hard = (hard != 0) ? 1 : 0; + ue->hard = (c->data.hard != 0) ? 1 : 0; - nlh->nlmsg_len = skb->tail - b; - return skb->len; + err = xfrm_mark_put(skb, &x->mark); + if (err) + return err; + + return nlmsg_end(skb, nlh); +} + +static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c) +{ + struct net *net = xs_net(x); + struct sk_buff *skb; + + skb = nlmsg_new(xfrm_expire_msgsize(), GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + if (build_expire(skb, x, c) < 0) { + kfree_skb(skb); + return -EMSGSIZE; + } -nlmsg_failure: - skb_trim(skb, b - skb->data); - return -1; + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE); } -static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) +static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event *c) { + struct net *net = xs_net(x); struct sk_buff *skb; - int len = NLMSG_LENGTH(sizeof(struct xfrm_user_expire)); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - if (build_expire(skb, x, c->data.hard) < 0) + if (build_aevent(skb, x, c) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS); } -static int xfrm_notify_sa_flush(struct km_event *c) +static int xfrm_notify_sa_flush(const struct km_event *c) { + struct net *net = c->net; struct xfrm_usersa_flush *p; struct nlmsghdr *nlh; struct sk_buff *skb; - unsigned char *b; - int len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush)); + int len = NLMSG_ALIGN(sizeof(struct xfrm_usersa_flush)); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - b = skb->tail; - nlh = NLMSG_PUT(skb, c->pid, c->seq, - XFRM_MSG_FLUSHSA, sizeof(*p)); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); + if (nlh == NULL) { + kfree_skb(skb); + return -EMSGSIZE; + } - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); p->proto = c->data.proto; - nlh->nlmsg_len = skb->tail - b; - - NETLINK_CB(skb).dst_group = XFRMNLGRP_SA; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + nlmsg_end(skb, nlh); -nlmsg_failure: - kfree_skb(skb); - return -1; + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA); } -static int inline xfrm_sa_len(struct xfrm_state *x) +static inline size_t xfrm_sa_len(struct xfrm_state *x) { - int l = 0; - if (x->aalg) - l += RTA_SPACE(sizeof(*x->aalg) + (x->aalg->alg_key_len+7)/8); + size_t l = 0; + if (x->aead) + l += nla_total_size(aead_len(x->aead)); + if (x->aalg) { + l += nla_total_size(sizeof(struct xfrm_algo) + + (x->aalg->alg_key_len + 7) / 8); + l += nla_total_size(xfrm_alg_auth_len(x->aalg)); + } if (x->ealg) - l += RTA_SPACE(sizeof(*x->ealg) + (x->ealg->alg_key_len+7)/8); + l += nla_total_size(xfrm_alg_len(x->ealg)); if (x->calg) - l += RTA_SPACE(sizeof(*x->calg)); + l += nla_total_size(sizeof(*x->calg)); if (x->encap) - l += RTA_SPACE(sizeof(*x->encap)); + l += nla_total_size(sizeof(*x->encap)); + if (x->tfcpad) + l += nla_total_size(sizeof(x->tfcpad)); + if (x->replay_esn) + l += nla_total_size(xfrm_replay_state_esn_len(x->replay_esn)); + if (x->security) + l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) + + x->security->ctx_len); + if (x->coaddr) + l += nla_total_size(sizeof(*x->coaddr)); + if (x->props.extra_flags) + l += nla_total_size(sizeof(x->props.extra_flags)); + + /* Must count x->lastused as it may become non-zero behind our back. */ + l += nla_total_size(sizeof(u64)); return l; } -static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) +static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) { + struct net *net = xs_net(x); struct xfrm_usersa_info *p; struct xfrm_usersa_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - unsigned char *b; int len = xfrm_sa_len(x); - int headlen; + int headlen, err; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELSA) { - len += RTA_SPACE(headlen); + len += nla_total_size(headlen); headlen = sizeof(*id); + len += nla_total_size(sizeof(struct xfrm_mark)); } - len += NLMSG_SPACE(headlen); + len += NLMSG_ALIGN(headlen); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - b = skb->tail; - nlh = NLMSG_PUT(skb, c->pid, c->seq, c->event, headlen); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); + err = -EMSGSIZE; + if (nlh == NULL) + goto out_free_skb; - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); if (c->event == XFRM_MSG_DELSA) { - id = NLMSG_DATA(nlh); + struct nlattr *attr; + + id = nlmsg_data(nlh); memcpy(&id->daddr, &x->id.daddr, sizeof(id->daddr)); id->spi = x->id.spi; id->family = x->props.family; id->proto = x->id.proto; - p = RTA_DATA(__RTA_PUT(skb, XFRMA_SA, sizeof(*p))); - } - - copy_to_user_state(x, p); + attr = nla_reserve(skb, XFRMA_SA, sizeof(*p)); + err = -EMSGSIZE; + if (attr == NULL) + goto out_free_skb; - if (x->aalg) - RTA_PUT(skb, XFRMA_ALG_AUTH, - sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg); - if (x->ealg) - RTA_PUT(skb, XFRMA_ALG_CRYPT, - sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg); - if (x->calg) - RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); - - if (x->encap) - RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + p = nla_data(attr); + } + err = copy_to_user_state_extra(x, p, skb); + if (err) + goto out_free_skb; - nlh->nlmsg_len = skb->tail - b; + nlmsg_end(skb, nlh); - NETLINK_CB(skb).dst_group = XFRMNLGRP_SA; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA); -nlmsg_failure: -rtattr_failure: +out_free_skb: kfree_skb(skb); - return -1; + return err; } -static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c) +static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c) { switch (c->event) { case XFRM_MSG_EXPIRE: return xfrm_exp_state_notify(x, c); + case XFRM_MSG_NEWAE: + return xfrm_aevent_state_notify(x, c); case XFRM_MSG_DELSA: case XFRM_MSG_UPDSA: case XFRM_MSG_NEWSA: @@ -1320,89 +2582,98 @@ static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c) case XFRM_MSG_FLUSHSA: return xfrm_notify_sa_flush(c); default: - printk("xfrm_user: Unknown SA event %d\n", c->event); - break; + printk(KERN_NOTICE "xfrm_user: Unknown SA event %d\n", + c->event); + break; } return 0; } +static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x, + struct xfrm_policy *xp) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire)) + + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + + nla_total_size(sizeof(struct xfrm_mark)) + + nla_total_size(xfrm_user_sec_ctx_size(x->security)) + + userpolicy_type_attrsize(); +} + static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, - struct xfrm_tmpl *xt, struct xfrm_policy *xp, - int dir) + struct xfrm_tmpl *xt, struct xfrm_policy *xp) { + __u32 seq = xfrm_get_acqseq(); struct xfrm_user_acquire *ua; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - __u32 seq = xfrm_get_acqseq(); + int err; - nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE, - sizeof(*ua)); - ua = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_ACQUIRE, sizeof(*ua), 0); + if (nlh == NULL) + return -EMSGSIZE; + ua = nlmsg_data(nlh); memcpy(&ua->id, &x->id, sizeof(ua->id)); memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr)); memcpy(&ua->sel, &x->sel, sizeof(ua->sel)); - copy_to_user_policy(xp, &ua->policy, dir); + copy_to_user_policy(xp, &ua->policy, XFRM_POLICY_OUT); ua->aalgos = xt->aalgos; ua->ealgos = xt->ealgos; ua->calgos = xt->calgos; ua->seq = x->km.seq = seq; - if (copy_to_user_tmpl(xp, skb) < 0) - goto nlmsg_failure; - if (copy_to_user_sec_ctx(xp, skb)) - goto nlmsg_failure; - - nlh->nlmsg_len = skb->tail - b; - return skb->len; + err = copy_to_user_tmpl(xp, skb); + if (!err) + err = copy_to_user_state_sec_ctx(x, skb); + if (!err) + err = copy_to_user_policy_type(xp->type, skb); + if (!err) + err = xfrm_mark_put(skb, &xp->mark); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } -nlmsg_failure: - skb_trim(skb, b - skb->data); - return -1; + return nlmsg_end(skb, nlh); } static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, - struct xfrm_policy *xp, int dir) + struct xfrm_policy *xp) { + struct net *net = xs_net(x); struct sk_buff *skb; - size_t len; - len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); - len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire)); - len += RTA_SPACE(xfrm_user_sec_ctx_size(xp)); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - if (build_acquire(skb, x, xt, xp, dir) < 0) + if (build_acquire(skb, x, xt, xp) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_ACQUIRE; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE); } /* User gives us xfrm_user_policy_info followed by an array of 0 * or more templates. */ -static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, +static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { + struct net *net = sock_net(sk); struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; struct xfrm_user_tmpl *ut = (struct xfrm_user_tmpl *) (p + 1); struct xfrm_policy *xp; int nr; - switch (family) { + switch (sk->sk_family) { case AF_INET: if (opt != IP_XFRM_POLICY) { *dir = -EOPNOTSUPP; return NULL; } break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: if (opt != IPV6_XFRM_POLICY) { *dir = -EOPNOTSUPP; @@ -1422,19 +2693,20 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, return NULL; nr = ((len - sizeof(*p)) / sizeof(*ut)); - if (nr > XFRM_MAX_DEPTH) + if (validate_tmpl(nr, ut, p->sel.family)) return NULL; if (p->dir > XFRM_POLICY_OUT) return NULL; - xp = xfrm_policy_alloc(GFP_KERNEL); + xp = xfrm_policy_alloc(net, GFP_ATOMIC); if (xp == NULL) { *dir = -ENOBUFS; return NULL; } copy_from_user_policy(xp, p); + xp->type = XFRM_POLICY_TYPE_MAIN; copy_templates(xp, ut, nr); *dir = p->dir; @@ -1442,78 +2714,93 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, return xp; } +static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire)) + + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + + nla_total_size(xfrm_user_sec_ctx_size(xp->security)) + + nla_total_size(sizeof(struct xfrm_mark)) + + userpolicy_type_attrsize(); +} + static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, - int dir, int hard) + int dir, const struct km_event *c) { struct xfrm_user_polexpire *upe; + int hard = c->data.hard; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + int err; - nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe)); - upe = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); + if (nlh == NULL) + return -EMSGSIZE; + upe = nlmsg_data(nlh); copy_to_user_policy(xp, &upe->pol, dir); - if (copy_to_user_tmpl(xp, skb) < 0) - goto nlmsg_failure; - if (copy_to_user_sec_ctx(xp, skb)) - goto nlmsg_failure; + err = copy_to_user_tmpl(xp, skb); + if (!err) + err = copy_to_user_sec_ctx(xp, skb); + if (!err) + err = copy_to_user_policy_type(xp->type, skb); + if (!err) + err = xfrm_mark_put(skb, &xp->mark); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } upe->hard = !!hard; - nlh->nlmsg_len = skb->tail - b; - return skb->len; - -nlmsg_failure: - skb_trim(skb, b - skb->data); - return -1; + return nlmsg_end(skb, nlh); } -static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) +static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { + struct net *net = xp_net(xp); struct sk_buff *skb; - size_t len; - len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); - len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire)); - len += RTA_SPACE(xfrm_user_sec_ctx_size(xp)); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - if (build_polexpire(skb, xp, dir, c->data.hard) < 0) + if (build_polexpire(skb, xp, dir, c) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE); } -static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) +static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) { + int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); + struct net *net = xp_net(xp); struct xfrm_userpolicy_info *p; struct xfrm_userpolicy_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - unsigned char *b; - int len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); - int headlen; + int headlen, err; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELPOLICY) { - len += RTA_SPACE(headlen); + len += nla_total_size(headlen); headlen = sizeof(*id); } - len += NLMSG_SPACE(headlen); + len += userpolicy_type_attrsize(); + len += nla_total_size(sizeof(struct xfrm_mark)); + len += NLMSG_ALIGN(headlen); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - b = skb->tail; - nlh = NLMSG_PUT(skb, c->pid, c->seq, c->event, headlen); + nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); + err = -EMSGSIZE; + if (nlh == NULL) + goto out_free_skb; - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); if (c->event == XFRM_MSG_DELPOLICY) { - id = NLMSG_DATA(nlh); + struct nlattr *attr; + + id = nlmsg_data(nlh); memset(id, 0, sizeof(*id)); id->dir = dir; if (c->data.byid) @@ -1521,52 +2808,61 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * else memcpy(&id->sel, &xp->selector, sizeof(id->sel)); - p = RTA_DATA(__RTA_PUT(skb, XFRMA_POLICY, sizeof(*p))); - } + attr = nla_reserve(skb, XFRMA_POLICY, sizeof(*p)); + err = -EMSGSIZE; + if (attr == NULL) + goto out_free_skb; - nlh->nlmsg_flags = 0; + p = nla_data(attr); + } copy_to_user_policy(xp, p, dir); - if (copy_to_user_tmpl(xp, skb) < 0) - goto nlmsg_failure; + err = copy_to_user_tmpl(xp, skb); + if (!err) + err = copy_to_user_policy_type(xp->type, skb); + if (!err) + err = xfrm_mark_put(skb, &xp->mark); + if (err) + goto out_free_skb; - nlh->nlmsg_len = skb->tail - b; + nlmsg_end(skb, nlh); - NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY); -nlmsg_failure: -rtattr_failure: +out_free_skb: kfree_skb(skb); - return -1; + return err; } -static int xfrm_notify_policy_flush(struct km_event *c) +static int xfrm_notify_policy_flush(const struct km_event *c) { + struct net *net = c->net; struct nlmsghdr *nlh; struct sk_buff *skb; - unsigned char *b; - int len = NLMSG_LENGTH(0); + int err; - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(userpolicy_type_attrsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - b = skb->tail; - - nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); + err = -EMSGSIZE; + if (nlh == NULL) + goto out_free_skb; + err = copy_to_user_policy_type(c->data.type, skb); + if (err) + goto out_free_skb; - nlh->nlmsg_len = skb->tail - b; + nlmsg_end(skb, nlh); - NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY); -nlmsg_failure: +out_free_skb: kfree_skb(skb); - return -1; + return err; } -static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) +static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { switch (c->event) { @@ -1579,42 +2875,181 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev case XFRM_MSG_POLEXPIRE: return xfrm_exp_policy_notify(xp, dir, c); default: - printk("xfrm_user: Unknown Policy event %d\n", c->event); + printk(KERN_NOTICE "xfrm_user: Unknown Policy event %d\n", + c->event); } return 0; } +static inline size_t xfrm_report_msgsize(void) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_report)); +} + +static int build_report(struct sk_buff *skb, u8 proto, + struct xfrm_selector *sel, xfrm_address_t *addr) +{ + struct xfrm_user_report *ur; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur), 0); + if (nlh == NULL) + return -EMSGSIZE; + + ur = nlmsg_data(nlh); + ur->proto = proto; + memcpy(&ur->sel, sel, sizeof(ur->sel)); + + if (addr) { + int err = nla_put(skb, XFRMA_COADDR, sizeof(*addr), addr); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } + } + return nlmsg_end(skb, nlh); +} + +static int xfrm_send_report(struct net *net, u8 proto, + struct xfrm_selector *sel, xfrm_address_t *addr) +{ + struct sk_buff *skb; + + skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + if (build_report(skb, proto, sel, addr) < 0) + BUG(); + + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT); +} + +static inline size_t xfrm_mapping_msgsize(void) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_mapping)); +} + +static int build_mapping(struct sk_buff *skb, struct xfrm_state *x, + xfrm_address_t *new_saddr, __be16 new_sport) +{ + struct xfrm_user_mapping *um; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_MAPPING, sizeof(*um), 0); + if (nlh == NULL) + return -EMSGSIZE; + + um = nlmsg_data(nlh); + + memcpy(&um->id.daddr, &x->id.daddr, sizeof(um->id.daddr)); + um->id.spi = x->id.spi; + um->id.family = x->props.family; + um->id.proto = x->id.proto; + memcpy(&um->new_saddr, new_saddr, sizeof(um->new_saddr)); + memcpy(&um->old_saddr, &x->props.saddr, sizeof(um->old_saddr)); + um->new_sport = new_sport; + um->old_sport = x->encap->encap_sport; + um->reqid = x->props.reqid; + + return nlmsg_end(skb, nlh); +} + +static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, + __be16 sport) +{ + struct net *net = xs_net(x); + struct sk_buff *skb; + + if (x->id.proto != IPPROTO_ESP) + return -EINVAL; + + if (!x->encap) + return -EINVAL; + + skb = nlmsg_new(xfrm_mapping_msgsize(), GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + if (build_mapping(skb, x, ipaddr, sport) < 0) + BUG(); + + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING); +} + +static bool xfrm_is_alive(const struct km_event *c) +{ + return (bool)xfrm_acquire_is_on(c->net); +} + static struct xfrm_mgr netlink_mgr = { .id = "netlink", .notify = xfrm_send_state_notify, .acquire = xfrm_send_acquire, .compile_policy = xfrm_compile_policy, .notify_policy = xfrm_send_policy_notify, + .report = xfrm_send_report, + .migrate = xfrm_send_migrate, + .new_mapping = xfrm_send_mapping, + .is_alive = xfrm_is_alive, }; -static int __init xfrm_user_init(void) +static int __net_init xfrm_user_net_init(struct net *net) { - printk(KERN_INFO "Initializing IPsec netlink socket\n"); + struct sock *nlsk; + struct netlink_kernel_cfg cfg = { + .groups = XFRMNLGRP_MAX, + .input = xfrm_netlink_rcv, + }; - xfrm_nl = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, - xfrm_netlink_rcv, THIS_MODULE); - if (xfrm_nl == NULL) + nlsk = netlink_kernel_create(net, NETLINK_XFRM, &cfg); + if (nlsk == NULL) return -ENOMEM; + net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */ + rcu_assign_pointer(net->xfrm.nlsk, nlsk); + return 0; +} - xfrm_register_km(&netlink_mgr); +static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list) +{ + struct net *net; + list_for_each_entry(net, net_exit_list, exit_list) + RCU_INIT_POINTER(net->xfrm.nlsk, NULL); + synchronize_net(); + list_for_each_entry(net, net_exit_list, exit_list) + netlink_kernel_release(net->xfrm.nlsk_stash); +} - return 0; +static struct pernet_operations xfrm_user_net_ops = { + .init = xfrm_user_net_init, + .exit_batch = xfrm_user_net_exit, +}; + +static int __init xfrm_user_init(void) +{ + int rv; + + printk(KERN_INFO "Initializing XFRM netlink socket\n"); + + rv = register_pernet_subsys(&xfrm_user_net_ops); + if (rv < 0) + return rv; + rv = xfrm_register_km(&netlink_mgr); + if (rv < 0) + unregister_pernet_subsys(&xfrm_user_net_ops); + return rv; } static void __exit xfrm_user_exit(void) { xfrm_unregister_km(&netlink_mgr); - sock_release(xfrm_nl->sk_socket); + unregister_pernet_subsys(&xfrm_user_net_ops); } module_init(xfrm_user_init); module_exit(xfrm_user_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM); + |
