Diffstat (limited to 'drivers/crypto/ccp')
 drivers/crypto/ccp/Kconfig               |   24
 drivers/crypto/ccp/Makefile              |   10
 drivers/crypto/ccp/ccp-crypto-aes-cmac.c |  365
 drivers/crypto/ccp/ccp-crypto-aes-xts.c  |  279
 drivers/crypto/ccp/ccp-crypto-aes.c      |  369
 drivers/crypto/ccp/ccp-crypto-main.c     |  388
 drivers/crypto/ccp/ccp-crypto-sha.c      |  437
 drivers/crypto/ccp/ccp-crypto.h          |  197
 drivers/crypto/ccp/ccp-dev.c             |  608
 drivers/crypto/ccp/ccp-dev.h             |  272
 drivers/crypto/ccp/ccp-ops.c             | 2126
 drivers/crypto/ccp/ccp-pci.c             |  360
 12 files changed, 5435 insertions(+), 0 deletions(-)
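
The patch below registers its ciphers and hashes under standard kernel crypto API names (e.g. "cbc(aes)"/"cbc-aes-ccp", "cmac(aes)"/"cmac-aes-ccp", "sha256"/"sha256-ccp"), so in-kernel users reach the CCP through the ordinary asynchronous crypto interface rather than a driver-private one. The following sketch is an illustrative consumer and is not part of the patch: it allocates an ablkcipher, submits one AES-CBC request, and waits for the asynchronous completion in the way the driver's backlog handling expects. All function and variable names here (ccp_test_*) are hypothetical; only the algorithm name and the -EBUSY/-EINPROGRESS semantics come from the code in this series. A caller that wants to pin the CCP implementation specifically could request the driver name "cbc-aes-ccp" instead of the generic "cbc(aes)".

/*
 * Illustrative sketch (not part of the patch): exercising a CCP-backed
 * algorithm through the asynchronous ablkcipher API of this kernel era.
 */
#include <linux/completion.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <crypto/aes.h>

struct ccp_test_result {
	struct completion completion;
	int err;
};

/* Completion callback: record the final result and wake the waiter */
static void ccp_test_complete(struct crypto_async_request *req, int err)
{
	struct ccp_test_result *res = req->data;

	if (err == -EINPROGRESS)
		return;	/* request only advanced out of the backlog */

	res->err = err;
	complete(&res->completion);
}

/* Encrypt @len bytes of @buf in place with AES-128-CBC */
static int ccp_test_encrypt(u8 *buf, unsigned int len, const u8 *key, u8 *iv)
{
	struct crypto_ablkcipher *tfm;
	struct ablkcipher_request *req;
	struct ccp_test_result res;
	struct scatterlist sg;
	int ret;

	tfm = crypto_alloc_ablkcipher("cbc(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_ablkcipher_setkey(tfm, key, AES_KEYSIZE_128);
	if (ret)
		goto out_free_tfm;

	req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		ret = -ENOMEM;
		goto out_free_tfm;
	}

	init_completion(&res.completion);
	ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
					ccp_test_complete, &res);

	sg_init_one(&sg, buf, len);
	ablkcipher_request_set_crypt(req, &sg, &sg, len, iv);

	ret = crypto_ablkcipher_encrypt(req);
	if (ret == -EINPROGRESS || ret == -EBUSY) {
		/* Asynchronous path: wait for the completion callback */
		wait_for_completion(&res.completion);
		ret = res.err;
	}

	ablkcipher_request_free(req);
out_free_tfm:
	crypto_free_ablkcipher(tfm);
	return ret;
}

The CRYPTO_TFM_REQ_MAY_BACKLOG request flag maps to CCP_CMD_MAY_BACKLOG in ccp_crypto_enqueue_request() below, which is why the callback in a sketch like this must treat -EINPROGRESS as "left the backlog" rather than as final completion.
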
diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig new file mode 100644 index 00000000000..7639ffc36c6 --- /dev/null +++ b/drivers/crypto/ccp/Kconfig @@ -0,0 +1,24 @@ +config CRYPTO_DEV_CCP_DD +	tristate "Cryptographic Coprocessor device driver" +	depends on CRYPTO_DEV_CCP +	default m +	select HW_RANDOM +	help +	  Provides the interface to use the AMD Cryptographic Coprocessor +	  which can be used to accelerate or offload encryption operations +	  such as SHA, AES and more. If you choose 'M' here, this module +	  will be called ccp. + +config CRYPTO_DEV_CCP_CRYPTO +	tristate "Encryption and hashing acceleration support" +	depends on CRYPTO_DEV_CCP_DD +	default m +	select CRYPTO_ALGAPI +	select CRYPTO_HASH +	select CRYPTO_BLKCIPHER +	select CRYPTO_AUTHENC +	help +	  Support for using the cryptographic API with the AMD Cryptographic +	  Coprocessor. This module supports acceleration and offload of SHA +	  and AES algorithms.  If you choose 'M' here, this module will be +	  called ccp_crypto. diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile new file mode 100644 index 00000000000..d3505a01872 --- /dev/null +++ b/drivers/crypto/ccp/Makefile @@ -0,0 +1,10 @@ +obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o +ccp-objs := ccp-dev.o ccp-ops.o +ccp-objs += ccp-pci.o + +obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o +ccp-crypto-objs := ccp-crypto-main.o \ +		   ccp-crypto-aes.o \ +		   ccp-crypto-aes-cmac.o \ +		   ccp-crypto-aes-xts.o \ +		   ccp-crypto-sha.o diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c new file mode 100644 index 00000000000..8e162ad8208 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c @@ -0,0 +1,365 @@ +/* + * AMD Cryptographic Coprocessor (CCP) AES CMAC crypto API support + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/scatterlist.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/aes.h> +#include <crypto/hash.h> +#include <crypto/internal/hash.h> +#include <crypto/scatterwalk.h> + +#include "ccp-crypto.h" + + +static int ccp_aes_cmac_complete(struct crypto_async_request *async_req, +				 int ret) +{ +	struct ahash_request *req = ahash_request_cast(async_req); +	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); +	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); +	unsigned int digest_size = crypto_ahash_digestsize(tfm); + +	if (ret) +		goto e_free; + +	if (rctx->hash_rem) { +		/* Save remaining data to buffer */ +		unsigned int offset = rctx->nbytes - rctx->hash_rem; +		scatterwalk_map_and_copy(rctx->buf, rctx->src, +					 offset, rctx->hash_rem, 0); +		rctx->buf_count = rctx->hash_rem; +	} else +		rctx->buf_count = 0; + +	/* Update result area if supplied */ +	if (req->result) +		memcpy(req->result, rctx->iv, digest_size); + +e_free: +	sg_free_table(&rctx->data_sg); + +	return ret; +} + +static int ccp_do_cmac_update(struct ahash_request *req, unsigned int nbytes, +			      unsigned int final) +{ +	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); +	struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); +	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); +	struct scatterlist *sg, *cmac_key_sg = NULL; +	unsigned int block_size = +		crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); +	unsigned int need_pad, sg_count; +	gfp_t gfp; +	u64 len; +	int ret; + +	if (!ctx->u.aes.key_len) +		return -EINVAL; + +	if (nbytes) +		rctx->null_msg = 0; + +	len = (u64)rctx->buf_count + (u64)nbytes; + +	if (!final && (len <= block_size)) { +		scatterwalk_map_and_copy(rctx->buf + rctx->buf_count, req->src, +					 0, nbytes, 0); +		rctx->buf_count += nbytes; + +		return 0; +	} + +	rctx->src = req->src; +	rctx->nbytes = nbytes; + +	rctx->final = final; +	rctx->hash_rem = final ? 0 : len & (block_size - 1); +	rctx->hash_cnt = len - rctx->hash_rem; +	if (!final && !rctx->hash_rem) { +		/* CCP can't do zero length final, so keep some data around */ +		rctx->hash_cnt -= block_size; +		rctx->hash_rem = block_size; +	} + +	if (final && (rctx->null_msg || (len & (block_size - 1)))) +		need_pad = 1; +	else +		need_pad = 0; + +	sg_init_one(&rctx->iv_sg, rctx->iv, sizeof(rctx->iv)); + +	/* Build the data scatterlist table - allocate enough entries for all +	 * possible data pieces (buffer, input data, padding) +	 */ +	sg_count = (nbytes) ? sg_nents(req->src) + 2 : 2; +	gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? +		GFP_KERNEL : GFP_ATOMIC; +	ret = sg_alloc_table(&rctx->data_sg, sg_count, gfp); +	if (ret) +		return ret; + +	sg = NULL; +	if (rctx->buf_count) { +		sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count); +		sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->buf_sg); +	} + +	if (nbytes) +		sg = ccp_crypto_sg_table_add(&rctx->data_sg, req->src); + +	if (need_pad) { +		int pad_length = block_size - (len & (block_size - 1)); + +		rctx->hash_cnt += pad_length; + +		memset(rctx->pad, 0, sizeof(rctx->pad)); +		rctx->pad[0] = 0x80; +		sg_init_one(&rctx->pad_sg, rctx->pad, pad_length); +		sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->pad_sg); +	} +	if (sg) { +		sg_mark_end(sg); +		sg = rctx->data_sg.sgl; +	} + +	/* Initialize the K1/K2 scatterlist */ +	if (final) +		cmac_key_sg = (need_pad) ? 
&ctx->u.aes.k2_sg +					 : &ctx->u.aes.k1_sg; + +	memset(&rctx->cmd, 0, sizeof(rctx->cmd)); +	INIT_LIST_HEAD(&rctx->cmd.entry); +	rctx->cmd.engine = CCP_ENGINE_AES; +	rctx->cmd.u.aes.type = ctx->u.aes.type; +	rctx->cmd.u.aes.mode = ctx->u.aes.mode; +	rctx->cmd.u.aes.action = CCP_AES_ACTION_ENCRYPT; +	rctx->cmd.u.aes.key = &ctx->u.aes.key_sg; +	rctx->cmd.u.aes.key_len = ctx->u.aes.key_len; +	rctx->cmd.u.aes.iv = &rctx->iv_sg; +	rctx->cmd.u.aes.iv_len = AES_BLOCK_SIZE; +	rctx->cmd.u.aes.src = sg; +	rctx->cmd.u.aes.src_len = rctx->hash_cnt; +	rctx->cmd.u.aes.dst = NULL; +	rctx->cmd.u.aes.cmac_key = cmac_key_sg; +	rctx->cmd.u.aes.cmac_key_len = ctx->u.aes.kn_len; +	rctx->cmd.u.aes.cmac_final = final; + +	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); + +	return ret; +} + +static int ccp_aes_cmac_init(struct ahash_request *req) +{ +	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + +	memset(rctx, 0, sizeof(*rctx)); + +	rctx->null_msg = 1; + +	return 0; +} + +static int ccp_aes_cmac_update(struct ahash_request *req) +{ +	return ccp_do_cmac_update(req, req->nbytes, 0); +} + +static int ccp_aes_cmac_final(struct ahash_request *req) +{ +	return ccp_do_cmac_update(req, 0, 1); +} + +static int ccp_aes_cmac_finup(struct ahash_request *req) +{ +	return ccp_do_cmac_update(req, req->nbytes, 1); +} + +static int ccp_aes_cmac_digest(struct ahash_request *req) +{ +	int ret; + +	ret = ccp_aes_cmac_init(req); +	if (ret) +		return ret; + +	return ccp_aes_cmac_finup(req); +} + +static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, +			   unsigned int key_len) +{ +	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); +	struct ccp_crypto_ahash_alg *alg = +		ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm)); +	u64 k0_hi, k0_lo, k1_hi, k1_lo, k2_hi, k2_lo; +	u64 rb_hi = 0x00, rb_lo = 0x87; +	__be64 *gk; +	int ret; + +	switch (key_len) { +	case AES_KEYSIZE_128: +		ctx->u.aes.type = CCP_AES_TYPE_128; +		break; +	case AES_KEYSIZE_192: +		ctx->u.aes.type = CCP_AES_TYPE_192; +		break; +	case AES_KEYSIZE_256: +		ctx->u.aes.type = CCP_AES_TYPE_256; +		break; +	default: +		crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); +		return -EINVAL; +	} +	ctx->u.aes.mode = alg->mode; + +	/* Set to zero until complete */ +	ctx->u.aes.key_len = 0; + +	/* Set the key for the AES cipher used to generate the keys */ +	ret = crypto_cipher_setkey(ctx->u.aes.tfm_cipher, key, key_len); +	if (ret) +		return ret; + +	/* Encrypt a block of zeroes - use key area in context */ +	memset(ctx->u.aes.key, 0, sizeof(ctx->u.aes.key)); +	crypto_cipher_encrypt_one(ctx->u.aes.tfm_cipher, ctx->u.aes.key, +				  ctx->u.aes.key); + +	/* Generate K1 and K2 */ +	k0_hi = be64_to_cpu(*((__be64 *)ctx->u.aes.key)); +	k0_lo = be64_to_cpu(*((__be64 *)ctx->u.aes.key + 1)); + +	k1_hi = (k0_hi << 1) | (k0_lo >> 63); +	k1_lo = k0_lo << 1; +	if (ctx->u.aes.key[0] & 0x80) { +		k1_hi ^= rb_hi; +		k1_lo ^= rb_lo; +	} +	gk = (__be64 *)ctx->u.aes.k1; +	*gk = cpu_to_be64(k1_hi); +	gk++; +	*gk = cpu_to_be64(k1_lo); + +	k2_hi = (k1_hi << 1) | (k1_lo >> 63); +	k2_lo = k1_lo << 1; +	if (ctx->u.aes.k1[0] & 0x80) { +		k2_hi ^= rb_hi; +		k2_lo ^= rb_lo; +	} +	gk = (__be64 *)ctx->u.aes.k2; +	*gk = cpu_to_be64(k2_hi); +	gk++; +	*gk = cpu_to_be64(k2_lo); + +	ctx->u.aes.kn_len = sizeof(ctx->u.aes.k1); +	sg_init_one(&ctx->u.aes.k1_sg, ctx->u.aes.k1, sizeof(ctx->u.aes.k1)); +	sg_init_one(&ctx->u.aes.k2_sg, ctx->u.aes.k2, sizeof(ctx->u.aes.k2)); + +	/* Save the supplied key */ +	memset(ctx->u.aes.key, 0, sizeof(ctx->u.aes.key)); +	
memcpy(ctx->u.aes.key, key, key_len); +	ctx->u.aes.key_len = key_len; +	sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); + +	return ret; +} + +static int ccp_aes_cmac_cra_init(struct crypto_tfm *tfm) +{ +	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); +	struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); +	struct crypto_cipher *cipher_tfm; + +	ctx->complete = ccp_aes_cmac_complete; +	ctx->u.aes.key_len = 0; + +	crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx)); + +	cipher_tfm = crypto_alloc_cipher("aes", 0, +			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); +	if (IS_ERR(cipher_tfm)) { +		pr_warn("could not load aes cipher driver\n"); +		return PTR_ERR(cipher_tfm); +	} +	ctx->u.aes.tfm_cipher = cipher_tfm; + +	return 0; +} + +static void ccp_aes_cmac_cra_exit(struct crypto_tfm *tfm) +{ +	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + +	if (ctx->u.aes.tfm_cipher) +		crypto_free_cipher(ctx->u.aes.tfm_cipher); +	ctx->u.aes.tfm_cipher = NULL; +} + +int ccp_register_aes_cmac_algs(struct list_head *head) +{ +	struct ccp_crypto_ahash_alg *ccp_alg; +	struct ahash_alg *alg; +	struct hash_alg_common *halg; +	struct crypto_alg *base; +	int ret; + +	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); +	if (!ccp_alg) +		return -ENOMEM; + +	INIT_LIST_HEAD(&ccp_alg->entry); +	ccp_alg->mode = CCP_AES_MODE_CMAC; + +	alg = &ccp_alg->alg; +	alg->init = ccp_aes_cmac_init; +	alg->update = ccp_aes_cmac_update; +	alg->final = ccp_aes_cmac_final; +	alg->finup = ccp_aes_cmac_finup; +	alg->digest = ccp_aes_cmac_digest; +	alg->setkey = ccp_aes_cmac_setkey; + +	halg = &alg->halg; +	halg->digestsize = AES_BLOCK_SIZE; + +	base = &halg->base; +	snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "cmac(aes)"); +	snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "cmac-aes-ccp"); +	base->cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | +			  CRYPTO_ALG_KERN_DRIVER_ONLY | +			  CRYPTO_ALG_NEED_FALLBACK; +	base->cra_blocksize = AES_BLOCK_SIZE; +	base->cra_ctxsize = sizeof(struct ccp_ctx); +	base->cra_priority = CCP_CRA_PRIORITY; +	base->cra_type = &crypto_ahash_type; +	base->cra_init = ccp_aes_cmac_cra_init; +	base->cra_exit = ccp_aes_cmac_cra_exit; +	base->cra_module = THIS_MODULE; + +	ret = crypto_register_ahash(alg); +	if (ret) { +		pr_err("%s ahash algorithm registration error (%d)\n", +			base->cra_name, ret); +		kfree(ccp_alg); +		return ret; +	} + +	list_add(&ccp_alg->entry, head); + +	return 0; +} diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c new file mode 100644 index 00000000000..0cc5594b7de --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c @@ -0,0 +1,279 @@ +/* + * AMD Cryptographic Coprocessor (CCP) AES XTS crypto API support + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/scatterlist.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/aes.h> +#include <crypto/scatterwalk.h> + +#include "ccp-crypto.h" + + +struct ccp_aes_xts_def { +	const char *name; +	const char *drv_name; +}; + +static struct ccp_aes_xts_def aes_xts_algs[] = { +	{ +		.name		= "xts(aes)", +		.drv_name	= "xts-aes-ccp", +	}, +}; + +struct ccp_unit_size_map { +	unsigned int size; +	u32 value; +}; + +static struct ccp_unit_size_map unit_size_map[] = { +	{ +		.size	= 4096, +		.value	= CCP_XTS_AES_UNIT_SIZE_4096, +	}, +	{ +		.size	= 2048, +		.value	= CCP_XTS_AES_UNIT_SIZE_2048, +	}, +	{ +		.size	= 1024, +		.value	= CCP_XTS_AES_UNIT_SIZE_1024, +	}, +	{ +		.size	= 512, +		.value	= CCP_XTS_AES_UNIT_SIZE_512, +	}, +	{ +		.size	= 256, +		.value	= CCP_XTS_AES_UNIT_SIZE__LAST, +	}, +	{ +		.size	= 128, +		.value	= CCP_XTS_AES_UNIT_SIZE__LAST, +	}, +	{ +		.size	= 64, +		.value	= CCP_XTS_AES_UNIT_SIZE__LAST, +	}, +	{ +		.size	= 32, +		.value	= CCP_XTS_AES_UNIT_SIZE__LAST, +	}, +	{ +		.size	= 16, +		.value	= CCP_XTS_AES_UNIT_SIZE_16, +	}, +	{ +		.size	= 1, +		.value	= CCP_XTS_AES_UNIT_SIZE__LAST, +	}, +}; + +static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret) +{ +	struct ablkcipher_request *req = ablkcipher_request_cast(async_req); +	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); + +	if (ret) +		return ret; + +	memcpy(req->info, rctx->iv, AES_BLOCK_SIZE); + +	return 0; +} + +static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key, +			      unsigned int key_len) +{ +	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm)); + +	/* Only support 128-bit AES key with a 128-bit Tweak key, +	 * otherwise use the fallback +	 */ +	switch (key_len) { +	case AES_KEYSIZE_128 * 2: +		memcpy(ctx->u.aes.key, key, key_len); +		break; +	} +	ctx->u.aes.key_len = key_len / 2; +	sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); + +	return crypto_ablkcipher_setkey(ctx->u.aes.tfm_ablkcipher, key, +					key_len); +} + +static int ccp_aes_xts_crypt(struct ablkcipher_request *req, +			     unsigned int encrypt) +{ +	struct crypto_tfm *tfm = +		crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); +	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); +	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); +	unsigned int unit; +	int ret; + +	if (!ctx->u.aes.key_len) +		return -EINVAL; + +	if (req->nbytes & (AES_BLOCK_SIZE - 1)) +		return -EINVAL; + +	if (!req->info) +		return -EINVAL; + +	for (unit = 0; unit < ARRAY_SIZE(unit_size_map); unit++) +		if (!(req->nbytes & (unit_size_map[unit].size - 1))) +			break; + +	if ((unit_size_map[unit].value == CCP_XTS_AES_UNIT_SIZE__LAST) || +	    (ctx->u.aes.key_len != AES_KEYSIZE_128)) { +		/* Use the fallback to process the request for any +		 * unsupported unit sizes or key sizes +		 */ +		ablkcipher_request_set_tfm(req, ctx->u.aes.tfm_ablkcipher); +		ret = (encrypt) ? crypto_ablkcipher_encrypt(req) : +				  crypto_ablkcipher_decrypt(req); +		ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + +		return ret; +	} + +	memcpy(rctx->iv, req->info, AES_BLOCK_SIZE); +	sg_init_one(&rctx->iv_sg, rctx->iv, AES_BLOCK_SIZE); + +	memset(&rctx->cmd, 0, sizeof(rctx->cmd)); +	INIT_LIST_HEAD(&rctx->cmd.entry); +	rctx->cmd.engine = CCP_ENGINE_XTS_AES_128; +	rctx->cmd.u.xts.action = (encrypt) ? 
CCP_AES_ACTION_ENCRYPT +					   : CCP_AES_ACTION_DECRYPT; +	rctx->cmd.u.xts.unit_size = unit_size_map[unit].value; +	rctx->cmd.u.xts.key = &ctx->u.aes.key_sg; +	rctx->cmd.u.xts.key_len = ctx->u.aes.key_len; +	rctx->cmd.u.xts.iv = &rctx->iv_sg; +	rctx->cmd.u.xts.iv_len = AES_BLOCK_SIZE; +	rctx->cmd.u.xts.src = req->src; +	rctx->cmd.u.xts.src_len = req->nbytes; +	rctx->cmd.u.xts.dst = req->dst; + +	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); + +	return ret; +} + +static int ccp_aes_xts_encrypt(struct ablkcipher_request *req) +{ +	return ccp_aes_xts_crypt(req, 1); +} + +static int ccp_aes_xts_decrypt(struct ablkcipher_request *req) +{ +	return ccp_aes_xts_crypt(req, 0); +} + +static int ccp_aes_xts_cra_init(struct crypto_tfm *tfm) +{ +	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); +	struct crypto_ablkcipher *fallback_tfm; + +	ctx->complete = ccp_aes_xts_complete; +	ctx->u.aes.key_len = 0; + +	fallback_tfm = crypto_alloc_ablkcipher(crypto_tfm_alg_name(tfm), 0, +					       CRYPTO_ALG_ASYNC | +					       CRYPTO_ALG_NEED_FALLBACK); +	if (IS_ERR(fallback_tfm)) { +		pr_warn("could not load fallback driver %s\n", +			crypto_tfm_alg_name(tfm)); +		return PTR_ERR(fallback_tfm); +	} +	ctx->u.aes.tfm_ablkcipher = fallback_tfm; + +	tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx) + +				      fallback_tfm->base.crt_ablkcipher.reqsize; + +	return 0; +} + +static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm) +{ +	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + +	if (ctx->u.aes.tfm_ablkcipher) +		crypto_free_ablkcipher(ctx->u.aes.tfm_ablkcipher); +	ctx->u.aes.tfm_ablkcipher = NULL; +} + + +static int ccp_register_aes_xts_alg(struct list_head *head, +				    const struct ccp_aes_xts_def *def) +{ +	struct ccp_crypto_ablkcipher_alg *ccp_alg; +	struct crypto_alg *alg; +	int ret; + +	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); +	if (!ccp_alg) +		return -ENOMEM; + +	INIT_LIST_HEAD(&ccp_alg->entry); + +	alg = &ccp_alg->alg; + +	snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); +	snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", +		 def->drv_name); +	alg->cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC | +			 CRYPTO_ALG_KERN_DRIVER_ONLY | +			 CRYPTO_ALG_NEED_FALLBACK; +	alg->cra_blocksize = AES_BLOCK_SIZE; +	alg->cra_ctxsize = sizeof(struct ccp_ctx); +	alg->cra_priority = CCP_CRA_PRIORITY; +	alg->cra_type = &crypto_ablkcipher_type; +	alg->cra_ablkcipher.setkey = ccp_aes_xts_setkey; +	alg->cra_ablkcipher.encrypt = ccp_aes_xts_encrypt; +	alg->cra_ablkcipher.decrypt = ccp_aes_xts_decrypt; +	alg->cra_ablkcipher.min_keysize = AES_MIN_KEY_SIZE * 2; +	alg->cra_ablkcipher.max_keysize = AES_MAX_KEY_SIZE * 2; +	alg->cra_ablkcipher.ivsize = AES_BLOCK_SIZE; +	alg->cra_init = ccp_aes_xts_cra_init; +	alg->cra_exit = ccp_aes_xts_cra_exit; +	alg->cra_module = THIS_MODULE; + +	ret = crypto_register_alg(alg); +	if (ret) { +		pr_err("%s ablkcipher algorithm registration error (%d)\n", +			alg->cra_name, ret); +		kfree(ccp_alg); +		return ret; +	} + +	list_add(&ccp_alg->entry, head); + +	return 0; +} + +int ccp_register_aes_xts_algs(struct list_head *head) +{ +	int i, ret; + +	for (i = 0; i < ARRAY_SIZE(aes_xts_algs); i++) { +		ret = ccp_register_aes_xts_alg(head, &aes_xts_algs[i]); +		if (ret) +			return ret; +	} + +	return 0; +} diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c new file mode 100644 index 00000000000..e46490db0f6 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-aes.c @@ -0,0 +1,369 @@ +/* + * AMD Cryptographic 
Coprocessor (CCP) AES crypto API support + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/scatterlist.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/aes.h> +#include <crypto/ctr.h> +#include <crypto/scatterwalk.h> + +#include "ccp-crypto.h" + + +static int ccp_aes_complete(struct crypto_async_request *async_req, int ret) +{ +	struct ablkcipher_request *req = ablkcipher_request_cast(async_req); +	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); +	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); + +	if (ret) +		return ret; + +	if (ctx->u.aes.mode != CCP_AES_MODE_ECB) +		memcpy(req->info, rctx->iv, AES_BLOCK_SIZE); + +	return 0; +} + +static int ccp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, +			  unsigned int key_len) +{ +	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm)); +	struct ccp_crypto_ablkcipher_alg *alg = +		ccp_crypto_ablkcipher_alg(crypto_ablkcipher_tfm(tfm)); + +	switch (key_len) { +	case AES_KEYSIZE_128: +		ctx->u.aes.type = CCP_AES_TYPE_128; +		break; +	case AES_KEYSIZE_192: +		ctx->u.aes.type = CCP_AES_TYPE_192; +		break; +	case AES_KEYSIZE_256: +		ctx->u.aes.type = CCP_AES_TYPE_256; +		break; +	default: +		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); +		return -EINVAL; +	} +	ctx->u.aes.mode = alg->mode; +	ctx->u.aes.key_len = key_len; + +	memcpy(ctx->u.aes.key, key, key_len); +	sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); + +	return 0; +} + +static int ccp_aes_crypt(struct ablkcipher_request *req, bool encrypt) +{ +	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); +	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); +	struct scatterlist *iv_sg = NULL; +	unsigned int iv_len = 0; +	int ret; + +	if (!ctx->u.aes.key_len) +		return -EINVAL; + +	if (((ctx->u.aes.mode == CCP_AES_MODE_ECB) || +	     (ctx->u.aes.mode == CCP_AES_MODE_CBC) || +	     (ctx->u.aes.mode == CCP_AES_MODE_CFB)) && +	    (req->nbytes & (AES_BLOCK_SIZE - 1))) +		return -EINVAL; + +	if (ctx->u.aes.mode != CCP_AES_MODE_ECB) { +		if (!req->info) +			return -EINVAL; + +		memcpy(rctx->iv, req->info, AES_BLOCK_SIZE); +		iv_sg = &rctx->iv_sg; +		iv_len = AES_BLOCK_SIZE; +		sg_init_one(iv_sg, rctx->iv, iv_len); +	} + +	memset(&rctx->cmd, 0, sizeof(rctx->cmd)); +	INIT_LIST_HEAD(&rctx->cmd.entry); +	rctx->cmd.engine = CCP_ENGINE_AES; +	rctx->cmd.u.aes.type = ctx->u.aes.type; +	rctx->cmd.u.aes.mode = ctx->u.aes.mode; +	rctx->cmd.u.aes.action = +		(encrypt) ? 
CCP_AES_ACTION_ENCRYPT : CCP_AES_ACTION_DECRYPT; +	rctx->cmd.u.aes.key = &ctx->u.aes.key_sg; +	rctx->cmd.u.aes.key_len = ctx->u.aes.key_len; +	rctx->cmd.u.aes.iv = iv_sg; +	rctx->cmd.u.aes.iv_len = iv_len; +	rctx->cmd.u.aes.src = req->src; +	rctx->cmd.u.aes.src_len = req->nbytes; +	rctx->cmd.u.aes.dst = req->dst; + +	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); + +	return ret; +} + +static int ccp_aes_encrypt(struct ablkcipher_request *req) +{ +	return ccp_aes_crypt(req, true); +} + +static int ccp_aes_decrypt(struct ablkcipher_request *req) +{ +	return ccp_aes_crypt(req, false); +} + +static int ccp_aes_cra_init(struct crypto_tfm *tfm) +{ +	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + +	ctx->complete = ccp_aes_complete; +	ctx->u.aes.key_len = 0; + +	tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx); + +	return 0; +} + +static void ccp_aes_cra_exit(struct crypto_tfm *tfm) +{ +} + +static int ccp_aes_rfc3686_complete(struct crypto_async_request *async_req, +				    int ret) +{ +	struct ablkcipher_request *req = ablkcipher_request_cast(async_req); +	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); + +	/* Restore the original pointer */ +	req->info = rctx->rfc3686_info; + +	return ccp_aes_complete(async_req, ret); +} + +static int ccp_aes_rfc3686_setkey(struct crypto_ablkcipher *tfm, const u8 *key, +				  unsigned int key_len) +{ +	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm)); + +	if (key_len < CTR_RFC3686_NONCE_SIZE) +		return -EINVAL; + +	key_len -= CTR_RFC3686_NONCE_SIZE; +	memcpy(ctx->u.aes.nonce, key + key_len, CTR_RFC3686_NONCE_SIZE); + +	return ccp_aes_setkey(tfm, key, key_len); +} + +static int ccp_aes_rfc3686_crypt(struct ablkcipher_request *req, bool encrypt) +{ +	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); +	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); +	u8 *iv; + +	/* Initialize the CTR block */ +	iv = rctx->rfc3686_iv; +	memcpy(iv, ctx->u.aes.nonce, CTR_RFC3686_NONCE_SIZE); + +	iv += CTR_RFC3686_NONCE_SIZE; +	memcpy(iv, req->info, CTR_RFC3686_IV_SIZE); + +	iv += CTR_RFC3686_IV_SIZE; +	*(__be32 *)iv = cpu_to_be32(1); + +	/* Point to the new IV */ +	rctx->rfc3686_info = req->info; +	req->info = rctx->rfc3686_iv; + +	return ccp_aes_crypt(req, encrypt); +} + +static int ccp_aes_rfc3686_encrypt(struct ablkcipher_request *req) +{ +	return ccp_aes_rfc3686_crypt(req, true); +} + +static int ccp_aes_rfc3686_decrypt(struct ablkcipher_request *req) +{ +	return ccp_aes_rfc3686_crypt(req, false); +} + +static int ccp_aes_rfc3686_cra_init(struct crypto_tfm *tfm) +{ +	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + +	ctx->complete = ccp_aes_rfc3686_complete; +	ctx->u.aes.key_len = 0; + +	tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx); + +	return 0; +} + +static void ccp_aes_rfc3686_cra_exit(struct crypto_tfm *tfm) +{ +} + +static struct crypto_alg ccp_aes_defaults = { +	.cra_flags	= CRYPTO_ALG_TYPE_ABLKCIPHER | +			  CRYPTO_ALG_ASYNC | +			  CRYPTO_ALG_KERN_DRIVER_ONLY | +			  CRYPTO_ALG_NEED_FALLBACK, +	.cra_blocksize	= AES_BLOCK_SIZE, +	.cra_ctxsize	= sizeof(struct ccp_ctx), +	.cra_priority	= CCP_CRA_PRIORITY, +	.cra_type	= &crypto_ablkcipher_type, +	.cra_init	= ccp_aes_cra_init, +	.cra_exit	= ccp_aes_cra_exit, +	.cra_module	= THIS_MODULE, +	.cra_ablkcipher	= { +		.setkey		= ccp_aes_setkey, +		.encrypt	= ccp_aes_encrypt, +		.decrypt	= ccp_aes_decrypt, +		.min_keysize	= AES_MIN_KEY_SIZE, +		.max_keysize	= AES_MAX_KEY_SIZE, +	}, +}; + +static struct crypto_alg ccp_aes_rfc3686_defaults = { +	.cra_flags	= 
CRYPTO_ALG_TYPE_ABLKCIPHER | +			   CRYPTO_ALG_ASYNC | +			   CRYPTO_ALG_KERN_DRIVER_ONLY | +			   CRYPTO_ALG_NEED_FALLBACK, +	.cra_blocksize	= CTR_RFC3686_BLOCK_SIZE, +	.cra_ctxsize	= sizeof(struct ccp_ctx), +	.cra_priority	= CCP_CRA_PRIORITY, +	.cra_type	= &crypto_ablkcipher_type, +	.cra_init	= ccp_aes_rfc3686_cra_init, +	.cra_exit	= ccp_aes_rfc3686_cra_exit, +	.cra_module	= THIS_MODULE, +	.cra_ablkcipher	= { +		.setkey		= ccp_aes_rfc3686_setkey, +		.encrypt	= ccp_aes_rfc3686_encrypt, +		.decrypt	= ccp_aes_rfc3686_decrypt, +		.min_keysize	= AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE, +		.max_keysize	= AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE, +	}, +}; + +struct ccp_aes_def { +	enum ccp_aes_mode mode; +	const char *name; +	const char *driver_name; +	unsigned int blocksize; +	unsigned int ivsize; +	struct crypto_alg *alg_defaults; +}; + +static struct ccp_aes_def aes_algs[] = { +	{ +		.mode		= CCP_AES_MODE_ECB, +		.name		= "ecb(aes)", +		.driver_name	= "ecb-aes-ccp", +		.blocksize	= AES_BLOCK_SIZE, +		.ivsize		= 0, +		.alg_defaults	= &ccp_aes_defaults, +	}, +	{ +		.mode		= CCP_AES_MODE_CBC, +		.name		= "cbc(aes)", +		.driver_name	= "cbc-aes-ccp", +		.blocksize	= AES_BLOCK_SIZE, +		.ivsize		= AES_BLOCK_SIZE, +		.alg_defaults	= &ccp_aes_defaults, +	}, +	{ +		.mode		= CCP_AES_MODE_CFB, +		.name		= "cfb(aes)", +		.driver_name	= "cfb-aes-ccp", +		.blocksize	= AES_BLOCK_SIZE, +		.ivsize		= AES_BLOCK_SIZE, +		.alg_defaults	= &ccp_aes_defaults, +	}, +	{ +		.mode		= CCP_AES_MODE_OFB, +		.name		= "ofb(aes)", +		.driver_name	= "ofb-aes-ccp", +		.blocksize	= 1, +		.ivsize		= AES_BLOCK_SIZE, +		.alg_defaults	= &ccp_aes_defaults, +	}, +	{ +		.mode		= CCP_AES_MODE_CTR, +		.name		= "ctr(aes)", +		.driver_name	= "ctr-aes-ccp", +		.blocksize	= 1, +		.ivsize		= AES_BLOCK_SIZE, +		.alg_defaults	= &ccp_aes_defaults, +	}, +	{ +		.mode		= CCP_AES_MODE_CTR, +		.name		= "rfc3686(ctr(aes))", +		.driver_name	= "rfc3686-ctr-aes-ccp", +		.blocksize	= 1, +		.ivsize		= CTR_RFC3686_IV_SIZE, +		.alg_defaults	= &ccp_aes_rfc3686_defaults, +	}, +}; + +static int ccp_register_aes_alg(struct list_head *head, +				const struct ccp_aes_def *def) +{ +	struct ccp_crypto_ablkcipher_alg *ccp_alg; +	struct crypto_alg *alg; +	int ret; + +	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); +	if (!ccp_alg) +		return -ENOMEM; + +	INIT_LIST_HEAD(&ccp_alg->entry); + +	ccp_alg->mode = def->mode; + +	/* Copy the defaults and override as necessary */ +	alg = &ccp_alg->alg; +	*alg = *def->alg_defaults; +	snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); +	snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", +		 def->driver_name); +	alg->cra_blocksize = def->blocksize; +	alg->cra_ablkcipher.ivsize = def->ivsize; + +	ret = crypto_register_alg(alg); +	if (ret) { +		pr_err("%s ablkcipher algorithm registration error (%d)\n", +			alg->cra_name, ret); +		kfree(ccp_alg); +		return ret; +	} + +	list_add(&ccp_alg->entry, head); + +	return 0; +} + +int ccp_register_aes_algs(struct list_head *head) +{ +	int i, ret; + +	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { +		ret = ccp_register_aes_alg(head, &aes_algs[i]); +		if (ret) +			return ret; +	} + +	return 0; +} diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c new file mode 100644 index 00000000000..20dc848481e --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-main.c @@ -0,0 +1,388 @@ +/* + * AMD Cryptographic Coprocessor (CCP) crypto API support + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. 
+ * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/ccp.h> +#include <linux/scatterlist.h> +#include <crypto/internal/hash.h> + +#include "ccp-crypto.h" + +MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.0.0"); +MODULE_DESCRIPTION("AMD Cryptographic Coprocessor crypto API support"); + +static unsigned int aes_disable; +module_param(aes_disable, uint, 0444); +MODULE_PARM_DESC(aes_disable, "Disable use of AES - any non-zero value"); + +static unsigned int sha_disable; +module_param(sha_disable, uint, 0444); +MODULE_PARM_DESC(sha_disable, "Disable use of SHA - any non-zero value"); + + +/* List heads for the supported algorithms */ +static LIST_HEAD(hash_algs); +static LIST_HEAD(cipher_algs); + +/* For any tfm, requests for that tfm must be returned on the order + * received.  With multiple queues available, the CCP can process more + * than one cmd at a time.  Therefore we must maintain a cmd list to insure + * the proper ordering of requests on a given tfm. + */ +struct ccp_crypto_queue { +	struct list_head cmds; +	struct list_head *backlog; +	unsigned int cmd_count; +}; +#define CCP_CRYPTO_MAX_QLEN	100 + +static struct ccp_crypto_queue req_queue; +static spinlock_t req_queue_lock; + +struct ccp_crypto_cmd { +	struct list_head entry; + +	struct ccp_cmd *cmd; + +	/* Save the crypto_tfm and crypto_async_request addresses +	 * separately to avoid any reference to a possibly invalid +	 * crypto_async_request structure after invoking the request +	 * callback +	 */ +	struct crypto_async_request *req; +	struct crypto_tfm *tfm; + +	/* Used for held command processing to determine state */ +	int ret; +}; + +struct ccp_crypto_cpu { +	struct work_struct work; +	struct completion completion; +	struct ccp_crypto_cmd *crypto_cmd; +	int err; +}; + + +static inline bool ccp_crypto_success(int err) +{ +	if (err && (err != -EINPROGRESS) && (err != -EBUSY)) +		return false; + +	return true; +} + +static struct ccp_crypto_cmd *ccp_crypto_cmd_complete( +	struct ccp_crypto_cmd *crypto_cmd, struct ccp_crypto_cmd **backlog) +{ +	struct ccp_crypto_cmd *held = NULL, *tmp; +	unsigned long flags; + +	*backlog = NULL; + +	spin_lock_irqsave(&req_queue_lock, flags); + +	/* Held cmds will be after the current cmd in the queue so start +	 * searching for a cmd with a matching tfm for submission. +	 */ +	tmp = crypto_cmd; +	list_for_each_entry_continue(tmp, &req_queue.cmds, entry) { +		if (crypto_cmd->tfm != tmp->tfm) +			continue; +		held = tmp; +		break; +	} + +	/* Process the backlog: +	 *   Because cmds can be executed from any point in the cmd list +	 *   special precautions have to be taken when handling the backlog. 
+	 */ +	if (req_queue.backlog != &req_queue.cmds) { +		/* Skip over this cmd if it is the next backlog cmd */ +		if (req_queue.backlog == &crypto_cmd->entry) +			req_queue.backlog = crypto_cmd->entry.next; + +		*backlog = container_of(req_queue.backlog, +					struct ccp_crypto_cmd, entry); +		req_queue.backlog = req_queue.backlog->next; + +		/* Skip over this cmd if it is now the next backlog cmd */ +		if (req_queue.backlog == &crypto_cmd->entry) +			req_queue.backlog = crypto_cmd->entry.next; +	} + +	/* Remove the cmd entry from the list of cmds */ +	req_queue.cmd_count--; +	list_del(&crypto_cmd->entry); + +	spin_unlock_irqrestore(&req_queue_lock, flags); + +	return held; +} + +static void ccp_crypto_complete(void *data, int err) +{ +	struct ccp_crypto_cmd *crypto_cmd = data; +	struct ccp_crypto_cmd *held, *next, *backlog; +	struct crypto_async_request *req = crypto_cmd->req; +	struct ccp_ctx *ctx = crypto_tfm_ctx(req->tfm); +	int ret; + +	if (err == -EINPROGRESS) { +		/* Only propogate the -EINPROGRESS if necessary */ +		if (crypto_cmd->ret == -EBUSY) { +			crypto_cmd->ret = -EINPROGRESS; +			req->complete(req, -EINPROGRESS); +		} + +		return; +	} + +	/* Operation has completed - update the queue before invoking +	 * the completion callbacks and retrieve the next cmd (cmd with +	 * a matching tfm) that can be submitted to the CCP. +	 */ +	held = ccp_crypto_cmd_complete(crypto_cmd, &backlog); +	if (backlog) { +		backlog->ret = -EINPROGRESS; +		backlog->req->complete(backlog->req, -EINPROGRESS); +	} + +	/* Transition the state from -EBUSY to -EINPROGRESS first */ +	if (crypto_cmd->ret == -EBUSY) +		req->complete(req, -EINPROGRESS); + +	/* Completion callbacks */ +	ret = err; +	if (ctx->complete) +		ret = ctx->complete(req, ret); +	req->complete(req, ret); + +	/* Submit the next cmd */ +	while (held) { +		/* Since we have already queued the cmd, we must indicate that +		 * we can backlog so as not to "lose" this request. +		 */ +		held->cmd->flags |= CCP_CMD_MAY_BACKLOG; +		ret = ccp_enqueue_cmd(held->cmd); +		if (ccp_crypto_success(ret)) +			break; + +		/* Error occurred, report it and get the next entry */ +		ctx = crypto_tfm_ctx(held->req->tfm); +		if (ctx->complete) +			ret = ctx->complete(held->req, ret); +		held->req->complete(held->req, ret); + +		next = ccp_crypto_cmd_complete(held, &backlog); +		if (backlog) { +			backlog->ret = -EINPROGRESS; +			backlog->req->complete(backlog->req, -EINPROGRESS); +		} + +		kfree(held); +		held = next; +	} + +	kfree(crypto_cmd); +} + +static int ccp_crypto_enqueue_cmd(struct ccp_crypto_cmd *crypto_cmd) +{ +	struct ccp_crypto_cmd *active = NULL, *tmp; +	unsigned long flags; +	bool free_cmd = true; +	int ret; + +	spin_lock_irqsave(&req_queue_lock, flags); + +	/* Check if the cmd can/should be queued */ +	if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) { +		ret = -EBUSY; +		if (!(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG)) +			goto e_lock; +	} + +	/* Look for an entry with the same tfm.  If there is a cmd +	 * with the same tfm in the list then the current cmd cannot +	 * be submitted to the CCP yet. 
+	 */ +	list_for_each_entry(tmp, &req_queue.cmds, entry) { +		if (crypto_cmd->tfm != tmp->tfm) +			continue; +		active = tmp; +		break; +	} + +	ret = -EINPROGRESS; +	if (!active) { +		ret = ccp_enqueue_cmd(crypto_cmd->cmd); +		if (!ccp_crypto_success(ret)) +			goto e_lock;	/* Error, don't queue it */ +		if ((ret == -EBUSY) && +		    !(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG)) +			goto e_lock;	/* Not backlogging, don't queue it */ +	} + +	if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) { +		ret = -EBUSY; +		if (req_queue.backlog == &req_queue.cmds) +			req_queue.backlog = &crypto_cmd->entry; +	} +	crypto_cmd->ret = ret; + +	req_queue.cmd_count++; +	list_add_tail(&crypto_cmd->entry, &req_queue.cmds); + +	free_cmd = false; + +e_lock: +	spin_unlock_irqrestore(&req_queue_lock, flags); + +	if (free_cmd) +		kfree(crypto_cmd); + +	return ret; +} + +/** + * ccp_crypto_enqueue_request - queue an crypto async request for processing + *				by the CCP + * + * @req: crypto_async_request struct to be processed + * @cmd: ccp_cmd struct to be sent to the CCP + */ +int ccp_crypto_enqueue_request(struct crypto_async_request *req, +			       struct ccp_cmd *cmd) +{ +	struct ccp_crypto_cmd *crypto_cmd; +	gfp_t gfp; + +	gfp = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; + +	crypto_cmd = kzalloc(sizeof(*crypto_cmd), gfp); +	if (!crypto_cmd) +		return -ENOMEM; + +	/* The tfm pointer must be saved and not referenced from the +	 * crypto_async_request (req) pointer because it is used after +	 * completion callback for the request and the req pointer +	 * might not be valid anymore. +	 */ +	crypto_cmd->cmd = cmd; +	crypto_cmd->req = req; +	crypto_cmd->tfm = req->tfm; + +	cmd->callback = ccp_crypto_complete; +	cmd->data = crypto_cmd; + +	if (req->flags & CRYPTO_TFM_REQ_MAY_BACKLOG) +		cmd->flags |= CCP_CMD_MAY_BACKLOG; +	else +		cmd->flags &= ~CCP_CMD_MAY_BACKLOG; + +	return ccp_crypto_enqueue_cmd(crypto_cmd); +} + +struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table, +					    struct scatterlist *sg_add) +{ +	struct scatterlist *sg, *sg_last = NULL; + +	for (sg = table->sgl; sg; sg = sg_next(sg)) +		if (!sg_page(sg)) +			break; +	BUG_ON(!sg); + +	for (; sg && sg_add; sg = sg_next(sg), sg_add = sg_next(sg_add)) { +		sg_set_page(sg, sg_page(sg_add), sg_add->length, +			    sg_add->offset); +		sg_last = sg; +	} +	BUG_ON(sg_add); + +	return sg_last; +} + +static int ccp_register_algs(void) +{ +	int ret; + +	if (!aes_disable) { +		ret = ccp_register_aes_algs(&cipher_algs); +		if (ret) +			return ret; + +		ret = ccp_register_aes_cmac_algs(&hash_algs); +		if (ret) +			return ret; + +		ret = ccp_register_aes_xts_algs(&cipher_algs); +		if (ret) +			return ret; +	} + +	if (!sha_disable) { +		ret = ccp_register_sha_algs(&hash_algs); +		if (ret) +			return ret; +	} + +	return 0; +} + +static void ccp_unregister_algs(void) +{ +	struct ccp_crypto_ahash_alg *ahash_alg, *ahash_tmp; +	struct ccp_crypto_ablkcipher_alg *ablk_alg, *ablk_tmp; + +	list_for_each_entry_safe(ahash_alg, ahash_tmp, &hash_algs, entry) { +		crypto_unregister_ahash(&ahash_alg->alg); +		list_del(&ahash_alg->entry); +		kfree(ahash_alg); +	} + +	list_for_each_entry_safe(ablk_alg, ablk_tmp, &cipher_algs, entry) { +		crypto_unregister_alg(&ablk_alg->alg); +		list_del(&ablk_alg->entry); +		kfree(ablk_alg); +	} +} + +static int ccp_crypto_init(void) +{ +	int ret; + +	spin_lock_init(&req_queue_lock); +	INIT_LIST_HEAD(&req_queue.cmds); +	req_queue.backlog = &req_queue.cmds; +	req_queue.cmd_count = 0; + +	ret = ccp_register_algs(); 
+	if (ret) +		ccp_unregister_algs(); + +	return ret; +} + +static void ccp_crypto_exit(void) +{ +	ccp_unregister_algs(); +} + +module_init(ccp_crypto_init); +module_exit(ccp_crypto_exit); diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c new file mode 100644 index 00000000000..873f2342524 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -0,0 +1,437 @@ +/* + * AMD Cryptographic Coprocessor (CCP) SHA crypto API support + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/scatterlist.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/hash.h> +#include <crypto/internal/hash.h> +#include <crypto/sha.h> +#include <crypto/scatterwalk.h> + +#include "ccp-crypto.h" + + +static int ccp_sha_complete(struct crypto_async_request *async_req, int ret) +{ +	struct ahash_request *req = ahash_request_cast(async_req); +	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); +	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); +	unsigned int digest_size = crypto_ahash_digestsize(tfm); + +	if (ret) +		goto e_free; + +	if (rctx->hash_rem) { +		/* Save remaining data to buffer */ +		unsigned int offset = rctx->nbytes - rctx->hash_rem; +		scatterwalk_map_and_copy(rctx->buf, rctx->src, +					 offset, rctx->hash_rem, 0); +		rctx->buf_count = rctx->hash_rem; +	} else +		rctx->buf_count = 0; + +	/* Update result area if supplied */ +	if (req->result) +		memcpy(req->result, rctx->ctx, digest_size); + +e_free: +	sg_free_table(&rctx->data_sg); + +	return ret; +} + +static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes, +			     unsigned int final) +{ +	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); +	struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); +	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); +	struct scatterlist *sg; +	unsigned int block_size = +		crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); +	unsigned int sg_count; +	gfp_t gfp; +	u64 len; +	int ret; + +	len = (u64)rctx->buf_count + (u64)nbytes; + +	if (!final && (len <= block_size)) { +		scatterwalk_map_and_copy(rctx->buf + rctx->buf_count, req->src, +					 0, nbytes, 0); +		rctx->buf_count += nbytes; + +		return 0; +	} + +	rctx->src = req->src; +	rctx->nbytes = nbytes; + +	rctx->final = final; +	rctx->hash_rem = final ? 0 : len & (block_size - 1); +	rctx->hash_cnt = len - rctx->hash_rem; +	if (!final && !rctx->hash_rem) { +		/* CCP can't do zero length final, so keep some data around */ +		rctx->hash_cnt -= block_size; +		rctx->hash_rem = block_size; +	} + +	/* Initialize the context scatterlist */ +	sg_init_one(&rctx->ctx_sg, rctx->ctx, sizeof(rctx->ctx)); + +	sg = NULL; +	if (rctx->buf_count && nbytes) { +		/* Build the data scatterlist table - allocate enough entries +		 * for both data pieces (buffer and input data) +		 */ +		gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 
+			GFP_KERNEL : GFP_ATOMIC; +		sg_count = sg_nents(req->src) + 1; +		ret = sg_alloc_table(&rctx->data_sg, sg_count, gfp); +		if (ret) +			return ret; + +		sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count); +		sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->buf_sg); +		sg = ccp_crypto_sg_table_add(&rctx->data_sg, req->src); +		sg_mark_end(sg); + +		sg = rctx->data_sg.sgl; +	} else if (rctx->buf_count) { +		sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count); + +		sg = &rctx->buf_sg; +	} else if (nbytes) { +		sg = req->src; +	} + +	rctx->msg_bits += (rctx->hash_cnt << 3);	/* Total in bits */ + +	memset(&rctx->cmd, 0, sizeof(rctx->cmd)); +	INIT_LIST_HEAD(&rctx->cmd.entry); +	rctx->cmd.engine = CCP_ENGINE_SHA; +	rctx->cmd.u.sha.type = rctx->type; +	rctx->cmd.u.sha.ctx = &rctx->ctx_sg; +	rctx->cmd.u.sha.ctx_len = sizeof(rctx->ctx); +	rctx->cmd.u.sha.src = sg; +	rctx->cmd.u.sha.src_len = rctx->hash_cnt; +	rctx->cmd.u.sha.opad = ctx->u.sha.key_len ? +		&ctx->u.sha.opad_sg : NULL; +	rctx->cmd.u.sha.opad_len = ctx->u.sha.key_len ? +		ctx->u.sha.opad_count : 0; +	rctx->cmd.u.sha.first = rctx->first; +	rctx->cmd.u.sha.final = rctx->final; +	rctx->cmd.u.sha.msg_bits = rctx->msg_bits; + +	rctx->first = 0; + +	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); + +	return ret; +} + +static int ccp_sha_init(struct ahash_request *req) +{ +	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); +	struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); +	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); +	struct ccp_crypto_ahash_alg *alg = +		ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm)); +	unsigned int block_size = +		crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); + +	memset(rctx, 0, sizeof(*rctx)); + +	rctx->type = alg->type; +	rctx->first = 1; + +	if (ctx->u.sha.key_len) { +		/* Buffer the HMAC key for first update */ +		memcpy(rctx->buf, ctx->u.sha.ipad, block_size); +		rctx->buf_count = block_size; +	} + +	return 0; +} + +static int ccp_sha_update(struct ahash_request *req) +{ +	return ccp_do_sha_update(req, req->nbytes, 0); +} + +static int ccp_sha_final(struct ahash_request *req) +{ +	return ccp_do_sha_update(req, 0, 1); +} + +static int ccp_sha_finup(struct ahash_request *req) +{ +	return ccp_do_sha_update(req, req->nbytes, 1); +} + +static int ccp_sha_digest(struct ahash_request *req) +{ +	int ret; + +	ret = ccp_sha_init(req); +	if (ret) +		return ret; + +	return ccp_sha_finup(req); +} + +static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key, +			  unsigned int key_len) +{ +	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); +	struct crypto_shash *shash = ctx->u.sha.hmac_tfm; +	struct { +		struct shash_desc sdesc; +		char ctx[crypto_shash_descsize(shash)]; +	} desc; +	unsigned int block_size = crypto_shash_blocksize(shash); +	unsigned int digest_size = crypto_shash_digestsize(shash); +	int i, ret; + +	/* Set to zero until complete */ +	ctx->u.sha.key_len = 0; + +	/* Clear key area to provide zero padding for keys smaller +	 * than the block size +	 */ +	memset(ctx->u.sha.key, 0, sizeof(ctx->u.sha.key)); + +	if (key_len > block_size) { +		/* Must hash the input key */ +		desc.sdesc.tfm = shash; +		desc.sdesc.flags = crypto_ahash_get_flags(tfm) & +			CRYPTO_TFM_REQ_MAY_SLEEP; + +		ret = crypto_shash_digest(&desc.sdesc, key, key_len, +					  ctx->u.sha.key); +		if (ret) { +			crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); +			return -EINVAL; +		} + +		key_len = digest_size; +	} else +		memcpy(ctx->u.sha.key, key, key_len); + +	for (i = 0; i < block_size; i++) 
{ +		ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ 0x36; +		ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ 0x5c; +	} + +	sg_init_one(&ctx->u.sha.opad_sg, ctx->u.sha.opad, block_size); +	ctx->u.sha.opad_count = block_size; + +	ctx->u.sha.key_len = key_len; + +	return 0; +} + +static int ccp_sha_cra_init(struct crypto_tfm *tfm) +{ +	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); +	struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); + +	ctx->complete = ccp_sha_complete; +	ctx->u.sha.key_len = 0; + +	crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_sha_req_ctx)); + +	return 0; +} + +static void ccp_sha_cra_exit(struct crypto_tfm *tfm) +{ +} + +static int ccp_hmac_sha_cra_init(struct crypto_tfm *tfm) +{ +	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); +	struct ccp_crypto_ahash_alg *alg = ccp_crypto_ahash_alg(tfm); +	struct crypto_shash *hmac_tfm; + +	hmac_tfm = crypto_alloc_shash(alg->child_alg, 0, 0); +	if (IS_ERR(hmac_tfm)) { +		pr_warn("could not load driver %s need for HMAC support\n", +			alg->child_alg); +		return PTR_ERR(hmac_tfm); +	} + +	ctx->u.sha.hmac_tfm = hmac_tfm; + +	return ccp_sha_cra_init(tfm); +} + +static void ccp_hmac_sha_cra_exit(struct crypto_tfm *tfm) +{ +	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + +	if (ctx->u.sha.hmac_tfm) +		crypto_free_shash(ctx->u.sha.hmac_tfm); + +	ccp_sha_cra_exit(tfm); +} + +struct ccp_sha_def { +	const char *name; +	const char *drv_name; +	enum ccp_sha_type type; +	u32 digest_size; +	u32 block_size; +}; + +static struct ccp_sha_def sha_algs[] = { +	{ +		.name		= "sha1", +		.drv_name	= "sha1-ccp", +		.type		= CCP_SHA_TYPE_1, +		.digest_size	= SHA1_DIGEST_SIZE, +		.block_size	= SHA1_BLOCK_SIZE, +	}, +	{ +		.name		= "sha224", +		.drv_name	= "sha224-ccp", +		.type		= CCP_SHA_TYPE_224, +		.digest_size	= SHA224_DIGEST_SIZE, +		.block_size	= SHA224_BLOCK_SIZE, +	}, +	{ +		.name		= "sha256", +		.drv_name	= "sha256-ccp", +		.type		= CCP_SHA_TYPE_256, +		.digest_size	= SHA256_DIGEST_SIZE, +		.block_size	= SHA256_BLOCK_SIZE, +	}, +}; + +static int ccp_register_hmac_alg(struct list_head *head, +				 const struct ccp_sha_def *def, +				 const struct ccp_crypto_ahash_alg *base_alg) +{ +	struct ccp_crypto_ahash_alg *ccp_alg; +	struct ahash_alg *alg; +	struct hash_alg_common *halg; +	struct crypto_alg *base; +	int ret; + +	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); +	if (!ccp_alg) +		return -ENOMEM; + +	/* Copy the base algorithm and only change what's necessary */ +	*ccp_alg = *base_alg; +	INIT_LIST_HEAD(&ccp_alg->entry); + +	strncpy(ccp_alg->child_alg, def->name, CRYPTO_MAX_ALG_NAME); + +	alg = &ccp_alg->alg; +	alg->setkey = ccp_sha_setkey; + +	halg = &alg->halg; + +	base = &halg->base; +	snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", def->name); +	snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "hmac-%s", +		 def->drv_name); +	base->cra_init = ccp_hmac_sha_cra_init; +	base->cra_exit = ccp_hmac_sha_cra_exit; + +	ret = crypto_register_ahash(alg); +	if (ret) { +		pr_err("%s ahash algorithm registration error (%d)\n", +			base->cra_name, ret); +		kfree(ccp_alg); +		return ret; +	} + +	list_add(&ccp_alg->entry, head); + +	return ret; +} + +static int ccp_register_sha_alg(struct list_head *head, +				const struct ccp_sha_def *def) +{ +	struct ccp_crypto_ahash_alg *ccp_alg; +	struct ahash_alg *alg; +	struct hash_alg_common *halg; +	struct crypto_alg *base; +	int ret; + +	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); +	if (!ccp_alg) +		return -ENOMEM; + +	INIT_LIST_HEAD(&ccp_alg->entry); + +	ccp_alg->type = def->type; + +	alg = &ccp_alg->alg; +	alg->init 
= ccp_sha_init; +	alg->update = ccp_sha_update; +	alg->final = ccp_sha_final; +	alg->finup = ccp_sha_finup; +	alg->digest = ccp_sha_digest; + +	halg = &alg->halg; +	halg->digestsize = def->digest_size; + +	base = &halg->base; +	snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); +	snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", +		 def->drv_name); +	base->cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | +			  CRYPTO_ALG_KERN_DRIVER_ONLY | +			  CRYPTO_ALG_NEED_FALLBACK; +	base->cra_blocksize = def->block_size; +	base->cra_ctxsize = sizeof(struct ccp_ctx); +	base->cra_priority = CCP_CRA_PRIORITY; +	base->cra_type = &crypto_ahash_type; +	base->cra_init = ccp_sha_cra_init; +	base->cra_exit = ccp_sha_cra_exit; +	base->cra_module = THIS_MODULE; + +	ret = crypto_register_ahash(alg); +	if (ret) { +		pr_err("%s ahash algorithm registration error (%d)\n", +			base->cra_name, ret); +		kfree(ccp_alg); +		return ret; +	} + +	list_add(&ccp_alg->entry, head); + +	ret = ccp_register_hmac_alg(head, def, ccp_alg); + +	return ret; +} + +int ccp_register_sha_algs(struct list_head *head) +{ +	int i, ret; + +	for (i = 0; i < ARRAY_SIZE(sha_algs); i++) { +		ret = ccp_register_sha_alg(head, &sha_algs[i]); +		if (ret) +			return ret; +	} + +	return 0; +} diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h new file mode 100644 index 00000000000..9aa4ae184f7 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto.h @@ -0,0 +1,197 @@ +/* + * AMD Cryptographic Coprocessor (CCP) crypto API support + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __CCP_CRYPTO_H__ +#define __CCP_CRYPTO_H__ + + +#include <linux/list.h> +#include <linux/wait.h> +#include <linux/pci.h> +#include <linux/ccp.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/aes.h> +#include <crypto/ctr.h> +#include <crypto/hash.h> +#include <crypto/sha.h> + + +#define CCP_CRA_PRIORITY	300 + +struct ccp_crypto_ablkcipher_alg { +	struct list_head entry; + +	u32 mode; + +	struct crypto_alg alg; +}; + +struct ccp_crypto_ahash_alg { +	struct list_head entry; + +	const __be32 *init; +	u32 type; +	u32 mode; + +	/* Child algorithm used for HMAC, CMAC, etc */ +	char child_alg[CRYPTO_MAX_ALG_NAME]; + +	struct ahash_alg alg; +}; + +static inline struct ccp_crypto_ablkcipher_alg * +	ccp_crypto_ablkcipher_alg(struct crypto_tfm *tfm) +{ +	struct crypto_alg *alg = tfm->__crt_alg; + +	return container_of(alg, struct ccp_crypto_ablkcipher_alg, alg); +} + +static inline struct ccp_crypto_ahash_alg * +	ccp_crypto_ahash_alg(struct crypto_tfm *tfm) +{ +	struct crypto_alg *alg = tfm->__crt_alg; +	struct ahash_alg *ahash_alg; + +	ahash_alg = container_of(alg, struct ahash_alg, halg.base); + +	return container_of(ahash_alg, struct ccp_crypto_ahash_alg, alg); +} + + +/***** AES related defines *****/ +struct ccp_aes_ctx { +	/* Fallback cipher for XTS with unsupported unit sizes */ +	struct crypto_ablkcipher *tfm_ablkcipher; + +	/* Cipher used to generate CMAC K1/K2 keys */ +	struct crypto_cipher *tfm_cipher; + +	enum ccp_engine engine; +	enum ccp_aes_type type; +	enum ccp_aes_mode mode; + +	struct scatterlist key_sg; +	unsigned int key_len; +	u8 key[AES_MAX_KEY_SIZE]; + +	u8 nonce[CTR_RFC3686_NONCE_SIZE]; + +	/* CMAC key structures */ +	struct scatterlist k1_sg; +	struct scatterlist k2_sg; +	unsigned int kn_len; +	u8 k1[AES_BLOCK_SIZE]; +	u8 k2[AES_BLOCK_SIZE]; +}; + +struct ccp_aes_req_ctx { +	struct scatterlist iv_sg; +	u8 iv[AES_BLOCK_SIZE]; + +	/* Fields used for RFC3686 requests */ +	u8 *rfc3686_info; +	u8 rfc3686_iv[AES_BLOCK_SIZE]; + +	struct ccp_cmd cmd; +}; + +struct ccp_aes_cmac_req_ctx { +	unsigned int null_msg; +	unsigned int final; + +	struct scatterlist *src; +	unsigned int nbytes; + +	u64 hash_cnt; +	unsigned int hash_rem; + +	struct sg_table data_sg; + +	struct scatterlist iv_sg; +	u8 iv[AES_BLOCK_SIZE]; + +	struct scatterlist buf_sg; +	unsigned int buf_count; +	u8 buf[AES_BLOCK_SIZE]; + +	struct scatterlist pad_sg; +	unsigned int pad_count; +	u8 pad[AES_BLOCK_SIZE]; + +	struct ccp_cmd cmd; +}; + +/***** SHA related defines *****/ +#define MAX_SHA_CONTEXT_SIZE	SHA256_DIGEST_SIZE +#define MAX_SHA_BLOCK_SIZE	SHA256_BLOCK_SIZE + +struct ccp_sha_ctx { +	struct scatterlist opad_sg; +	unsigned int opad_count; + +	unsigned int key_len; +	u8 key[MAX_SHA_BLOCK_SIZE]; +	u8 ipad[MAX_SHA_BLOCK_SIZE]; +	u8 opad[MAX_SHA_BLOCK_SIZE]; +	struct crypto_shash *hmac_tfm; +}; + +struct ccp_sha_req_ctx { +	enum ccp_sha_type type; + +	u64 msg_bits; + +	unsigned int first; +	unsigned int final; + +	struct scatterlist *src; +	unsigned int nbytes; + +	u64 hash_cnt; +	unsigned int hash_rem; + +	struct sg_table data_sg; + +	struct scatterlist ctx_sg; +	u8 ctx[MAX_SHA_CONTEXT_SIZE]; + +	struct scatterlist buf_sg; +	unsigned int buf_count; +	u8 buf[MAX_SHA_BLOCK_SIZE]; + +	/* CCP driver command */ +	struct ccp_cmd cmd; +}; + +/***** Common Context Structure *****/ +struct ccp_ctx { +	int (*complete)(struct crypto_async_request *req, int ret); + +	union { +		struct ccp_aes_ctx aes; +		struct ccp_sha_ctx sha; +	} u; +}; + +int ccp_crypto_enqueue_request(struct 
crypto_async_request *req, +			       struct ccp_cmd *cmd); +struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table, +					    struct scatterlist *sg_add); + +int ccp_register_aes_algs(struct list_head *head); +int ccp_register_aes_cmac_algs(struct list_head *head); +int ccp_register_aes_xts_algs(struct list_head *head); +int ccp_register_sha_algs(struct list_head *head); + +#endif diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c new file mode 100644 index 00000000000..2c7816149b0 --- /dev/null +++ b/drivers/crypto/ccp/ccp-dev.c @@ -0,0 +1,608 @@ +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/delay.h> +#include <linux/hw_random.h> +#include <linux/cpu.h> +#include <asm/cpu_device_id.h> +#include <linux/ccp.h> + +#include "ccp-dev.h" + +MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.0.0"); +MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver"); + +struct ccp_tasklet_data { +	struct completion completion; +	struct ccp_cmd *cmd; +}; + + +static struct ccp_device *ccp_dev; +static inline struct ccp_device *ccp_get_device(void) +{ +	return ccp_dev; +} + +static inline void ccp_add_device(struct ccp_device *ccp) +{ +	ccp_dev = ccp; +} + +static inline void ccp_del_device(struct ccp_device *ccp) +{ +	ccp_dev = NULL; +} + +/** + * ccp_enqueue_cmd - queue an operation for processing by the CCP + * + * @cmd: ccp_cmd struct to be processed + * + * Queue a cmd to be processed by the CCP. If queueing the cmd + * would exceed the defined length of the cmd queue the cmd will + * only be queued if the CCP_CMD_MAY_BACKLOG flag is set and will + * result in a return code of -EBUSY. + * + * The callback routine specified in the ccp_cmd struct will be + * called to notify the caller of completion (if the cmd was not + * backlogged) or advancement out of the backlog. If the cmd has + * advanced out of the backlog the "err" value of the callback + * will be -EINPROGRESS. Any other "err" value during callback is + * the result of the operation. 
+ * + * The cmd has been successfully queued if: + *   the return code is -EINPROGRESS or + *   the return code is -EBUSY and CCP_CMD_MAY_BACKLOG flag is set + */ +int ccp_enqueue_cmd(struct ccp_cmd *cmd) +{ +	struct ccp_device *ccp = ccp_get_device(); +	unsigned long flags; +	unsigned int i; +	int ret; + +	if (!ccp) +		return -ENODEV; + +	/* Caller must supply a callback routine */ +	if (!cmd->callback) +		return -EINVAL; + +	cmd->ccp = ccp; + +	spin_lock_irqsave(&ccp->cmd_lock, flags); + +	i = ccp->cmd_q_count; + +	if (ccp->cmd_count >= MAX_CMD_QLEN) { +		ret = -EBUSY; +		if (cmd->flags & CCP_CMD_MAY_BACKLOG) +			list_add_tail(&cmd->entry, &ccp->backlog); +	} else { +		ret = -EINPROGRESS; +		ccp->cmd_count++; +		list_add_tail(&cmd->entry, &ccp->cmd); + +		/* Find an idle queue */ +		if (!ccp->suspending) { +			for (i = 0; i < ccp->cmd_q_count; i++) { +				if (ccp->cmd_q[i].active) +					continue; + +				break; +			} +		} +	} + +	spin_unlock_irqrestore(&ccp->cmd_lock, flags); + +	/* If we found an idle queue, wake it up */ +	if (i < ccp->cmd_q_count) +		wake_up_process(ccp->cmd_q[i].kthread); + +	return ret; +} +EXPORT_SYMBOL_GPL(ccp_enqueue_cmd); + +static void ccp_do_cmd_backlog(struct work_struct *work) +{ +	struct ccp_cmd *cmd = container_of(work, struct ccp_cmd, work); +	struct ccp_device *ccp = cmd->ccp; +	unsigned long flags; +	unsigned int i; + +	cmd->callback(cmd->data, -EINPROGRESS); + +	spin_lock_irqsave(&ccp->cmd_lock, flags); + +	ccp->cmd_count++; +	list_add_tail(&cmd->entry, &ccp->cmd); + +	/* Find an idle queue */ +	for (i = 0; i < ccp->cmd_q_count; i++) { +		if (ccp->cmd_q[i].active) +			continue; + +		break; +	} + +	spin_unlock_irqrestore(&ccp->cmd_lock, flags); + +	/* If we found an idle queue, wake it up */ +	if (i < ccp->cmd_q_count) +		wake_up_process(ccp->cmd_q[i].kthread); +} + +static struct ccp_cmd *ccp_dequeue_cmd(struct ccp_cmd_queue *cmd_q) +{ +	struct ccp_device *ccp = cmd_q->ccp; +	struct ccp_cmd *cmd = NULL; +	struct ccp_cmd *backlog = NULL; +	unsigned long flags; + +	spin_lock_irqsave(&ccp->cmd_lock, flags); + +	cmd_q->active = 0; + +	if (ccp->suspending) { +		cmd_q->suspended = 1; + +		spin_unlock_irqrestore(&ccp->cmd_lock, flags); +		wake_up_interruptible(&ccp->suspend_queue); + +		return NULL; +	} + +	if (ccp->cmd_count) { +		cmd_q->active = 1; + +		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry); +		list_del(&cmd->entry); + +		ccp->cmd_count--; +	} + +	if (!list_empty(&ccp->backlog)) { +		backlog = list_first_entry(&ccp->backlog, struct ccp_cmd, +					   entry); +		list_del(&backlog->entry); +	} + +	spin_unlock_irqrestore(&ccp->cmd_lock, flags); + +	if (backlog) { +		INIT_WORK(&backlog->work, ccp_do_cmd_backlog); +		schedule_work(&backlog->work); +	} + +	return cmd; +} + +static void ccp_do_cmd_complete(unsigned long data) +{ +	struct ccp_tasklet_data *tdata = (struct ccp_tasklet_data *)data; +	struct ccp_cmd *cmd = tdata->cmd; + +	cmd->callback(cmd->data, cmd->ret); +	complete(&tdata->completion); +} + +static int ccp_cmd_queue_thread(void *data) +{ +	struct ccp_cmd_queue *cmd_q = (struct ccp_cmd_queue *)data; +	struct ccp_cmd *cmd; +	struct ccp_tasklet_data tdata; +	struct tasklet_struct tasklet; + +	tasklet_init(&tasklet, ccp_do_cmd_complete, (unsigned long)&tdata); + +	set_current_state(TASK_INTERRUPTIBLE); +	while (!kthread_should_stop()) { +		schedule(); + +		set_current_state(TASK_INTERRUPTIBLE); + +		cmd = ccp_dequeue_cmd(cmd_q); +		if (!cmd) +			continue; + +		__set_current_state(TASK_RUNNING); + +		/* Execute the command */ +		
cmd->ret = ccp_run_cmd(cmd_q, cmd); + +		/* Schedule the completion callback */ +		tdata.cmd = cmd; +		init_completion(&tdata.completion); +		tasklet_schedule(&tasklet); +		wait_for_completion(&tdata.completion); +	} + +	__set_current_state(TASK_RUNNING); + +	return 0; +} + +static int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait) +{ +	struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng); +	u32 trng_value; +	int len = min_t(int, sizeof(trng_value), max); + +	/* +	 * Locking is provided by the caller so we can update device +	 * hwrng-related fields safely +	 */ +	trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG); +	if (!trng_value) { +		/* Zero is returned if not data is available or if a +		 * bad-entropy error is present. Assume an error if +		 * we exceed TRNG_RETRIES reads of zero. +		 */ +		if (ccp->hwrng_retries++ > TRNG_RETRIES) +			return -EIO; + +		return 0; +	} + +	/* Reset the counter and save the rng value */ +	ccp->hwrng_retries = 0; +	memcpy(data, &trng_value, len); + +	return len; +} + +/** + * ccp_alloc_struct - allocate and initialize the ccp_device struct + * + * @dev: device struct of the CCP + */ +struct ccp_device *ccp_alloc_struct(struct device *dev) +{ +	struct ccp_device *ccp; + +	ccp = kzalloc(sizeof(*ccp), GFP_KERNEL); +	if (ccp == NULL) { +		dev_err(dev, "unable to allocate device struct\n"); +		return NULL; +	} +	ccp->dev = dev; + +	INIT_LIST_HEAD(&ccp->cmd); +	INIT_LIST_HEAD(&ccp->backlog); + +	spin_lock_init(&ccp->cmd_lock); +	mutex_init(&ccp->req_mutex); +	mutex_init(&ccp->ksb_mutex); +	ccp->ksb_count = KSB_COUNT; +	ccp->ksb_start = 0; + +	return ccp; +} + +/** + * ccp_init - initialize the CCP device + * + * @ccp: ccp_device struct + */ +int ccp_init(struct ccp_device *ccp) +{ +	struct device *dev = ccp->dev; +	struct ccp_cmd_queue *cmd_q; +	struct dma_pool *dma_pool; +	char dma_pool_name[MAX_DMAPOOL_NAME_LEN]; +	unsigned int qmr, qim, i; +	int ret; + +	/* Find available queues */ +	qim = 0; +	qmr = ioread32(ccp->io_regs + Q_MASK_REG); +	for (i = 0; i < MAX_HW_QUEUES; i++) { +		if (!(qmr & (1 << i))) +			continue; + +		/* Allocate a dma pool for this queue */ +		snprintf(dma_pool_name, sizeof(dma_pool_name), "ccp_q%d", i); +		dma_pool = dma_pool_create(dma_pool_name, dev, +					   CCP_DMAPOOL_MAX_SIZE, +					   CCP_DMAPOOL_ALIGN, 0); +		if (!dma_pool) { +			dev_err(dev, "unable to allocate dma pool\n"); +			ret = -ENOMEM; +			goto e_pool; +		} + +		cmd_q = &ccp->cmd_q[ccp->cmd_q_count]; +		ccp->cmd_q_count++; + +		cmd_q->ccp = ccp; +		cmd_q->id = i; +		cmd_q->dma_pool = dma_pool; + +		/* Reserve 2 KSB regions for the queue */ +		cmd_q->ksb_key = KSB_START + ccp->ksb_start++; +		cmd_q->ksb_ctx = KSB_START + ccp->ksb_start++; +		ccp->ksb_count -= 2; + +		/* Preset some register values and masks that are queue +		 * number dependent +		 */ +		cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE + +				    (CMD_Q_STATUS_INCR * i); +		cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE + +					(CMD_Q_STATUS_INCR * i); +		cmd_q->int_ok = 1 << (i * 2); +		cmd_q->int_err = 1 << ((i * 2) + 1); + +		cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status)); + +		init_waitqueue_head(&cmd_q->int_queue); + +		/* Build queue interrupt mask (two interrupts per queue) */ +		qim |= cmd_q->int_ok | cmd_q->int_err; + +		dev_dbg(dev, "queue #%u available\n", i); +	} +	if (ccp->cmd_q_count == 0) { +		dev_notice(dev, "no command queues available\n"); +		ret = -EIO; +		goto e_pool; +	} +	dev_notice(dev, "%u command queues 
available\n", ccp->cmd_q_count); + +	/* Disable and clear interrupts until ready */ +	iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG); +	for (i = 0; i < ccp->cmd_q_count; i++) { +		cmd_q = &ccp->cmd_q[i]; + +		ioread32(cmd_q->reg_int_status); +		ioread32(cmd_q->reg_status); +	} +	iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG); + +	/* Request an irq */ +	ret = ccp->get_irq(ccp); +	if (ret) { +		dev_err(dev, "unable to allocate an IRQ\n"); +		goto e_pool; +	} + +	/* Initialize the queues used to wait for KSB space and suspend */ +	init_waitqueue_head(&ccp->ksb_queue); +	init_waitqueue_head(&ccp->suspend_queue); + +	/* Create a kthread for each queue */ +	for (i = 0; i < ccp->cmd_q_count; i++) { +		struct task_struct *kthread; + +		cmd_q = &ccp->cmd_q[i]; + +		kthread = kthread_create(ccp_cmd_queue_thread, cmd_q, +					 "ccp-q%u", cmd_q->id); +		if (IS_ERR(kthread)) { +			dev_err(dev, "error creating queue thread (%ld)\n", +				PTR_ERR(kthread)); +			ret = PTR_ERR(kthread); +			goto e_kthread; +		} + +		cmd_q->kthread = kthread; +		wake_up_process(kthread); +	} + +	/* Register the RNG */ +	ccp->hwrng.name = "ccp-rng"; +	ccp->hwrng.read = ccp_trng_read; +	ret = hwrng_register(&ccp->hwrng); +	if (ret) { +		dev_err(dev, "error registering hwrng (%d)\n", ret); +		goto e_kthread; +	} + +	/* Make the device struct available before enabling interrupts */ +	ccp_add_device(ccp); + +	/* Enable interrupts */ +	iowrite32(qim, ccp->io_regs + IRQ_MASK_REG); + +	return 0; + +e_kthread: +	for (i = 0; i < ccp->cmd_q_count; i++) +		if (ccp->cmd_q[i].kthread) +			kthread_stop(ccp->cmd_q[i].kthread); + +	ccp->free_irq(ccp); + +e_pool: +	for (i = 0; i < ccp->cmd_q_count; i++) +		dma_pool_destroy(ccp->cmd_q[i].dma_pool); + +	return ret; +} + +/** + * ccp_destroy - tear down the CCP device + * + * @ccp: ccp_device struct + */ +void ccp_destroy(struct ccp_device *ccp) +{ +	struct ccp_cmd_queue *cmd_q; +	struct ccp_cmd *cmd; +	unsigned int qim, i; + +	/* Remove general access to the device struct */ +	ccp_del_device(ccp); + +	/* Unregister the RNG */ +	hwrng_unregister(&ccp->hwrng); + +	/* Stop the queue kthreads */ +	for (i = 0; i < ccp->cmd_q_count; i++) +		if (ccp->cmd_q[i].kthread) +			kthread_stop(ccp->cmd_q[i].kthread); + +	/* Build queue interrupt mask (two interrupt masks per queue) */ +	qim = 0; +	for (i = 0; i < ccp->cmd_q_count; i++) { +		cmd_q = &ccp->cmd_q[i]; +		qim |= cmd_q->int_ok | cmd_q->int_err; +	} + +	/* Disable and clear interrupts */ +	iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG); +	for (i = 0; i < ccp->cmd_q_count; i++) { +		cmd_q = &ccp->cmd_q[i]; + +		ioread32(cmd_q->reg_int_status); +		ioread32(cmd_q->reg_status); +	} +	iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG); + +	ccp->free_irq(ccp); + +	for (i = 0; i < ccp->cmd_q_count; i++) +		dma_pool_destroy(ccp->cmd_q[i].dma_pool); + +	/* Flush the cmd and backlog queue */ +	while (!list_empty(&ccp->cmd)) { +		/* Invoke the callback directly with an error code */ +		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry); +		list_del(&cmd->entry); +		cmd->callback(cmd->data, -ENODEV); +	} +	while (!list_empty(&ccp->backlog)) { +		/* Invoke the callback directly with an error code */ +		cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry); +		list_del(&cmd->entry); +		cmd->callback(cmd->data, -ENODEV); +	} +} + +/** + * ccp_irq_handler - handle interrupts generated by the CCP device + * + * @irq: the irq associated with the interrupt + * @data: the data value supplied when the irq was created + */ +irqreturn_t ccp_irq_handler(int irq, void 
*data) +{ +	struct device *dev = data; +	struct ccp_device *ccp = dev_get_drvdata(dev); +	struct ccp_cmd_queue *cmd_q; +	u32 q_int, status; +	unsigned int i; + +	status = ioread32(ccp->io_regs + IRQ_STATUS_REG); + +	for (i = 0; i < ccp->cmd_q_count; i++) { +		cmd_q = &ccp->cmd_q[i]; + +		q_int = status & (cmd_q->int_ok | cmd_q->int_err); +		if (q_int) { +			cmd_q->int_status = status; +			cmd_q->q_status = ioread32(cmd_q->reg_status); +			cmd_q->q_int_status = ioread32(cmd_q->reg_int_status); + +			/* On error, only save the first error value */ +			if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error) +				cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status); + +			cmd_q->int_rcvd = 1; + +			/* Acknowledge the interrupt and wake the kthread */ +			iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG); +			wake_up_interruptible(&cmd_q->int_queue); +		} +	} + +	return IRQ_HANDLED; +} + +#ifdef CONFIG_PM +bool ccp_queues_suspended(struct ccp_device *ccp) +{ +	unsigned int suspended = 0; +	unsigned long flags; +	unsigned int i; + +	spin_lock_irqsave(&ccp->cmd_lock, flags); + +	for (i = 0; i < ccp->cmd_q_count; i++) +		if (ccp->cmd_q[i].suspended) +			suspended++; + +	spin_unlock_irqrestore(&ccp->cmd_lock, flags); + +	return ccp->cmd_q_count == suspended; +} +#endif + +static const struct x86_cpu_id ccp_support[] = { +	{ X86_VENDOR_AMD, 22, }, +}; + +static int __init ccp_mod_init(void) +{ +	struct cpuinfo_x86 *cpuinfo = &boot_cpu_data; +	int ret; + +	if (!x86_match_cpu(ccp_support)) +		return -ENODEV; + +	switch (cpuinfo->x86) { +	case 22: +		if ((cpuinfo->x86_model < 48) || (cpuinfo->x86_model > 63)) +			return -ENODEV; + +		ret = ccp_pci_init(); +		if (ret) +			return ret; + +		/* Don't leave the driver loaded if init failed */ +		if (!ccp_get_device()) { +			ccp_pci_exit(); +			return -ENODEV; +		} + +		return 0; + +		break; +	} + +	return -ENODEV; +} + +static void __exit ccp_mod_exit(void) +{ +	struct cpuinfo_x86 *cpuinfo = &boot_cpu_data; + +	switch (cpuinfo->x86) { +	case 22: +		ccp_pci_exit(); +		break; +	} +} + +module_init(ccp_mod_init); +module_exit(ccp_mod_exit); diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h new file mode 100644 index 00000000000..7ec536e702e --- /dev/null +++ b/drivers/crypto/ccp/ccp-dev.h @@ -0,0 +1,272 @@ +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __CCP_DEV_H__ +#define __CCP_DEV_H__ + +#include <linux/device.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/wait.h> +#include <linux/dmapool.h> +#include <linux/hw_random.h> + + +#define IO_OFFSET			0x20000 + +#define MAX_DMAPOOL_NAME_LEN		32 + +#define MAX_HW_QUEUES			5 +#define MAX_CMD_QLEN			100 + +#define TRNG_RETRIES			10 + + +/****** Register Mappings ******/ +#define Q_MASK_REG			0x000 +#define TRNG_OUT_REG			0x00c +#define IRQ_MASK_REG			0x040 +#define IRQ_STATUS_REG			0x200 + +#define DEL_CMD_Q_JOB			0x124 +#define DEL_Q_ACTIVE			0x00000200 +#define DEL_Q_ID_SHIFT			6 + +#define CMD_REQ0			0x180 +#define CMD_REQ_INCR			0x04 + +#define CMD_Q_STATUS_BASE		0x210 +#define CMD_Q_INT_STATUS_BASE		0x214 +#define CMD_Q_STATUS_INCR		0x20 + +#define CMD_Q_CACHE			0x228 +#define CMD_Q_CACHE_INC			0x20 + +#define CMD_Q_ERROR(__qs)		((__qs) & 0x0000003f); +#define CMD_Q_DEPTH(__qs)		(((__qs) >> 12) & 0x0000000f); + +/****** REQ0 Related Values ******/ +#define REQ0_WAIT_FOR_WRITE		0x00000004 +#define REQ0_INT_ON_COMPLETE		0x00000002 +#define REQ0_STOP_ON_COMPLETE		0x00000001 + +#define REQ0_CMD_Q_SHIFT		9 +#define REQ0_JOBID_SHIFT		3 + +/****** REQ1 Related Values ******/ +#define REQ1_PROTECT_SHIFT		27 +#define REQ1_ENGINE_SHIFT		23 +#define REQ1_KEY_KSB_SHIFT		2 + +#define REQ1_EOM			0x00000002 +#define REQ1_INIT			0x00000001 + +/* AES Related Values */ +#define REQ1_AES_TYPE_SHIFT		21 +#define REQ1_AES_MODE_SHIFT		18 +#define REQ1_AES_ACTION_SHIFT		17 +#define REQ1_AES_CFB_SIZE_SHIFT		10 + +/* XTS-AES Related Values */ +#define REQ1_XTS_AES_SIZE_SHIFT		10 + +/* SHA Related Values */ +#define REQ1_SHA_TYPE_SHIFT		21 + +/* RSA Related Values */ +#define REQ1_RSA_MOD_SIZE_SHIFT		10 + +/* Pass-Through Related Values */ +#define REQ1_PT_BW_SHIFT		12 +#define REQ1_PT_BS_SHIFT		10 + +/* ECC Related Values */ +#define REQ1_ECC_AFFINE_CONVERT		0x00200000 +#define REQ1_ECC_FUNCTION_SHIFT		18 + +/****** REQ4 Related Values ******/ +#define REQ4_KSB_SHIFT			18 +#define REQ4_MEMTYPE_SHIFT		16 + +/****** REQ6 Related Values ******/ +#define REQ6_MEMTYPE_SHIFT		16 + + +/****** Key Storage Block ******/ +#define KSB_START			77 +#define KSB_END				127 +#define KSB_COUNT			(KSB_END - KSB_START + 1) +#define CCP_KSB_BITS			256 +#define CCP_KSB_BYTES			32 + +#define CCP_JOBID_MASK			0x0000003f + +#define CCP_DMAPOOL_MAX_SIZE		64 +#define CCP_DMAPOOL_ALIGN		(1 << 5) + +#define CCP_REVERSE_BUF_SIZE		64 + +#define CCP_AES_KEY_KSB_COUNT		1 +#define CCP_AES_CTX_KSB_COUNT		1 + +#define CCP_XTS_AES_KEY_KSB_COUNT	1 +#define CCP_XTS_AES_CTX_KSB_COUNT	1 + +#define CCP_SHA_KSB_COUNT		1 + +#define CCP_RSA_MAX_WIDTH		4096 + +#define CCP_PASSTHRU_BLOCKSIZE		256 +#define CCP_PASSTHRU_MASKSIZE		32 +#define CCP_PASSTHRU_KSB_COUNT		1 + +#define CCP_ECC_MODULUS_BYTES		48      /* 384-bits */ +#define CCP_ECC_MAX_OPERANDS		6 +#define CCP_ECC_MAX_OUTPUTS		3 +#define CCP_ECC_SRC_BUF_SIZE		448 +#define CCP_ECC_DST_BUF_SIZE		192 +#define CCP_ECC_OPERAND_SIZE		64 +#define CCP_ECC_OUTPUT_SIZE		64 +#define CCP_ECC_RESULT_OFFSET		60 +#define CCP_ECC_RESULT_SUCCESS		0x0001 + + +struct ccp_device; +struct ccp_cmd; + +struct ccp_cmd_queue { +	struct ccp_device *ccp; + +	/* Queue identifier */ +	u32 id; + +	/* Queue dma pool */ +	struct dma_pool *dma_pool; + +	/* Queue reserved KSB regions */ +	u32 ksb_key; +	u32 ksb_ctx; + +	/* Queue processing thread */ +	struct task_struct *kthread; +	unsigned int active; +	unsigned int suspended; + +	/* Number 
of free command slots available */ +	unsigned int free_slots; + +	/* Interrupt masks */ +	u32 int_ok; +	u32 int_err; + +	/* Register addresses for queue */ +	void __iomem *reg_status; +	void __iomem *reg_int_status; + +	/* Status values from job */ +	u32 int_status; +	u32 q_status; +	u32 q_int_status; +	u32 cmd_error; + +	/* Interrupt wait queue */ +	wait_queue_head_t int_queue; +	unsigned int int_rcvd; +} ____cacheline_aligned; + +struct ccp_device { +	struct device *dev; + +	/* +	 * Bus specific device information +	 */ +	void *dev_specific; +	int (*get_irq)(struct ccp_device *ccp); +	void (*free_irq)(struct ccp_device *ccp); + +	/* +	 * I/O area used for device communication. The register mapping +	 * starts at an offset into the mapped bar. +	 *   The CMD_REQx registers and the Delete_Cmd_Queue_Job register +	 *   need to be protected while a command queue thread is accessing +	 *   them. +	 */ +	struct mutex req_mutex ____cacheline_aligned; +	void __iomem *io_map; +	void __iomem *io_regs; + +	/* +	 * Master lists that all cmds are queued on. Because there can be +	 * more than one CCP command queue that can process a cmd a separate +	 * backlog list is neeeded so that the backlog completion call +	 * completes before the cmd is available for execution. +	 */ +	spinlock_t cmd_lock ____cacheline_aligned; +	unsigned int cmd_count; +	struct list_head cmd; +	struct list_head backlog; + +	/* +	 * The command queues. These represent the queues available on the +	 * CCP that are available for processing cmds +	 */ +	struct ccp_cmd_queue cmd_q[MAX_HW_QUEUES]; +	unsigned int cmd_q_count; + +	/* +	 * Support for the CCP True RNG +	 */ +	struct hwrng hwrng; +	unsigned int hwrng_retries; + +	/* +	 * A counter used to generate job-ids for cmds submitted to the CCP +	 */ +	atomic_t current_id ____cacheline_aligned; + +	/* +	 * The CCP uses key storage blocks (KSB) to maintain context for certain +	 * operations. To prevent multiple cmds from using the same KSB range +	 * a command queue reserves a KSB range for the duration of the cmd. +	 * Each queue, will however, reserve 2 KSB blocks for operations that +	 * only require single KSB entries (eg. AES context/iv and key) in order +	 * to avoid allocation contention.  This will reserve at most 10 KSB +	 * entries, leaving 40 KSB entries available for dynamic allocation. +	 */ +	struct mutex ksb_mutex ____cacheline_aligned; +	DECLARE_BITMAP(ksb, KSB_COUNT); +	wait_queue_head_t ksb_queue; +	unsigned int ksb_avail; +	unsigned int ksb_count; +	u32 ksb_start; + +	/* Suspend support */ +	unsigned int suspending; +	wait_queue_head_t suspend_queue; +}; + + +int ccp_pci_init(void); +void ccp_pci_exit(void); + +struct ccp_device *ccp_alloc_struct(struct device *dev); +int ccp_init(struct ccp_device *ccp); +void ccp_destroy(struct ccp_device *ccp); +bool ccp_queues_suspended(struct ccp_device *ccp); + +irqreturn_t ccp_irq_handler(int irq, void *data); + +int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd); + +#endif diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c new file mode 100644 index 00000000000..9ae006d69df --- /dev/null +++ b/drivers/crypto/ccp/ccp-ops.c @@ -0,0 +1,2126 @@ +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2013 Advanced Micro Devices, Inc. 
+ * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/delay.h> +#include <linux/ccp.h> +#include <linux/scatterlist.h> +#include <crypto/scatterwalk.h> +#include <crypto/sha.h> + +#include "ccp-dev.h" + + +enum ccp_memtype { +	CCP_MEMTYPE_SYSTEM = 0, +	CCP_MEMTYPE_KSB, +	CCP_MEMTYPE_LOCAL, +	CCP_MEMTYPE__LAST, +}; + +struct ccp_dma_info { +	dma_addr_t address; +	unsigned int offset; +	unsigned int length; +	enum dma_data_direction dir; +}; + +struct ccp_dm_workarea { +	struct device *dev; +	struct dma_pool *dma_pool; +	unsigned int length; + +	u8 *address; +	struct ccp_dma_info dma; +}; + +struct ccp_sg_workarea { +	struct scatterlist *sg; +	unsigned int nents; +	unsigned int length; + +	struct scatterlist *dma_sg; +	struct device *dma_dev; +	unsigned int dma_count; +	enum dma_data_direction dma_dir; + +	unsigned int sg_used; + +	u64 bytes_left; +}; + +struct ccp_data { +	struct ccp_sg_workarea sg_wa; +	struct ccp_dm_workarea dm_wa; +}; + +struct ccp_mem { +	enum ccp_memtype type; +	union { +		struct ccp_dma_info dma; +		u32 ksb; +	} u; +}; + +struct ccp_aes_op { +	enum ccp_aes_type type; +	enum ccp_aes_mode mode; +	enum ccp_aes_action action; +}; + +struct ccp_xts_aes_op { +	enum ccp_aes_action action; +	enum ccp_xts_aes_unit_size unit_size; +}; + +struct ccp_sha_op { +	enum ccp_sha_type type; +	u64 msg_bits; +}; + +struct ccp_rsa_op { +	u32 mod_size; +	u32 input_len; +}; + +struct ccp_passthru_op { +	enum ccp_passthru_bitwise bit_mod; +	enum ccp_passthru_byteswap byte_swap; +}; + +struct ccp_ecc_op { +	enum ccp_ecc_function function; +}; + +struct ccp_op { +	struct ccp_cmd_queue *cmd_q; + +	u32 jobid; +	u32 ioc; +	u32 soc; +	u32 ksb_key; +	u32 ksb_ctx; +	u32 init; +	u32 eom; + +	struct ccp_mem src; +	struct ccp_mem dst; + +	union { +		struct ccp_aes_op aes; +		struct ccp_xts_aes_op xts; +		struct ccp_sha_op sha; +		struct ccp_rsa_op rsa; +		struct ccp_passthru_op passthru; +		struct ccp_ecc_op ecc; +	} u; +}; + +/* SHA initial context values */ +static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { +	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1), +	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3), +	cpu_to_be32(SHA1_H4), 0, 0, 0, +}; + +static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { +	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1), +	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3), +	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5), +	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7), +}; + +static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { +	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1), +	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3), +	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5), +	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7), +}; + +/* The CCP cannot perform zero-length sha operations so the caller + * is required to buffer data for the final operation.  However, a + * sha operation for a message with a total length of zero is valid + * so known values are required to supply the result. 
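For reference, the three tables that follow are simply the well-known digests of the empty message (SHA-1("") begins da 39 a3 ee ..., SHA-256("") begins e3 b0 c4 42 ...), padded with zeros up to CCP_SHA_CTXSIZE where the digest is shorter, so they can be copied straight into the caller's context buffer.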
+ */ +static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = { +	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, +	0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, +	0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00, +	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = { +	0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, +	0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, +	0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, +	0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00, +}; + +static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = { +	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, +	0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, +	0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, +	0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55, +}; + +static u32 ccp_addr_lo(struct ccp_dma_info *info) +{ +	return lower_32_bits(info->address + info->offset); +} + +static u32 ccp_addr_hi(struct ccp_dma_info *info) +{ +	return upper_32_bits(info->address + info->offset) & 0x0000ffff; +} + +static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count) +{ +	struct ccp_cmd_queue *cmd_q = op->cmd_q; +	struct ccp_device *ccp = cmd_q->ccp; +	void __iomem *cr_addr; +	u32 cr0, cmd; +	unsigned int i; +	int ret = 0; + +	/* We could read a status register to see how many free slots +	 * are actually available, but reading that register resets it +	 * and you could lose some error information. +	 */ +	cmd_q->free_slots--; + +	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT) +	      | (op->jobid << REQ0_JOBID_SHIFT) +	      | REQ0_WAIT_FOR_WRITE; + +	if (op->soc) +		cr0 |= REQ0_STOP_ON_COMPLETE +		       | REQ0_INT_ON_COMPLETE; + +	if (op->ioc || !cmd_q->free_slots) +		cr0 |= REQ0_INT_ON_COMPLETE; + +	/* Start at CMD_REQ1 */ +	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR; + +	mutex_lock(&ccp->req_mutex); + +	/* Write CMD_REQ1 through CMD_REQx first */ +	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR) +		iowrite32(*(cr + i), cr_addr); + +	/* Tell the CCP to start */ +	wmb(); +	iowrite32(cr0, ccp->io_regs + CMD_REQ0); + +	mutex_unlock(&ccp->req_mutex); + +	if (cr0 & REQ0_INT_ON_COMPLETE) { +		/* Wait for the job to complete */ +		ret = wait_event_interruptible(cmd_q->int_queue, +					       cmd_q->int_rcvd); +		if (ret || cmd_q->cmd_error) { +			/* On error delete all related jobs from the queue */ +			cmd = (cmd_q->id << DEL_Q_ID_SHIFT) +			      | op->jobid; + +			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB); + +			if (!ret) +				ret = -EIO; +		} else if (op->soc) { +			/* Delete just head job from the queue on SoC */ +			cmd = DEL_Q_ACTIVE +			      | (cmd_q->id << DEL_Q_ID_SHIFT) +			      | op->jobid; + +			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB); +		} + +		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status); + +		cmd_q->int_rcvd = 0; +	} + +	return ret; +} + +static int ccp_perform_aes(struct ccp_op *op) +{ +	u32 cr[6]; + +	/* Fill out the register contents for REQ1 through REQ6 */ +	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT) +		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT) +		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT) +		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT) +		| (op->ksb_key << REQ1_KEY_KSB_SHIFT); +	cr[1] = op->src.u.dma.length - 1; +	cr[2] = ccp_addr_lo(&op->src.u.dma); +	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) +		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) +		| ccp_addr_hi(&op->src.u.dma); +	cr[4] = ccp_addr_lo(&op->dst.u.dma); +	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) +		| ccp_addr_hi(&op->dst.u.dma); + +	if (op->u.aes.mode == 
CCP_AES_MODE_CFB) +		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT); + +	if (op->eom) +		cr[0] |= REQ1_EOM; + +	if (op->init) +		cr[0] |= REQ1_INIT; + +	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static int ccp_perform_xts_aes(struct ccp_op *op) +{ +	u32 cr[6]; + +	/* Fill out the register contents for REQ1 through REQ6 */ +	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT) +		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT) +		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT) +		| (op->ksb_key << REQ1_KEY_KSB_SHIFT); +	cr[1] = op->src.u.dma.length - 1; +	cr[2] = ccp_addr_lo(&op->src.u.dma); +	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) +		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) +		| ccp_addr_hi(&op->src.u.dma); +	cr[4] = ccp_addr_lo(&op->dst.u.dma); +	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) +		| ccp_addr_hi(&op->dst.u.dma); + +	if (op->eom) +		cr[0] |= REQ1_EOM; + +	if (op->init) +		cr[0] |= REQ1_INIT; + +	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static int ccp_perform_sha(struct ccp_op *op) +{ +	u32 cr[6]; + +	/* Fill out the register contents for REQ1 through REQ6 */ +	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT) +		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT) +		| REQ1_INIT; +	cr[1] = op->src.u.dma.length - 1; +	cr[2] = ccp_addr_lo(&op->src.u.dma); +	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) +		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) +		| ccp_addr_hi(&op->src.u.dma); + +	if (op->eom) { +		cr[0] |= REQ1_EOM; +		cr[4] = lower_32_bits(op->u.sha.msg_bits); +		cr[5] = upper_32_bits(op->u.sha.msg_bits); +	} else { +		cr[4] = 0; +		cr[5] = 0; +	} + +	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static int ccp_perform_rsa(struct ccp_op *op) +{ +	u32 cr[6]; + +	/* Fill out the register contents for REQ1 through REQ6 */ +	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT) +		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT) +		| (op->ksb_key << REQ1_KEY_KSB_SHIFT) +		| REQ1_EOM; +	cr[1] = op->u.rsa.input_len - 1; +	cr[2] = ccp_addr_lo(&op->src.u.dma); +	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) +		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) +		| ccp_addr_hi(&op->src.u.dma); +	cr[4] = ccp_addr_lo(&op->dst.u.dma); +	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) +		| ccp_addr_hi(&op->dst.u.dma); + +	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static int ccp_perform_passthru(struct ccp_op *op) +{ +	u32 cr[6]; + +	/* Fill out the register contents for REQ1 through REQ6 */ +	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT) +		| (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT) +		| (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT); + +	if (op->src.type == CCP_MEMTYPE_SYSTEM) +		cr[1] = op->src.u.dma.length - 1; +	else +		cr[1] = op->dst.u.dma.length - 1; + +	if (op->src.type == CCP_MEMTYPE_SYSTEM) { +		cr[2] = ccp_addr_lo(&op->src.u.dma); +		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) +			| ccp_addr_hi(&op->src.u.dma); + +		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP) +			cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT); +	} else { +		cr[2] = op->src.u.ksb * CCP_KSB_BYTES; +		cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT); +	} + +	if (op->dst.type == CCP_MEMTYPE_SYSTEM) { +		cr[4] = ccp_addr_lo(&op->dst.u.dma); +		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) +			| ccp_addr_hi(&op->dst.u.dma); +	} else { +		cr[4] = op->dst.u.ksb * CCP_KSB_BYTES; +		cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT); +	} + +	if (op->eom) +		cr[0] |= REQ1_EOM; + +	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static int ccp_perform_ecc(struct ccp_op *op) +{ +	u32 cr[6]; + +	
/* Fill out the register contents for REQ1 through REQ6 */ +	cr[0] = REQ1_ECC_AFFINE_CONVERT +		| (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT) +		| (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT) +		| REQ1_EOM; +	cr[1] = op->src.u.dma.length - 1; +	cr[2] = ccp_addr_lo(&op->src.u.dma); +	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) +		| ccp_addr_hi(&op->src.u.dma); +	cr[4] = ccp_addr_lo(&op->dst.u.dma); +	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) +		| ccp_addr_hi(&op->dst.u.dma); + +	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count) +{ +	int start; + +	for (;;) { +		mutex_lock(&ccp->ksb_mutex); + +		start = (u32)bitmap_find_next_zero_area(ccp->ksb, +							ccp->ksb_count, +							ccp->ksb_start, +							count, 0); +		if (start <= ccp->ksb_count) { +			bitmap_set(ccp->ksb, start, count); + +			mutex_unlock(&ccp->ksb_mutex); +			break; +		} + +		ccp->ksb_avail = 0; + +		mutex_unlock(&ccp->ksb_mutex); + +		/* Wait for KSB entries to become available */ +		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail)) +			return 0; +	} + +	return KSB_START + start; +} + +static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start, +			 unsigned int count) +{ +	if (!start) +		return; + +	mutex_lock(&ccp->ksb_mutex); + +	bitmap_clear(ccp->ksb, start - KSB_START, count); + +	ccp->ksb_avail = 1; + +	mutex_unlock(&ccp->ksb_mutex); + +	wake_up_interruptible_all(&ccp->ksb_queue); +} + +static u32 ccp_gen_jobid(struct ccp_device *ccp) +{ +	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK; +} + +static void ccp_sg_free(struct ccp_sg_workarea *wa) +{ +	if (wa->dma_count) +		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir); + +	wa->dma_count = 0; +} + +static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev, +				struct scatterlist *sg, u64 len, +				enum dma_data_direction dma_dir) +{ +	memset(wa, 0, sizeof(*wa)); + +	wa->sg = sg; +	if (!sg) +		return 0; + +	wa->nents = sg_nents(sg); +	wa->length = sg->length; +	wa->bytes_left = len; +	wa->sg_used = 0; + +	if (len == 0) +		return 0; + +	if (dma_dir == DMA_NONE) +		return 0; + +	wa->dma_sg = sg; +	wa->dma_dev = dev; +	wa->dma_dir = dma_dir; +	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir); +	if (!wa->dma_count) +		return -ENOMEM; + + +	return 0; +} + +static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len) +{ +	unsigned int nbytes = min_t(u64, len, wa->bytes_left); + +	if (!wa->sg) +		return; + +	wa->sg_used += nbytes; +	wa->bytes_left -= nbytes; +	if (wa->sg_used == wa->sg->length) { +		wa->sg = sg_next(wa->sg); +		wa->sg_used = 0; +	} +} + +static void ccp_dm_free(struct ccp_dm_workarea *wa) +{ +	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) { +		if (wa->address) +			dma_pool_free(wa->dma_pool, wa->address, +				      wa->dma.address); +	} else { +		if (wa->dma.address) +			dma_unmap_single(wa->dev, wa->dma.address, wa->length, +					 wa->dma.dir); +		kfree(wa->address); +	} + +	wa->address = NULL; +	wa->dma.address = 0; +} + +static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa, +				struct ccp_cmd_queue *cmd_q, +				unsigned int len, +				enum dma_data_direction dir) +{ +	memset(wa, 0, sizeof(*wa)); + +	if (!len) +		return 0; + +	wa->dev = cmd_q->ccp->dev; +	wa->length = len; + +	if (len <= CCP_DMAPOOL_MAX_SIZE) { +		wa->dma_pool = cmd_q->dma_pool; + +		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL, +					     &wa->dma.address); +		if (!wa->address) +			return -ENOMEM; + +		
wa->dma.length = CCP_DMAPOOL_MAX_SIZE; + +		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE); +	} else { +		wa->address = kzalloc(len, GFP_KERNEL); +		if (!wa->address) +			return -ENOMEM; + +		wa->dma.address = dma_map_single(wa->dev, wa->address, len, +						 dir); +		if (!wa->dma.address) +			return -ENOMEM; + +		wa->dma.length = len; +	} +	wa->dma.dir = dir; + +	return 0; +} + +static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset, +			    struct scatterlist *sg, unsigned int sg_offset, +			    unsigned int len) +{ +	WARN_ON(!wa->address); + +	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len, +				 0); +} + +static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset, +			    struct scatterlist *sg, unsigned int sg_offset, +			    unsigned int len) +{ +	WARN_ON(!wa->address); + +	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len, +				 1); +} + +static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa, +				    struct scatterlist *sg, +				    unsigned int len, unsigned int se_len, +				    bool sign_extend) +{ +	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i; +	u8 buffer[CCP_REVERSE_BUF_SIZE]; + +	BUG_ON(se_len > sizeof(buffer)); + +	sg_offset = len; +	dm_offset = 0; +	nbytes = len; +	while (nbytes) { +		ksb_len = min_t(unsigned int, nbytes, se_len); +		sg_offset -= ksb_len; + +		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0); +		for (i = 0; i < ksb_len; i++) +			wa->address[dm_offset + i] = buffer[ksb_len - i - 1]; + +		dm_offset += ksb_len; +		nbytes -= ksb_len; + +		if ((ksb_len != se_len) && sign_extend) { +			/* Must sign-extend to nearest sign-extend length */ +			if (wa->address[dm_offset - 1] & 0x80) +				memset(wa->address + dm_offset, 0xff, +				       se_len - ksb_len); +		} +	} +} + +static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa, +				    struct scatterlist *sg, +				    unsigned int len) +{ +	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i; +	u8 buffer[CCP_REVERSE_BUF_SIZE]; + +	sg_offset = 0; +	dm_offset = len; +	nbytes = len; +	while (nbytes) { +		ksb_len = min_t(unsigned int, nbytes, sizeof(buffer)); +		dm_offset -= ksb_len; + +		for (i = 0; i < ksb_len; i++) +			buffer[ksb_len - i - 1] = wa->address[dm_offset + i]; +		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1); + +		sg_offset += ksb_len; +		nbytes -= ksb_len; +	} +} + +static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q) +{ +	ccp_dm_free(&data->dm_wa); +	ccp_sg_free(&data->sg_wa); +} + +static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q, +			 struct scatterlist *sg, u64 sg_len, +			 unsigned int dm_len, +			 enum dma_data_direction dir) +{ +	int ret; + +	memset(data, 0, sizeof(*data)); + +	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len, +				   dir); +	if (ret) +		goto e_err; + +	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir); +	if (ret) +		goto e_err; + +	return 0; + +e_err: +	ccp_free_data(data, cmd_q); + +	return ret; +} + +static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from) +{ +	struct ccp_sg_workarea *sg_wa = &data->sg_wa; +	struct ccp_dm_workarea *dm_wa = &data->dm_wa; +	unsigned int buf_count, nbytes; + +	/* Clear the buffer if setting it */ +	if (!from) +		memset(dm_wa->address, 0, dm_wa->length); + +	if (!sg_wa->sg) +		return 0; + +	/* Perform the copy operation +	 *   nbytes will always be <= UINT_MAX because dm_wa->length is +	 *   an unsigned int +	 */ +	
nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length); +	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used, +				 nbytes, from); + +	/* Update the structures and generate the count */ +	buf_count = 0; +	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) { +		nbytes = min(sg_wa->sg->length - sg_wa->sg_used, +			     dm_wa->length - buf_count); +		nbytes = min_t(u64, sg_wa->bytes_left, nbytes); + +		buf_count += nbytes; +		ccp_update_sg_workarea(sg_wa, nbytes); +	} + +	return buf_count; +} + +static unsigned int ccp_fill_queue_buf(struct ccp_data *data) +{ +	return ccp_queue_buf(data, 0); +} + +static unsigned int ccp_empty_queue_buf(struct ccp_data *data) +{ +	return ccp_queue_buf(data, 1); +} + +static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst, +			     struct ccp_op *op, unsigned int block_size, +			     bool blocksize_op) +{ +	unsigned int sg_src_len, sg_dst_len, op_len; + +	/* The CCP can only DMA from/to one address each per operation. This +	 * requires that we find the smallest DMA area between the source +	 * and destination. The resulting len values will always be <= UINT_MAX +	 * because the dma length is an unsigned int. +	 */ +	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used; +	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len); + +	if (dst) { +		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used; +		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len); +		op_len = min(sg_src_len, sg_dst_len); +	} else +		op_len = sg_src_len; + +	/* The data operation length will be at least block_size in length +	 * or the smaller of available sg room remaining for the source or +	 * the destination +	 */ +	op_len = max(op_len, block_size); + +	/* Unless we have to buffer data, there's no reason to wait */ +	op->soc = 0; + +	if (sg_src_len < block_size) { +		/* Not enough data in the sg element, so it +		 * needs to be buffered into a blocksize chunk +		 */ +		int cp_len = ccp_fill_queue_buf(src); + +		op->soc = 1; +		op->src.u.dma.address = src->dm_wa.dma.address; +		op->src.u.dma.offset = 0; +		op->src.u.dma.length = (blocksize_op) ? 
block_size : cp_len; +	} else { +		/* Enough data in the sg element, but we need to +		 * adjust for any previously copied data +		 */ +		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg); +		op->src.u.dma.offset = src->sg_wa.sg_used; +		op->src.u.dma.length = op_len & ~(block_size - 1); + +		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length); +	} + +	if (dst) { +		if (sg_dst_len < block_size) { +			/* Not enough room in the sg element or we're on the +			 * last piece of data (when using padding), so the +			 * output needs to be buffered into a blocksize chunk +			 */ +			op->soc = 1; +			op->dst.u.dma.address = dst->dm_wa.dma.address; +			op->dst.u.dma.offset = 0; +			op->dst.u.dma.length = op->src.u.dma.length; +		} else { +			/* Enough room in the sg element, but we need to +			 * adjust for any previously used area +			 */ +			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg); +			op->dst.u.dma.offset = dst->sg_wa.sg_used; +			op->dst.u.dma.length = op->src.u.dma.length; +		} +	} +} + +static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst, +			     struct ccp_op *op) +{ +	op->init = 0; + +	if (dst) { +		if (op->dst.u.dma.address == dst->dm_wa.dma.address) +			ccp_empty_queue_buf(dst); +		else +			ccp_update_sg_workarea(&dst->sg_wa, +					       op->dst.u.dma.length); +	} +} + +static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q, +				struct ccp_dm_workarea *wa, u32 jobid, u32 ksb, +				u32 byte_swap, bool from) +{ +	struct ccp_op op; + +	memset(&op, 0, sizeof(op)); + +	op.cmd_q = cmd_q; +	op.jobid = jobid; +	op.eom = 1; + +	if (from) { +		op.soc = 1; +		op.src.type = CCP_MEMTYPE_KSB; +		op.src.u.ksb = ksb; +		op.dst.type = CCP_MEMTYPE_SYSTEM; +		op.dst.u.dma.address = wa->dma.address; +		op.dst.u.dma.length = wa->length; +	} else { +		op.src.type = CCP_MEMTYPE_SYSTEM; +		op.src.u.dma.address = wa->dma.address; +		op.src.u.dma.length = wa->length; +		op.dst.type = CCP_MEMTYPE_KSB; +		op.dst.u.ksb = ksb; +	} + +	op.u.passthru.byte_swap = byte_swap; + +	return ccp_perform_passthru(&op); +} + +static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q, +			   struct ccp_dm_workarea *wa, u32 jobid, u32 ksb, +			   u32 byte_swap) +{ +	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false); +} + +static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q, +			     struct ccp_dm_workarea *wa, u32 jobid, u32 ksb, +			     u32 byte_swap) +{ +	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true); +} + +static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, +				struct ccp_cmd *cmd) +{ +	struct ccp_aes_engine *aes = &cmd->u.aes; +	struct ccp_dm_workarea key, ctx; +	struct ccp_data src; +	struct ccp_op op; +	unsigned int dm_offset; +	int ret; + +	if (!((aes->key_len == AES_KEYSIZE_128) || +	      (aes->key_len == AES_KEYSIZE_192) || +	      (aes->key_len == AES_KEYSIZE_256))) +		return -EINVAL; + +	if (aes->src_len & (AES_BLOCK_SIZE - 1)) +		return -EINVAL; + +	if (aes->iv_len != AES_BLOCK_SIZE) +		return -EINVAL; + +	if (!aes->key || !aes->iv || !aes->src) +		return -EINVAL; + +	if (aes->cmac_final) { +		if (aes->cmac_key_len != AES_BLOCK_SIZE) +			return -EINVAL; + +		if (!aes->cmac_key) +			return -EINVAL; +	} + +	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1); +	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1); + +	ret = -EIO; +	memset(&op, 0, sizeof(op)); +	op.cmd_q = cmd_q; +	op.jobid = ccp_gen_jobid(cmd_q->ccp); +	op.ksb_key = cmd_q->ksb_key; +	op.ksb_ctx = cmd_q->ksb_ctx; +	op.init = 1; +	op.u.aes.type = aes->type; +	op.u.aes.mode = 
aes->mode; +	op.u.aes.action = aes->action; + +	/* All supported key sizes fit in a single (32-byte) KSB entry +	 * and must be in little endian format. Use the 256-bit byte +	 * swap passthru option to convert from big endian to little +	 * endian. +	 */ +	ret = ccp_init_dm_workarea(&key, cmd_q, +				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES, +				   DMA_TO_DEVICE); +	if (ret) +		return ret; + +	dm_offset = CCP_KSB_BYTES - aes->key_len; +	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len); +	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key, +			      CCP_PASSTHRU_BYTESWAP_256BIT); +	if (ret) { +		cmd->engine_error = cmd_q->cmd_error; +		goto e_key; +	} + +	/* The AES context fits in a single (32-byte) KSB entry and +	 * must be in little endian format. Use the 256-bit byte swap +	 * passthru option to convert from big endian to little endian. +	 */ +	ret = ccp_init_dm_workarea(&ctx, cmd_q, +				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES, +				   DMA_BIDIRECTIONAL); +	if (ret) +		goto e_key; + +	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; +	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); +	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, +			      CCP_PASSTHRU_BYTESWAP_256BIT); +	if (ret) { +		cmd->engine_error = cmd_q->cmd_error; +		goto e_ctx; +	} + +	/* Send data to the CCP AES engine */ +	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len, +			    AES_BLOCK_SIZE, DMA_TO_DEVICE); +	if (ret) +		goto e_ctx; + +	while (src.sg_wa.bytes_left) { +		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true); +		if (aes->cmac_final && !src.sg_wa.bytes_left) { +			op.eom = 1; + +			/* Push the K1/K2 key to the CCP now */ +			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, +						op.ksb_ctx, +						CCP_PASSTHRU_BYTESWAP_256BIT); +			if (ret) { +				cmd->engine_error = cmd_q->cmd_error; +				goto e_src; +			} + +			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0, +					aes->cmac_key_len); +			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, +					      CCP_PASSTHRU_BYTESWAP_256BIT); +			if (ret) { +				cmd->engine_error = cmd_q->cmd_error; +				goto e_src; +			} +		} + +		ret = ccp_perform_aes(&op); +		if (ret) { +			cmd->engine_error = cmd_q->cmd_error; +			goto e_src; +		} + +		ccp_process_data(&src, NULL, &op); +	} + +	/* Retrieve the AES context - convert from LE to BE using +	 * 32-byte (256-bit) byteswapping +	 */ +	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, +				CCP_PASSTHRU_BYTESWAP_256BIT); +	if (ret) { +		cmd->engine_error = cmd_q->cmd_error; +		goto e_src; +	} + +	/* ...but we only need AES_BLOCK_SIZE bytes */ +	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; +	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); + +e_src: +	ccp_free_data(&src, cmd_q); + +e_ctx: +	ccp_dm_free(&ctx); + +e_key: +	ccp_dm_free(&key); + +	return ret; +} + +static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ +	struct ccp_aes_engine *aes = &cmd->u.aes; +	struct ccp_dm_workarea key, ctx; +	struct ccp_data src, dst; +	struct ccp_op op; +	unsigned int dm_offset; +	bool in_place = false; +	int ret; + +	if (aes->mode == CCP_AES_MODE_CMAC) +		return ccp_run_aes_cmac_cmd(cmd_q, cmd); + +	if (!((aes->key_len == AES_KEYSIZE_128) || +	      (aes->key_len == AES_KEYSIZE_192) || +	      (aes->key_len == AES_KEYSIZE_256))) +		return -EINVAL; + +	if (((aes->mode == CCP_AES_MODE_ECB) || +	     (aes->mode == CCP_AES_MODE_CBC) || +	     (aes->mode == CCP_AES_MODE_CFB)) && +	    (aes->src_len & (AES_BLOCK_SIZE - 1))) +		return -EINVAL; + +	if 
(!aes->key || !aes->src || !aes->dst) +		return -EINVAL; + +	if (aes->mode != CCP_AES_MODE_ECB) { +		if (aes->iv_len != AES_BLOCK_SIZE) +			return -EINVAL; + +		if (!aes->iv) +			return -EINVAL; +	} + +	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1); +	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1); + +	ret = -EIO; +	memset(&op, 0, sizeof(op)); +	op.cmd_q = cmd_q; +	op.jobid = ccp_gen_jobid(cmd_q->ccp); +	op.ksb_key = cmd_q->ksb_key; +	op.ksb_ctx = cmd_q->ksb_ctx; +	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1; +	op.u.aes.type = aes->type; +	op.u.aes.mode = aes->mode; +	op.u.aes.action = aes->action; + +	/* All supported key sizes fit in a single (32-byte) KSB entry +	 * and must be in little endian format. Use the 256-bit byte +	 * swap passthru option to convert from big endian to little +	 * endian. +	 */ +	ret = ccp_init_dm_workarea(&key, cmd_q, +				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES, +				   DMA_TO_DEVICE); +	if (ret) +		return ret; + +	dm_offset = CCP_KSB_BYTES - aes->key_len; +	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len); +	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key, +			      CCP_PASSTHRU_BYTESWAP_256BIT); +	if (ret) { +		cmd->engine_error = cmd_q->cmd_error; +		goto e_key; +	} + +	/* The AES context fits in a single (32-byte) KSB entry and +	 * must be in little endian format. Use the 256-bit byte swap +	 * passthru option to convert from big endian to little endian. +	 */ +	ret = ccp_init_dm_workarea(&ctx, cmd_q, +				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES, +				   DMA_BIDIRECTIONAL); +	if (ret) +		goto e_key; + +	if (aes->mode != CCP_AES_MODE_ECB) { +		/* Load the AES context - conver to LE */ +		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; +		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); +		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, +				      CCP_PASSTHRU_BYTESWAP_256BIT); +		if (ret) { +			cmd->engine_error = cmd_q->cmd_error; +			goto e_ctx; +		} +	} + +	/* Prepare the input and output data workareas. For in-place +	 * operations we need to set the dma direction to BIDIRECTIONAL +	 * and copy the src workarea to the dst workarea. +	 */ +	if (sg_virt(aes->src) == sg_virt(aes->dst)) +		in_place = true; + +	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len, +			    AES_BLOCK_SIZE, +			    in_place ? 
DMA_BIDIRECTIONAL : DMA_TO_DEVICE); +	if (ret) +		goto e_ctx; + +	if (in_place) +		dst = src; +	else { +		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len, +				    AES_BLOCK_SIZE, DMA_FROM_DEVICE); +		if (ret) +			goto e_src; +	} + +	/* Send data to the CCP AES engine */ +	while (src.sg_wa.bytes_left) { +		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true); +		if (!src.sg_wa.bytes_left) { +			op.eom = 1; + +			/* Since we don't retrieve the AES context in ECB +			 * mode we have to wait for the operation to complete +			 * on the last piece of data +			 */ +			if (aes->mode == CCP_AES_MODE_ECB) +				op.soc = 1; +		} + +		ret = ccp_perform_aes(&op); +		if (ret) { +			cmd->engine_error = cmd_q->cmd_error; +			goto e_dst; +		} + +		ccp_process_data(&src, &dst, &op); +	} + +	if (aes->mode != CCP_AES_MODE_ECB) { +		/* Retrieve the AES context - convert from LE to BE using +		 * 32-byte (256-bit) byteswapping +		 */ +		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, +					CCP_PASSTHRU_BYTESWAP_256BIT); +		if (ret) { +			cmd->engine_error = cmd_q->cmd_error; +			goto e_dst; +		} + +		/* ...but we only need AES_BLOCK_SIZE bytes */ +		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; +		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); +	} + +e_dst: +	if (!in_place) +		ccp_free_data(&dst, cmd_q); + +e_src: +	ccp_free_data(&src, cmd_q); + +e_ctx: +	ccp_dm_free(&ctx); + +e_key: +	ccp_dm_free(&key); + +	return ret; +} + +static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, +			       struct ccp_cmd *cmd) +{ +	struct ccp_xts_aes_engine *xts = &cmd->u.xts; +	struct ccp_dm_workarea key, ctx; +	struct ccp_data src, dst; +	struct ccp_op op; +	unsigned int unit_size, dm_offset; +	bool in_place = false; +	int ret; + +	switch (xts->unit_size) { +	case CCP_XTS_AES_UNIT_SIZE_16: +		unit_size = 16; +		break; +	case CCP_XTS_AES_UNIT_SIZE_512: +		unit_size = 512; +		break; +	case CCP_XTS_AES_UNIT_SIZE_1024: +		unit_size = 1024; +		break; +	case CCP_XTS_AES_UNIT_SIZE_2048: +		unit_size = 2048; +		break; +	case CCP_XTS_AES_UNIT_SIZE_4096: +		unit_size = 4096; +		break; + +	default: +		return -EINVAL; +	} + +	if (xts->key_len != AES_KEYSIZE_128) +		return -EINVAL; + +	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1))) +		return -EINVAL; + +	if (xts->iv_len != AES_BLOCK_SIZE) +		return -EINVAL; + +	if (!xts->key || !xts->iv || !xts->src || !xts->dst) +		return -EINVAL; + +	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1); +	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1); + +	ret = -EIO; +	memset(&op, 0, sizeof(op)); +	op.cmd_q = cmd_q; +	op.jobid = ccp_gen_jobid(cmd_q->ccp); +	op.ksb_key = cmd_q->ksb_key; +	op.ksb_ctx = cmd_q->ksb_ctx; +	op.init = 1; +	op.u.xts.action = xts->action; +	op.u.xts.unit_size = xts->unit_size; + +	/* All supported key sizes fit in a single (32-byte) KSB entry +	 * and must be in little endian format. Use the 256-bit byte +	 * swap passthru option to convert from big endian to little +	 * endian. 
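An XTS key is two AES-128 keys stored back to back (by the usual XTS convention, the data key followed by the tweak key), so xts->key holds 32 bytes even though xts->key_len is AES_KEYSIZE_128. With dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128 = 16, the two ccp_set_dm_area() calls below place the halves swapped within the 32-byte workarea before it is byteswapped into the KSB:

	xts->key:  [ key1 (bytes 0..15) | key2 (bytes 16..31) ]
	key dm:    [ key2 (bytes 0..15) | key1 (bytes 16..31) ]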
+	 */ +	ret = ccp_init_dm_workarea(&key, cmd_q, +				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES, +				   DMA_TO_DEVICE); +	if (ret) +		return ret; + +	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128; +	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len); +	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len); +	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key, +			      CCP_PASSTHRU_BYTESWAP_256BIT); +	if (ret) { +		cmd->engine_error = cmd_q->cmd_error; +		goto e_key; +	} + +	/* The AES context fits in a single (32-byte) KSB entry and +	 * for XTS is already in little endian format so no byte swapping +	 * is needed. +	 */ +	ret = ccp_init_dm_workarea(&ctx, cmd_q, +				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES, +				   DMA_BIDIRECTIONAL); +	if (ret) +		goto e_key; + +	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len); +	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, +			      CCP_PASSTHRU_BYTESWAP_NOOP); +	if (ret) { +		cmd->engine_error = cmd_q->cmd_error; +		goto e_ctx; +	} + +	/* Prepare the input and output data workareas. For in-place +	 * operations we need to set the dma direction to BIDIRECTIONAL +	 * and copy the src workarea to the dst workarea. +	 */ +	if (sg_virt(xts->src) == sg_virt(xts->dst)) +		in_place = true; + +	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len, +			    unit_size, +			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); +	if (ret) +		goto e_ctx; + +	if (in_place) +		dst = src; +	else { +		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len, +				    unit_size, DMA_FROM_DEVICE); +		if (ret) +			goto e_src; +	} + +	/* Send data to the CCP AES engine */ +	while (src.sg_wa.bytes_left) { +		ccp_prepare_data(&src, &dst, &op, unit_size, true); +		if (!src.sg_wa.bytes_left) +			op.eom = 1; + +		ret = ccp_perform_xts_aes(&op); +		if (ret) { +			cmd->engine_error = cmd_q->cmd_error; +			goto e_dst; +		} + +		ccp_process_data(&src, &dst, &op); +	} + +	/* Retrieve the AES context - convert from LE to BE using +	 * 32-byte (256-bit) byteswapping +	 */ +	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, +				CCP_PASSTHRU_BYTESWAP_256BIT); +	if (ret) { +		cmd->engine_error = cmd_q->cmd_error; +		goto e_dst; +	} + +	/* ...but we only need AES_BLOCK_SIZE bytes */ +	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; +	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len); + +e_dst: +	if (!in_place) +		ccp_free_data(&dst, cmd_q); + +e_src: +	ccp_free_data(&src, cmd_q); + +e_ctx: +	ccp_dm_free(&ctx); + +e_key: +	ccp_dm_free(&key); + +	return ret; +} + +static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ +	struct ccp_sha_engine *sha = &cmd->u.sha; +	struct ccp_dm_workarea ctx; +	struct ccp_data src; +	struct ccp_op op; +	int ret; + +	if (sha->ctx_len != CCP_SHA_CTXSIZE) +		return -EINVAL; + +	if (!sha->ctx) +		return -EINVAL; + +	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1))) +		return -EINVAL; + +	if (!sha->src_len) { +		const u8 *sha_zero; + +		/* Not final, just return */ +		if (!sha->final) +			return 0; + +		/* CCP can't do a zero length sha operation so the caller +		 * must buffer the data. +		 */ +		if (sha->msg_bits) +			return -EINVAL; + +		/* A sha operation for a message with a total length of zero, +		 * return known result. 
+		 */ +		switch (sha->type) { +		case CCP_SHA_TYPE_1: +			sha_zero = ccp_sha1_zero; +			break; +		case CCP_SHA_TYPE_224: +			sha_zero = ccp_sha224_zero; +			break; +		case CCP_SHA_TYPE_256: +			sha_zero = ccp_sha256_zero; +			break; +		default: +			return -EINVAL; +		} + +		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0, +					 sha->ctx_len, 1); + +		return 0; +	} + +	if (!sha->src) +		return -EINVAL; + +	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1); + +	memset(&op, 0, sizeof(op)); +	op.cmd_q = cmd_q; +	op.jobid = ccp_gen_jobid(cmd_q->ccp); +	op.ksb_ctx = cmd_q->ksb_ctx; +	op.u.sha.type = sha->type; +	op.u.sha.msg_bits = sha->msg_bits; + +	/* The SHA context fits in a single (32-byte) KSB entry and +	 * must be in little endian format. Use the 256-bit byte swap +	 * passthru option to convert from big endian to little endian. +	 */ +	ret = ccp_init_dm_workarea(&ctx, cmd_q, +				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES, +				   DMA_BIDIRECTIONAL); +	if (ret) +		return ret; + +	if (sha->first) { +		const __be32 *init; + +		switch (sha->type) { +		case CCP_SHA_TYPE_1: +			init = ccp_sha1_init; +			break; +		case CCP_SHA_TYPE_224: +			init = ccp_sha224_init; +			break; +		case CCP_SHA_TYPE_256: +			init = ccp_sha256_init; +			break; +		default: +			ret = -EINVAL; +			goto e_ctx; +		} +		memcpy(ctx.address, init, CCP_SHA_CTXSIZE); +	} else +		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len); + +	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, +			      CCP_PASSTHRU_BYTESWAP_256BIT); +	if (ret) { +		cmd->engine_error = cmd_q->cmd_error; +		goto e_ctx; +	} + +	/* Send data to the CCP SHA engine */ +	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len, +			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE); +	if (ret) +		goto e_ctx; + +	while (src.sg_wa.bytes_left) { +		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false); +		if (sha->final && !src.sg_wa.bytes_left) +			op.eom = 1; + +		ret = ccp_perform_sha(&op); +		if (ret) { +			cmd->engine_error = cmd_q->cmd_error; +			goto e_data; +		} + +		ccp_process_data(&src, NULL, &op); +	} + +	/* Retrieve the SHA context - convert from LE to BE using +	 * 32-byte (256-bit) byteswapping to BE +	 */ +	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, +				CCP_PASSTHRU_BYTESWAP_256BIT); +	if (ret) { +		cmd->engine_error = cmd_q->cmd_error; +		goto e_data; +	} + +	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len); + +	if (sha->final && sha->opad) { +		/* HMAC operation, recursively perform final SHA */ +		struct ccp_cmd hmac_cmd; +		struct scatterlist sg; +		u64 block_size, digest_size; +		u8 *hmac_buf; + +		switch (sha->type) { +		case CCP_SHA_TYPE_1: +			block_size = SHA1_BLOCK_SIZE; +			digest_size = SHA1_DIGEST_SIZE; +			break; +		case CCP_SHA_TYPE_224: +			block_size = SHA224_BLOCK_SIZE; +			digest_size = SHA224_DIGEST_SIZE; +			break; +		case CCP_SHA_TYPE_256: +			block_size = SHA256_BLOCK_SIZE; +			digest_size = SHA256_DIGEST_SIZE; +			break; +		default: +			ret = -EINVAL; +			goto e_data; +		} + +		if (sha->opad_len != block_size) { +			ret = -EINVAL; +			goto e_data; +		} + +		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL); +		if (!hmac_buf) { +			ret = -ENOMEM; +			goto e_data; +		} +		sg_init_one(&sg, hmac_buf, block_size + digest_size); + +		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0); +		memcpy(hmac_buf + block_size, ctx.address, digest_size); + +		memset(&hmac_cmd, 0, sizeof(hmac_cmd)); +		hmac_cmd.engine = CCP_ENGINE_SHA; +		hmac_cmd.u.sha.type = sha->type; +		hmac_cmd.u.sha.ctx = sha->ctx; +		
+static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
+	struct ccp_dm_workarea exp, src;
+	struct ccp_data dst;
+	struct ccp_op op;
+	unsigned int ksb_count, i_len, o_len;
+	int ret;
+
+	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
+		return -EINVAL;
+
+	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
+		return -EINVAL;
+
+	/* The RSA modulus must precede the message being acted upon, so
+	 * it must be copied to a DMA area where the message and the
+	 * modulus can be concatenated.  Therefore the input buffer
+	 * length required is twice the output buffer length (which
+	 * must be a multiple of 256 bits).
+	 */
+	o_len = ((rsa->key_size + 255) / 256) * 32;
+	i_len = o_len * 2;
+
+	ksb_count = o_len / CCP_KSB_BYTES;
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
+	if (!op.ksb_key)
+		return -EIO;
+
+	/* The RSA exponent may span multiple (32-byte) KSB entries and must
+	 * be in little endian format. Reverse copy each 32-byte chunk
+	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
+	 * and each byte within that chunk and do not perform any byte swap
+	 * operations on the passthru operation.
+	 */
+	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
+	if (ret)
+		goto e_ksb;
+
+	ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES,
+				true);
+	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
+			      CCP_PASSTHRU_BYTESWAP_NOOP);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_exp;
+	}
+
+	/* Concatenate the modulus and the message. Both the modulus and
+	 * the operands must be in little endian format.  Since the input
+	 * is in big endian format it must be converted.
+	 */
+	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
+	if (ret)
+		goto e_exp;
+
+	ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES,
+				true);
+	src.address += o_len;	/* Adjust the address for the copy operation */
+	ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES,
+				true);
+	src.address -= o_len;	/* Reset the address to original value */
+
+	/* Prepare the output area for the operation */
+	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
+			    o_len, DMA_FROM_DEVICE);
+	if (ret)
+		goto e_src;
+
+	op.soc = 1;
+	op.src.u.dma.address = src.dma.address;
+	op.src.u.dma.offset = 0;
+	op.src.u.dma.length = i_len;
+	op.dst.u.dma.address = dst.dm_wa.dma.address;
+	op.dst.u.dma.offset = 0;
+	op.dst.u.dma.length = o_len;
+
+	op.u.rsa.mod_size = rsa->key_size;
+	op.u.rsa.input_len = i_len;
+
+	ret = ccp_perform_rsa(&op);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_dst;
+	}
+
+	ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);
+
+e_dst:
+	ccp_free_data(&dst, cmd_q);
+
+e_src:
+	ccp_dm_free(&src);
+
+e_exp:
+	ccp_dm_free(&exp);
+
+e_ksb:
+	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
+
+	return ret;
+}
+
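The sizing rule in ccp_run_rsa_cmd() is easy to work through by hand: for a 2048-bit key the output area is 256 bytes, the modulus-plus-message input area is 512 bytes, and the exponent occupies 8 KSB entries. A quick illustrative check that mirrors the expressions above (plain C, not driver code):

	#include <stdio.h>

	int main(void)
	{
		unsigned int key_size = 2048;				/* bits */
		unsigned int o_len = ((key_size + 255) / 256) * 32;	/* 256 bytes */
		unsigned int i_len = o_len * 2;				/* 512 bytes */
		unsigned int ksb_count = o_len / 32;			/* 8 entries */

		printf("o_len=%u i_len=%u ksb_count=%u\n", o_len, i_len, ksb_count);
		return 0;
	}
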
+static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
+				struct ccp_cmd *cmd)
+{
+	struct ccp_passthru_engine *pt = &cmd->u.passthru;
+	struct ccp_dm_workarea mask;
+	struct ccp_data src, dst;
+	struct ccp_op op;
+	bool in_place = false;
+	unsigned int i;
+	int ret;
+
+	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
+		return -EINVAL;
+
+	if (!pt->src || !pt->dst)
+		return -EINVAL;
+
+	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
+		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
+			return -EINVAL;
+		if (!pt->mask)
+			return -EINVAL;
+	}
+
+	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+
+	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
+		/* Load the mask */
+		op.ksb_key = cmd_q->ksb_key;
+
+		ret = ccp_init_dm_workarea(&mask, cmd_q,
+					   CCP_PASSTHRU_KSB_COUNT *
+					   CCP_KSB_BYTES,
+					   DMA_TO_DEVICE);
+		if (ret)
+			return ret;
+
+		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
+		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
+				      CCP_PASSTHRU_BYTESWAP_NOOP);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_mask;
+		}
+	}
+
+	/* Prepare the input and output data workareas. For in-place
+	 * operations we need to set the dma direction to BIDIRECTIONAL
+	 * and copy the src workarea to the dst workarea.
+	 */
+	if (sg_virt(pt->src) == sg_virt(pt->dst))
+		in_place = true;
+
+	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
+			    CCP_PASSTHRU_MASKSIZE,
+			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
+	if (ret)
+		goto e_mask;
+
+	if (in_place)
+		dst = src;
+	else {
+		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
+				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
+		if (ret)
+			goto e_src;
+	}
+
+	/* Send data to the CCP Passthru engine
+	 *   Because the CCP engine works on a single source and destination
+	 *   dma address at a time, the length of each entry in the source
+	 *   scatterlist (after the dma_map_sg call) must be less than or
+	 *   equal to the (remaining) length in the destination scatterlist
+	 *   entry, and the length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
+	 */
+	dst.sg_wa.sg_used = 0;
+	for (i = 1; i <= src.sg_wa.dma_count; i++) {
+		if (!dst.sg_wa.sg ||
+		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
+			ret = -EINVAL;
+			goto e_dst;
+		}
+
+		if (i == src.sg_wa.dma_count) {
+			op.eom = 1;
+			op.soc = 1;
+		}
+
+		op.src.type = CCP_MEMTYPE_SYSTEM;
+		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
+		op.src.u.dma.offset = 0;
+		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
+
+		op.dst.type = CCP_MEMTYPE_SYSTEM;
+		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
+		op.dst.u.dma.offset = dst.sg_wa.sg_used;
+		op.dst.u.dma.length = op.src.u.dma.length;
+
+		ret = ccp_perform_passthru(&op);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_dst;
+		}
+
+		dst.sg_wa.sg_used += src.sg_wa.sg->length;
+		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
+			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
+			dst.sg_wa.sg_used = 0;
+		}
+		src.sg_wa.sg = sg_next(src.sg_wa.sg);
+	}
+
+e_dst:
+	if (!in_place)
+		ccp_free_data(&dst, cmd_q);
+
+e_src:
+	ccp_free_data(&src, cmd_q);
+
+e_mask:
+	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
+		ccp_dm_free(&mask);
+
+	return ret;
+}
+
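The constraint spelled out in the comment above (each mapped source entry must fit in the space remaining in the current destination entry and be block aligned) can be illustrated with a simple length walk. This sketch uses arrays of lengths as a stand-in for scatterlists and a made-up block-size constant; it is not driver code:

	#include <stdbool.h>
	#include <stddef.h>

	#define PT_BLOCKSIZE 16	/* stand-in for CCP_PASSTHRU_BLOCKSIZE */

	static bool passthru_lengths_ok(const size_t *src_len, size_t src_n,
					const size_t *dst_len, size_t dst_n)
	{
		size_t d = 0, used = 0, i;

		for (i = 0; i < src_n; i++) {
			if (src_len[i] % PT_BLOCKSIZE)
				return false;
			if (d >= dst_n || dst_len[d] - used < src_len[i])
				return false;
			used += src_len[i];
			if (used == dst_len[d]) {
				d++;
				used = 0;
			}
		}
		return true;
	}
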
+static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
+	struct ccp_dm_workarea src, dst;
+	struct ccp_op op;
+	int ret;
+	u8 *save;
+
+	if (!ecc->u.mm.operand_1 ||
+	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
+		return -EINVAL;
+
+	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
+		if (!ecc->u.mm.operand_2 ||
+		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
+			return -EINVAL;
+
+	if (!ecc->u.mm.result ||
+	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
+		return -EINVAL;
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+
+	/* Concatenate the modulus and the operands. Both the modulus and
+	 * the operands must be in little endian format.  Since the input
+	 * is in big endian format it must be converted and placed in a
+	 * fixed length buffer.
+	 */
+	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
+				   DMA_TO_DEVICE);
+	if (ret)
+		return ret;
+
+	/* Save the workarea address since it is updated in order to perform
+	 * the concatenation
+	 */
+	save = src.address;
+
+	/* Copy the ECC modulus */
+	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
+				CCP_ECC_OPERAND_SIZE, true);
+	src.address += CCP_ECC_OPERAND_SIZE;
+
+	/* Copy the first operand */
+	ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
+				ecc->u.mm.operand_1_len,
+				CCP_ECC_OPERAND_SIZE, true);
+	src.address += CCP_ECC_OPERAND_SIZE;
+
+	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
+		/* Copy the second operand */
+		ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
+					ecc->u.mm.operand_2_len,
+					CCP_ECC_OPERAND_SIZE, true);
+		src.address += CCP_ECC_OPERAND_SIZE;
+	}
+
+	/* Restore the workarea address */
+	src.address = save;
+
+	/* Prepare the output area for the operation */
+	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
+				   DMA_FROM_DEVICE);
+	if (ret)
+		goto e_src;
+
+	op.soc = 1;
+	op.src.u.dma.address = src.dma.address;
+	op.src.u.dma.offset = 0;
+	op.src.u.dma.length = src.length;
+	op.dst.u.dma.address = dst.dma.address;
+	op.dst.u.dma.offset = 0;
+	op.dst.u.dma.length = dst.length;
+
+	op.u.ecc.function = cmd->u.ecc.function;
+
+	ret = ccp_perform_ecc(&op);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_dst;
+	}
+
+	ecc->ecc_result = le16_to_cpup(
+		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
+	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
+		ret = -EIO;
+		goto e_dst;
+	}
+
+	/* Save the ECC result */
+	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);
+
+e_dst:
+	ccp_dm_free(&dst);
+
+e_src:
+	ccp_dm_free(&src);
+
+	return ret;
+}
+
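Each ccp_reverse_set_dm_area() call above fills one fixed-size operand slot with the big-endian input reversed into little-endian byte order. A rough stand-alone illustration of that per-slot reversal (padding and sign-extension details omitted; the slot-size constant is a stand-in, not driver code):

	#include <stdint.h>
	#include <string.h>

	#define ECC_OPERAND_SLOT 64	/* stand-in for CCP_ECC_OPERAND_SIZE */

	static void reverse_into_slot(uint8_t slot[ECC_OPERAND_SLOT],
				      const uint8_t *be_operand, size_t len)
	{
		size_t i;

		memset(slot, 0, ECC_OPERAND_SLOT);
		for (i = 0; i < len; i++)
			slot[i] = be_operand[len - 1 - i];
	}
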
+static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
+	struct ccp_dm_workarea src, dst;
+	struct ccp_op op;
+	int ret;
+	u8 *save;
+
+	if (!ecc->u.pm.point_1.x ||
+	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
+	    !ecc->u.pm.point_1.y ||
+	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
+		return -EINVAL;
+
+	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
+		if (!ecc->u.pm.point_2.x ||
+		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
+		    !ecc->u.pm.point_2.y ||
+		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
+			return -EINVAL;
+	} else {
+		if (!ecc->u.pm.domain_a ||
+		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
+			return -EINVAL;
+
+		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
+			if (!ecc->u.pm.scalar ||
+			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
+				return -EINVAL;
+	}
+
+	if (!ecc->u.pm.result.x ||
+	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
+	    !ecc->u.pm.result.y ||
+	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
+		return -EINVAL;
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+
+	/* Concatenate the modulus and the operands. Both the modulus and
+	 * the operands must be in little endian format.  Since the input
+	 * is in big endian format it must be converted and placed in a
+	 * fixed length buffer.
+	 */
+	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
+				   DMA_TO_DEVICE);
+	if (ret)
+		return ret;
+
+	/* Save the workarea address since it is updated in order to perform
+	 * the concatenation
+	 */
+	save = src.address;
+
+	/* Copy the ECC modulus */
+	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
+				CCP_ECC_OPERAND_SIZE, true);
+	src.address += CCP_ECC_OPERAND_SIZE;
+
+	/* Copy the first point X and Y coordinate */
+	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
+				ecc->u.pm.point_1.x_len,
+				CCP_ECC_OPERAND_SIZE, true);
+	src.address += CCP_ECC_OPERAND_SIZE;
+	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
+				ecc->u.pm.point_1.y_len,
+				CCP_ECC_OPERAND_SIZE, true);
+	src.address += CCP_ECC_OPERAND_SIZE;
+
+	/* Set the first point Z coordinate to 1 */
+	*(src.address) = 0x01;
+	src.address += CCP_ECC_OPERAND_SIZE;
+
+	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
+		/* Copy the second point X and Y coordinate */
+		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
+					ecc->u.pm.point_2.x_len,
+					CCP_ECC_OPERAND_SIZE, true);
+		src.address += CCP_ECC_OPERAND_SIZE;
+		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
+					ecc->u.pm.point_2.y_len,
+					CCP_ECC_OPERAND_SIZE, true);
+		src.address += CCP_ECC_OPERAND_SIZE;
+
+		/* Set the second point Z coordinate to 1 */
+		*(src.address) = 0x01;
+		src.address += CCP_ECC_OPERAND_SIZE;
+	} else {
+		/* Copy the Domain "a" parameter */
+		ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
+					ecc->u.pm.domain_a_len,
+					CCP_ECC_OPERAND_SIZE, true);
+		src.address += CCP_ECC_OPERAND_SIZE;
+
+		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
+			/* Copy the scalar value */
+			ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
+						ecc->u.pm.scalar_len,
+						CCP_ECC_OPERAND_SIZE, true);
+			src.address += CCP_ECC_OPERAND_SIZE;
+		}
+	}
+
+	/* Restore the workarea address */
+	src.address = save;
+
+	/* Prepare the output area for the operation */
+	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
+				   DMA_FROM_DEVICE);
+	if (ret)
+		goto e_src;
+
+	op.soc = 1;
+	op.src.u.dma.address = src.dma.address;
+	op.src.u.dma.offset = 0;
+	op.src.u.dma.length = src.length;
+	op.dst.u.dma.address = dst.dma.address;
+	op.dst.u.dma.offset = 0;
+	op.dst.u.dma.length = dst.length;
+
+	op.u.ecc.function = cmd->u.ecc.function;
+
+	ret = ccp_perform_ecc(&op);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_dst;
+	}
+
+	ecc->ecc_result = le16_to_cpup(
+		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
+	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
+		ret = -EIO;
+		goto e_dst;
+	}
+
+	/* Save the workarea address since it is updated as we walk through
+	 * to copy the point math result
+	 */
+	save = dst.address;
+
+	/* Save the ECC result X and Y coordinates */
+	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
+				CCP_ECC_MODULUS_BYTES);
+	dst.address += CCP_ECC_OUTPUT_SIZE;
+	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
+				CCP_ECC_MODULUS_BYTES);
+	dst.address += CCP_ECC_OUTPUT_SIZE;
+
+	/* Restore the workarea address */
+	dst.address = save;
+
+e_dst:
+	ccp_dm_free(&dst);
+
+e_src:
+	ccp_dm_free(&src);
+
+	return ret;
+}
+
+static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
+
+	ecc->ecc_result = 0;
+
+	if (!ecc->mod ||
+	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
+		return -EINVAL;
+
+	switch (ecc->function) {
+	case CCP_ECC_FUNCTION_MMUL_384BIT:
+	case CCP_ECC_FUNCTION_MADD_384BIT:
+	case CCP_ECC_FUNCTION_MINV_384BIT:
+		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
+
+	case CCP_ECC_FUNCTION_PADD_384BIT:
+	case CCP_ECC_FUNCTION_PMUL_384BIT:
+	case CCP_ECC_FUNCTION_PDBL_384BIT:
+		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
+
+	default:
+		return -EINVAL;
+	}
+}
+
+int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	int ret;
+
+	cmd->engine_error = 0;
+	cmd_q->cmd_error = 0;
+	cmd_q->int_rcvd = 0;
+	cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
+
+	switch (cmd->engine) {
+	case CCP_ENGINE_AES:
+		ret = ccp_run_aes_cmd(cmd_q, cmd);
+		break;
+	case CCP_ENGINE_XTS_AES_128:
+		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
+		break;
+	case CCP_ENGINE_SHA:
+		ret = ccp_run_sha_cmd(cmd_q, cmd);
+		break;
+	case CCP_ENGINE_RSA:
+		ret = ccp_run_rsa_cmd(cmd_q, cmd);
+		break;
+	case CCP_ENGINE_PASSTHRU:
+		ret = ccp_run_passthru_cmd(cmd_q, cmd);
+		break;
+	case CCP_ENGINE_ECC:
+		ret = ccp_run_ecc_cmd(cmd_q, cmd);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
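ccp_run_cmd() is the single dispatch point for every engine; callers build a struct ccp_cmd much like the hmac_cmd constructed above. A minimal sketch of a one-shot SHA-256 command using only fields exercised in this file (the helper name is ours, and a real caller would normally submit through the driver's queuing interface rather than call ccp_run_cmd() directly):

	/* Sketch only: ctx_sg must be a CCP_SHA_CTXSIZE scatterlist that
	 * receives the (big-endian) digest, src_sg holds the message.
	 */
	static int sha256_one_shot(struct ccp_cmd_queue *cmd_q,
				   struct scatterlist *ctx_sg,
				   struct scatterlist *src_sg, u64 src_len)
	{
		struct ccp_cmd cmd;

		memset(&cmd, 0, sizeof(cmd));
		cmd.engine = CCP_ENGINE_SHA;
		cmd.u.sha.type = CCP_SHA_TYPE_256;
		cmd.u.sha.ctx = ctx_sg;
		cmd.u.sha.ctx_len = CCP_SHA_CTXSIZE;
		cmd.u.sha.src = src_sg;
		cmd.u.sha.src_len = src_len;
		cmd.u.sha.first = 1;
		cmd.u.sha.final = 1;
		cmd.u.sha.msg_bits = src_len << 3;

		return ccp_run_cmd(cmd_q, &cmd);
	}
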
diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c
new file mode 100644
index 00000000000..0d746236df5
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-pci.c
@@ -0,0 +1,360 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+#define IO_BAR				2
+#define MSIX_VECTORS			2
+
+struct ccp_msix {
+	u32 vector;
+	char name[16];
+};
+
+struct ccp_pci {
+	int msix_count;
+	struct ccp_msix msix[MSIX_VECTORS];
+};
+
+static int ccp_get_msix_irqs(struct ccp_device *ccp)
+{
+	struct ccp_pci *ccp_pci = ccp->dev_specific;
+	struct device *dev = ccp->dev;
+	struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+	struct msix_entry msix_entry[MSIX_VECTORS];
+	unsigned int name_len = sizeof(ccp_pci->msix[0].name) - 1;
+	int v, ret;
+
+	for (v = 0; v < ARRAY_SIZE(msix_entry); v++)
+		msix_entry[v].entry = v;
+
+	ret = pci_enable_msix_range(pdev, msix_entry, 1, v);
+	if (ret < 0)
+		return ret;
+
+	ccp_pci->msix_count = ret;
+	for (v = 0; v < ccp_pci->msix_count; v++) {
+		/* Set the interrupt names and request the irqs */
+		snprintf(ccp_pci->msix[v].name, name_len, "ccp-%u", v);
+		ccp_pci->msix[v].vector = msix_entry[v].vector;
+		ret = request_irq(ccp_pci->msix[v].vector, ccp_irq_handler,
+				  0, ccp_pci->msix[v].name, dev);
+		if (ret) {
+			dev_notice(dev, "unable to allocate MSI-X IRQ (%d)\n",
+				   ret);
+			goto e_irq;
+		}
+	}
+
+	return 0;
+
+e_irq:
+	while (v--)
+		free_irq(ccp_pci->msix[v].vector, dev);
+
+	pci_disable_msix(pdev);
+
+	ccp_pci->msix_count = 0;
+
+	return ret;
+}
+
+static int ccp_get_msi_irq(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+	int ret;
+
+	ret = pci_enable_msi(pdev);
+	if (ret)
+		return ret;
+
+	ret = request_irq(pdev->irq, ccp_irq_handler, 0, "ccp", dev);
+	if (ret) {
+		dev_notice(dev, "unable to allocate MSI IRQ (%d)\n", ret);
+		goto e_msi;
+	}
+
+	return 0;
+
+e_msi:
+	pci_disable_msi(pdev);
+
+	return ret;
+}
+
+static int ccp_get_irqs(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	int ret;
+
+	ret = ccp_get_msix_irqs(ccp);
+	if (!ret)
+		return 0;
+
+	/* Couldn't get MSI-X vectors, try MSI */
+	dev_notice(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
+	ret = ccp_get_msi_irq(ccp);
+	if (!ret)
+		return 0;
+
+	/* Couldn't get MSI interrupt */
+	dev_notice(dev, "could not enable MSI (%d)\n", ret);
+
+	return ret;
+}
+
+static void ccp_free_irqs(struct ccp_device *ccp)
+{
+	struct ccp_pci *ccp_pci = ccp->dev_specific;
+	struct device *dev = ccp->dev;
+	struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+
+	if (ccp_pci->msix_count) {
+		while (ccp_pci->msix_count--)
+			free_irq(ccp_pci->msix[ccp_pci->msix_count].vector,
+				 dev);
+		pci_disable_msix(pdev);
+	} else {
+		free_irq(pdev->irq, dev);
+		pci_disable_msi(pdev);
+	}
+}
+
+static int ccp_find_mmio_area(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+	resource_size_t io_len;
+	unsigned long io_flags;
+	int bar;
+
+	io_flags = pci_resource_flags(pdev, IO_BAR);
+	io_len = pci_resource_len(pdev, IO_BAR);
+	if ((io_flags & IORESOURCE_MEM) && (io_len >= (IO_OFFSET + 0x800)))
+		return IO_BAR;
+
+	for (bar = 0; bar < PCI_STD_RESOURCE_END; bar++) {
+		io_flags = pci_resource_flags(pdev, bar);
+		io_len = pci_resource_len(pdev, bar);
+		if ((io_flags & IORESOURCE_MEM) &&
+		    (io_len >= (IO_OFFSET + 0x800)))
+			return bar;
+	}
+
+	return -EIO;
+}
+
+static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct ccp_device *ccp;
+	struct ccp_pci *ccp_pci;
+	struct device *dev = &pdev->dev;
+	unsigned int bar;
+	int ret;
+
+	ret = -ENOMEM;
+	ccp = ccp_alloc_struct(dev);
+	if (!ccp)
+		goto e_err;
+
+	ccp_pci = kzalloc(sizeof(*ccp_pci), GFP_KERNEL);
+	if (!ccp_pci) {
+		ret = -ENOMEM;
+		goto e_free1;
+	}
+	ccp->dev_specific = ccp_pci;
+	ccp->get_irq = ccp_get_irqs;
+	ccp->free_irq = ccp_free_irqs;
+
+	ret = pci_request_regions(pdev, "ccp");
+	if (ret) {
+		dev_err(dev, "pci_request_regions failed (%d)\n", ret);
+		goto e_free2;
+	}
+
+	ret = pci_enable_device(pdev);
+	if (ret) {
+		dev_err(dev, "pci_enable_device failed (%d)\n", ret);
+		goto e_regions;
+	}
+
+	pci_set_master(pdev);
+
+	ret = ccp_find_mmio_area(ccp);
+	if (ret < 0)
+		goto e_device;
+	bar = ret;
+
+	ret = -EIO;
+	ccp->io_map = pci_iomap(pdev, bar, 0);
+	if (ccp->io_map == NULL) {
+		dev_err(dev, "pci_iomap failed\n");
+		goto e_device;
+	}
+	ccp->io_regs = ccp->io_map + IO_OFFSET;
+
+	ret = dma_set_mask(dev, DMA_BIT_MASK(48));
+	if (ret == 0) {
+		ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(48));
+		if (ret) {
+			dev_err(dev,
+				"dma_set_coherent_mask failed (%d)\n",
+				ret);
+			goto e_bar0;
+		}
+	} else {
+		ret = dma_set_mask(dev, DMA_BIT_MASK(32));
+		if (ret) {
+			dev_err(dev, "dma_set_mask failed (%d)\n", ret);
+			goto e_bar0;
+		}
+	}
+
+	dev_set_drvdata(dev, ccp);
+
+	ret = ccp_init(ccp);
+	if (ret)
+		goto e_bar0;
+
+	dev_notice(dev, "enabled\n");
+
+	return 0;
+
+e_bar0:
+	pci_iounmap(pdev, ccp->io_map);
+
+e_device:
+	pci_disable_device(pdev);
+
+e_regions:
+	pci_release_regions(pdev);
+
+e_free2:
+	kfree(ccp_pci);
+
+e_free1:
+	kfree(ccp);
+
+e_err:
+	dev_notice(dev, "initialization failed\n");
+	return ret;
+}
+
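The probe routine above first tries a 48-bit streaming and coherent DMA mask and falls back to 32 bits. The same fallback can be written with the kernel's combined helper; this is an alternative sketch, not part of the patch, and the helper name is ours:

	#include <linux/dma-mapping.h>

	static int ccp_set_dma_masks(struct device *dev)
	{
		int ret;

		/* Prefer the device's 48-bit addressing, else fall back */
		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
		if (ret)
			ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
		return ret;
	}
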
+static void ccp_pci_remove(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ccp_device *ccp = dev_get_drvdata(dev);
+
+	if (!ccp)
+		return;
+
+	ccp_destroy(ccp);
+
+	pci_iounmap(pdev, ccp->io_map);
+
+	pci_disable_device(pdev);
+
+	pci_release_regions(pdev);
+
+	kfree(ccp);
+
+	dev_notice(dev, "disabled\n");
+}
+
+#ifdef CONFIG_PM
+static int ccp_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct device *dev = &pdev->dev;
+	struct ccp_device *ccp = dev_get_drvdata(dev);
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	ccp->suspending = 1;
+
+	/* Wake all the queue kthreads to prepare for suspend */
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		wake_up_process(ccp->cmd_q[i].kthread);
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	/* Wait for all queue kthreads to say they're done */
+	while (!ccp_queues_suspended(ccp))
+		wait_event_interruptible(ccp->suspend_queue,
+					 ccp_queues_suspended(ccp));
+
+	return 0;
+}
+
+static int ccp_pci_resume(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ccp_device *ccp = dev_get_drvdata(dev);
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	ccp->suspending = 0;
+
+	/* Wake up all the kthreads */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		ccp->cmd_q[i].suspended = 0;
+		wake_up_process(ccp->cmd_q[i].kthread);
+	}
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	return 0;
+}
+#endif
+
+static DEFINE_PCI_DEVICE_TABLE(ccp_pci_table) = {
+	{ PCI_VDEVICE(AMD, 0x1537), },
+	/* Last entry must be zero */
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, ccp_pci_table);
+
+static struct pci_driver ccp_pci_driver = {
+	.name = "AMD Cryptographic Coprocessor",
+	.id_table = ccp_pci_table,
+	.probe = ccp_pci_probe,
+	.remove = ccp_pci_remove,
+#ifdef CONFIG_PM
+	.suspend = ccp_pci_suspend,
+	.resume = ccp_pci_resume,
+#endif
+};
+
+int ccp_pci_init(void)
+{
+	return pci_register_driver(&ccp_pci_driver);
+}
+
+void ccp_pci_exit(void)
+{
+	pci_unregister_driver(&ccp_pci_driver);
+}
