1 files changed, 2090 insertions, 0 deletions
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
new file mode 100644
index 00000000000..23163fda503
--- /dev/null
+++ b/drivers/crypto/n2_core.c
@@ -0,0 +1,2090 @@
+/* n2_core.c: Niagara2 Stream Processing Unit (SPU) crypto support.
+ *
+ * Copyright (C) 2010 David S. Miller <davem@davemloft.net>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/crypto.h>
+#include <crypto/md5.h>
+#include <crypto/sha.h>
+#include <crypto/aes.h>
+#include <crypto/des.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+
+#include <crypto/internal/hash.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+
+#include <asm/hypervisor.h>
+#include <asm/mdesc.h>
+
+#include "n2_core.h"
+
+#define DRV_MODULE_NAME		"n2_crypto"
+#define DRV_MODULE_VERSION	"0.1"
+#define DRV_MODULE_RELDATE	"April 29, 2010"
+
+static char version[] __devinitdata =
+	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+
+MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_DESCRIPTION("Niagara2 Crypto driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+#define N2_CRA_PRIORITY		300
+
+static DEFINE_MUTEX(spu_lock);
+
+struct spu_queue {
+	cpumask_t		sharing;
+	unsigned long		qhandle;
+
+	spinlock_t		lock;
+	u8			q_type;
+	void			*q;
+	unsigned long		head;
+	unsigned long		tail;
+	struct list_head	jobs;
+
+	unsigned long		devino;
+
+	char			irq_name[32];
+	unsigned int		irq;
+
+	struct list_head	list;
+};
+
+static struct spu_queue **cpu_to_cwq;
+static struct spu_queue **cpu_to_mau;
+
+static unsigned long spu_next_offset(struct spu_queue *q, unsigned long off)
+{
+	if (q->q_type == HV_NCS_QTYPE_MAU) {
+		off += MAU_ENTRY_SIZE;
+		if (off == (MAU_ENTRY_SIZE * MAU_NUM_ENTRIES))
+			off = 0;
+	} else {
+		off += CWQ_ENTRY_SIZE;
+		if (off == (CWQ_ENTRY_SIZE * CWQ_NUM_ENTRIES))
+			off = 0;
+	}
+	return off;
+}
+
+struct n2_request_common {
+	struct list_head	entry;
+	unsigned int		offset;
+};
+#define OFFSET_NOT_RUNNING	(~(unsigned int)0)
+
+/* An async job request records the final tail value it used in
+ * n2_request_common->offset, test to see if that offset is in
+ * the range old_head, new_head, inclusive.
+ */
+static inline bool job_finished(struct spu_queue *q, unsigned int offset,
+				unsigned long old_head, unsigned long new_head)
+{
+	if (old_head <= new_head) {
+		if (offset > old_head && offset <= new_head)
+			return true;
+	} else {
+		if (offset > old_head || offset <= new_head)
+			return true;
+	}
+	return false;
+}
+
+/* When the HEAD marker is unequal to the actual HEAD, we get
+ * a virtual device INO interrupt.  We should process the
+ * completed CWQ entries and adjust the HEAD marker to clear
+ * the IRQ.
+ */
+static irqreturn_t cwq_intr(int irq, void *dev_id)
+{
+	unsigned long off, new_head, hv_ret;
+	struct spu_queue *q = dev_id;
+
+	pr_err("CPU[%d]: Got CWQ interrupt for qhdl[%lx]\n",
+	       smp_processor_id(), q->qhandle);
+
+	spin_lock(&q->lock);
+
+	hv_ret = sun4v_ncs_gethead(q->qhandle, &new_head);
+
+	pr_err("CPU[%d]: CWQ gethead[%lx] hv_ret[%lu]\n",
+	       smp_processor_id(), new_head, hv_ret);
+
+	for (off = q->head; off != new_head; off = spu_next_offset(q, off)) {
+		/* XXX ... XXX */
+	}
+
+	hv_ret = sun4v_ncs_sethead_marker(q->qhandle, new_head);
+	if (hv_ret == HV_EOK)
+		q->head = new_head;
+
+	spin_unlock(&q->lock);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t mau_intr(int irq, void *dev_id)
+{
+	struct spu_queue *q = dev_id;
+	unsigned long head, hv_ret;
+
+	spin_lock(&q->lock);
+
+	pr_err("CPU[%d]: Got MAU interrupt for qhdl[%lx]\n",
+	       smp_processor_id(), q->qhandle);
+
+	hv_ret = sun4v_ncs_gethead(q->qhandle, &head);
+
+	pr_err("CPU[%d]: MAU gethead[%lx] hv_ret[%lu]\n",
+	       smp_processor_id(), head, hv_ret);
+
+	sun4v_ncs_sethead_marker(q->qhandle, head);
+
+	spin_unlock(&q->lock);
+
+	return IRQ_HANDLED;
+}
+
+static void *spu_queue_next(struct spu_queue *q, void *cur)
+{
+	return q->q + spu_next_offset(q, cur - q->q);
+}
+
+static int spu_queue_num_free(struct spu_queue *q)
+{
+	unsigned long head = q->head;
+	unsigned long tail = q->tail;
+	unsigned long end = (CWQ_ENTRY_SIZE * CWQ_NUM_ENTRIES);
+	unsigned long diff;
+
+	if (head > tail)
+		diff = head - tail;
+	else
+		diff = (end - tail) + head;
+
+	return (diff / CWQ_ENTRY_SIZE) - 1;
+}
+
+static void *spu_queue_alloc(struct spu_queue *q, int num_entries)
+{
+	int avail = spu_queue_num_free(q);
+
+	if (avail >= num_entries)
+		return q->q + q->tail;
+
+	return NULL;
+}
+
+static unsigned long spu_queue_submit(struct spu_queue *q, void *last)
+{
+	unsigned long hv_ret, new_tail;
+
+	new_tail = spu_next_offset(q, last - q->q);
+
+	hv_ret = sun4v_ncs_settail(q->qhandle, new_tail);
+	if (hv_ret == HV_EOK)
+		q->tail = new_tail;
+	return hv_ret;
+}
+
+static u64 control_word_base(unsigned int len, unsigned int hmac_key_len,
+			     int enc_type, int auth_type,
+			     unsigned int hash_len,
+			     bool sfas, bool sob, bool eob, bool encrypt,
+			     int opcode)
+{
+	u64 word = (len - 1) & CONTROL_LEN;
+
+	word |= ((u64) opcode << CONTROL_OPCODE_SHIFT);
+	word |= ((u64) enc_type << CONTROL_ENC_TYPE_SHIFT);
+	word |= ((u64) auth_type << CONTROL_AUTH_TYPE_SHIFT);
+	if (sfas)
+		word |= CONTROL_STORE_FINAL_AUTH_STATE;
+	if (sob)
+		word |= CONTROL_START_OF_BLOCK;
+	if (eob)
+		word |= CONTROL_END_OF_BLOCK;
+	if (encrypt)
+		word |= CONTROL_ENCRYPT;
+	if (hmac_key_len)
+		word |= ((u64) (hmac_key_len - 1)) << CONTROL_HMAC_KEY_LEN_SHIFT;
+	if (hash_len)
+		word |= ((u64) (hash_len - 1)) << CONTROL_HASH_LEN_SHIFT;
+
+	return word;
+}
+
+#if 0
+static inline bool n2_should_run_async(struct spu_queue *qp, int this_len)
+{
+	if (this_len >= 64 ||
+	    qp->head != qp->tail)
+		return true;
+	return false;
+}
+#endif
+
+struct n2_base_ctx {
+	struct list_head		list;
+};
+
+static void n2_base_ctx_init(struct n2_base_ctx *ctx)
+{
+	INIT_LIST_HEAD(&ctx->list);
+}
+
+struct n2_hash_ctx {
+	struct n2_base_ctx		base;
+
+	struct crypto_ahash		*fallback_tfm;
+};
+
+struct n2_hash_req_ctx {
+	union {
+		struct md5_state	md5;
+		struct sha1_state	sha1;
+		struct sha256_state	sha256;
+	} u;
+
+	unsigned char			hash_key[64];
+	unsigned char			keyed_zero_hash[32];
+
+	struct ahash_request		fallback_req;
+};
+
+static int n2_hash_async_init(struct ahash_request *req)
+{
+	struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
+	rctx->fallback_req.base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	return crypto_ahash_init(&rctx->fallback_req);
+}
+
+static int n2_hash_async_update(struct ahash_request *req)
+{
+	struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
+	rctx->fallback_req.base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+	rctx->fallback_req.nbytes = req->nbytes;
+	rctx->fallback_req.src = req->src;
+
+	return crypto_ahash_update(&rctx->fallback_req);
+}
+
+static int n2_hash_async_final(struct ahash_request *req)
+{
+	struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
+	rctx->fallback_req.base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+	rctx->fallback_req.result = req->result;
+
+	return crypto_ahash_final(&rctx->fallback_req);
+}
+
+static int n2_hash_async_finup(struct ahash_request *req)
+{
+	struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
+	rctx->fallback_req.base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+	rctx->fallback_req.nbytes = req->nbytes;
+	rctx->fallback_req.src = req->src;
+	rctx->fallback_req.result = req->result;
+
+	return crypto_ahash_finup(&rctx->fallback_req);
+}
+
+static int n2_hash_cra_init(struct crypto_tfm *tfm)
+{
+	const char *fallback_driver_name = tfm->__crt_alg->cra_name;
+	struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
+	struct n2_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct crypto_ahash *fallback_tfm;
+	int err;
+
+	fallback_tfm = crypto_alloc_ahash(fallback_driver_name, 0,
+					  CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(fallback_tfm)) {
+		pr_warning("Fallback driver '%s' could not be loaded!\n",
+			   fallback_driver_name);
+		err = PTR_ERR(fallback_tfm);
+		goto out;
+	}
+
+	crypto_ahash_set_reqsize(ahash, (sizeof(struct n2_hash_req_ctx) +
+					 crypto_ahash_reqsize(fallback_tfm)));
+
+	ctx->fallback_tfm = fallback_tfm;
+	return 0;
+
+out:
+	return err;
+}
+
+static void n2_hash_cra_exit(struct crypto_tfm *tfm)
+{
+	struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
+	struct n2_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+
+	crypto_free_ahash(ctx->fallback_tfm);
+}
+
+static unsigned long wait_for_tail(struct spu_queue *qp)
+{
+	unsigned long head, hv_ret;
+
+	do {
+		hv_ret = sun4v_ncs_gethead(qp->qhandle, &head);
+		if (hv_ret != HV_EOK) {
+			pr_err("Hypervisor error on gethead\n");
+			break;
+		}
+		if (head == qp->tail) {
+			qp->head = head;
+			break;
+		}
+	} while (1);
+	return hv_ret;
+}
+
+static unsigned long submit_and_wait_for_tail(struct spu_queue *qp,
+					      struct cwq_initial_entry *ent)
+{
+	unsigned long hv_ret = spu_queue_submit(qp, ent);
+
+	if (hv_ret == HV_EOK)
+		hv_ret = wait_for_tail(qp);
+
+	return hv_ret;
+}
+
+static int n2_hash_async_digest(struct ahash_request *req,
+				unsigned int auth_type, unsigned int digest_size,
+				unsigned int result_size, void *hash_loc)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct cwq_initial_entry *ent;
+	struct crypto_hash_walk walk;
+	struct spu_queue *qp;
+	unsigned long flags;
+	int err = -ENODEV;
+	int nbytes, cpu;
+
+	/* The total effective length of the operation may not
+	 * exceed 2^16.
+	 */
+	if (unlikely(req->nbytes > (1 << 16))) {
+		struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
+
+		ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
+		rctx->fallback_req.base.flags =
+			req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+		rctx->fallback_req.nbytes = req->nbytes;
+		rctx->fallback_req.src = req->src;
+		rctx->fallback_req.result = req->result;
+
+		return crypto_ahash_digest(&rctx->fallback_req);
+	}
+
+	n2_base_ctx_init(&ctx->base);
+
+	nbytes = crypto_hash_walk_first(req, &walk);
+
+	cpu = get_cpu();
+	qp = cpu_to_cwq[cpu];
+	if (!qp)
+		goto out;
+
+	spin_lock_irqsave(&qp->lock, flags);
+
+	/* XXX can do better, improve this later by doing a by-hand scatterlist
+	 * XXX walk, etc.
+	 */
+	ent = qp->q + qp->tail;
+
+	ent->control = control_word_base(nbytes, 0, 0,
+					 auth_type, digest_size,
+					 false, true, false, false,
+					 OPCODE_INPLACE_BIT |
+					 OPCODE_AUTH_MAC);
+	ent->src_addr = __pa(walk.data);
+	ent->auth_key_addr = 0UL;
+	ent->auth_iv_addr = __pa(hash_loc);
+	ent->final_auth_state_addr = 0UL;
+	ent->enc_key_addr = 0UL;
+	ent->enc_iv_addr = 0UL;
+	ent->dest_addr = __pa(hash_loc);
+
+	nbytes = crypto_hash_walk_done(&walk, 0);
+	while (nbytes > 0) {
+		ent = spu_queue_next(qp, ent);
+
+		ent->control = (nbytes - 1);
+		ent->src_addr = __pa(walk.data);
+		ent->auth_key_addr = 0UL;
+		ent->auth_iv_addr = 0UL;
+		ent->final_auth_state_addr = 0UL;
+		ent->enc_key_addr = 0UL;
+		ent->enc_iv_addr = 0UL;
+		ent->dest_addr = 0UL;
+
+		nbytes = crypto_hash_walk_done(&walk, 0);
+	}
+	ent->control |= CONTROL_END_OF_BLOCK;
+
+	if (submit_and_wait_for_tail(qp, ent) != HV_EOK)
+		err = -EINVAL;
+	else
+		err = 0;
+
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	if (!err)
+		memcpy(req->result, hash_loc, result_size);
+out:
+	put_cpu();
+
+	return err;
+}
+
+static int n2_md5_async_digest(struct ahash_request *req)
+{
+	struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
+	struct md5_state *m = &rctx->u.md5;
+
+	if (unlikely(req->nbytes == 0)) {
+		static const char md5_zero[MD5_DIGEST_SIZE] = {
+			0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04,
+			0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e,
+		};
+
+		memcpy(req->result, md5_zero, MD5_DIGEST_SIZE);
+		return 0;
+	}
+	m->hash[0] = cpu_to_le32(0x67452301);
+	m->hash[1] = cpu_to_le32(0xefcdab89);
+	m->hash[2] = cpu_to_le32(0x98badcfe);
+	m->hash[3] = cpu_to_le32(0x10325476);
+
+	return n2_hash_async_digest(req, AUTH_TYPE_MD5,
+				    MD5_DIGEST_SIZE, MD5_DIGEST_SIZE,
+				    m->hash);
+}
+
+static int n2_sha1_async_digest(struct ahash_request *req)
+{
+	struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
+	struct sha1_state *s = &rctx->u.sha1;
+
+	if (unlikely(req->nbytes == 0)) {
+		static const char sha1_zero[SHA1_DIGEST_SIZE] = {
+			0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32,
+			0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, 0xaf, 0xd8,
+			0x07, 0x09
+		};
+
+		memcpy(req->result, sha1_zero, SHA1_DIGEST_SIZE);
+		return 0;
+	}
+	s->state[0] = SHA1_H0;
+	s->state[1] = SHA1_H1;
+	s->state[2] = SHA1_H2;
+	s->state[3] = SHA1_H3;
+	s->state[4] = SHA1_H4;
+
+	return n2_hash_async_digest(req, AUTH_TYPE_SHA1,
+				    SHA1_DIGEST_SIZE, SHA1_DIGEST_SIZE,
+				    s->state);
+}
+
+static int n2_sha256_async_digest(struct ahash_request *req)
+{
+	struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
+	struct sha256_state *s = &rctx->u.sha256;
+
+	if (req->nbytes == 0) {
+		static const char sha256_zero[SHA256_DIGEST_SIZE] = {
+			0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a,
+			0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae,
+			0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99,
+			0x1b, 0x78, 0x52, 0xb8, 0x55
+		};
+
+		memcpy(req->result, sha256_zero, SHA256_DIGEST_SIZE);
+		return 0;
+	}
+	s->state[0] = SHA256_H0;
+	s->state[1] = SHA256_H1;
+	s->state[2] = SHA256_H2;
+	s->state[3] = SHA256_H3;
+	s->state[4] = SHA256_H4;
+	s->state[5] = SHA256_H5;
+	s->state[6] = SHA256_H6;
+	s->state[7] = SHA256_H7;
+
+	return n2_hash_async_digest(req, AUTH_TYPE_SHA256,
+				    SHA256_DIGEST_SIZE, SHA256_DIGEST_SIZE,
+				    s->state);
+}
+
+static int n2_sha224_async_digest(struct ahash_request *req)
+{
+	struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
+	struct sha256_state *s = &rctx->u.sha256;
+
+	if (req->nbytes == 0) {
+		static const char sha224_zero[SHA224_DIGEST_SIZE] = {
+			0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, 0x47,
+			0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, 0x15, 0xa2,
+			0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, 0xc5, 0xb3, 0xe4,
+			0x2f
+		};
+
+		memcpy(req->result, sha224_zero, SHA224_DIGEST_SIZE);
+		return 0;
+	}
+	s->state[0] = SHA224_H0;
+	s->state[1] = SHA224_H1;
+	s->state[2] = SHA224_H2;
+	s->state[3] = SHA224_H3;
+	s->state[4] = SHA224_H4;
+	s->state[5] = SHA224_H5;
+	s->state[6] = SHA224_H6;
+	s->state[7] = SHA224_H7;
+
+	return n2_hash_async_digest(req, AUTH_TYPE_SHA256,
+				    SHA256_DIGEST_SIZE, SHA224_DIGEST_SIZE,
+				    s->state);
+}
+
+struct n2_cipher_context {
+	int			key_len;
+	int			enc_type;
+	union {
+		u8		aes[AES_MAX_KEY_SIZE];
+		u8		des[DES_KEY_SIZE];
+		u8		des3[3 * DES_KEY_SIZE];
+		u8		arc4[258]; /* S-box, X, Y */
+	} key;
+};
+
+#define N2_CHUNK_ARR_LEN	16
+
+struct n2_crypto_chunk {
+	struct list_head	entry;
+	unsigned long		iv_paddr : 44;
+	unsigned long		arr_len : 20;
+	unsigned long		dest_paddr;
+	unsigned long		dest_final;
+	struct {
+		unsigned long	src_paddr : 44;
+		unsigned long	src_len : 20;
+	} arr[N2_CHUNK_ARR_LEN];
+};
+
+struct n2_request_context {
+	struct ablkcipher_walk	walk;
+	struct list_head	chunk_list;
+	struct n2_crypto_chunk	chunk;
+	u8			temp_iv[16];
+};
+
+/* The SPU allows some level of flexibility for partial cipher blocks
+ * being specified in a descriptor.
+ *
+ * It merely requires that every descriptor's length field is at least
+ * as large as the cipher block size.  This means that a cipher block
+ * can span at most 2 descriptors.  However, this does not allow a
+ * partial block to span into the final descriptor as that would
+ * violate the rule (since every descriptor's length must be at lest
+ * the block size).  So, for example, assuming an 8 byte block size:
+ *
+ *	0xe --> 0xa --> 0x8
+ *
+ * is a valid length sequence, whereas:
+ *
+ *	0xe --> 0xb --> 0x7
+ *
+ * is not a valid sequence.
+ */
+
+struct n2_cipher_alg {
+	struct list_head	entry;
+	u8			enc_type;
+	struct crypto_alg	alg;
+};
+
+static inline struct n2_cipher_alg *n2_cipher_alg(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *alg = tfm->__crt_alg;
+
+	return container_of(alg, struct n2_cipher_alg, alg);
+}
+
+struct n2_cipher_request_context {
+	struct ablkcipher_walk	walk;
+};
+
+static int n2_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			 unsigned int keylen)
+{
+	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
+	struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm);
+
+	ctx->enc_type = (n2alg->enc_type & ENC_TYPE_CHAINING_MASK);
+
+	switch (keylen) {
+	case AES_KEYSIZE_128:
+		ctx->enc_type |= ENC_TYPE_ALG_AES128;
+		break;
+	case AES_KEYSIZE_192:
+		ctx->enc_type |= ENC_TYPE_ALG_AES192;
+		break;
+	case AES_KEYSIZE_256:
+		ctx->enc_type |= ENC_TYPE_ALG_AES256;
+		break;
+	default:
+		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	ctx->key_len = keylen;
+	memcpy(ctx->key.aes, key, keylen);
+	return 0;
+}
+
+static int n2_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			 unsigned int keylen)
+{
+	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
+	struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm);
+	u32 tmp[DES_EXPKEY_WORDS];
+	int err;
+
+	ctx->enc_type = n2alg->enc_type;
+
+	if (keylen != DES_KEY_SIZE) {
+		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	err = des_ekey(tmp, key);
+	if (err == 0 && (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
+		return -EINVAL;
+	}
+
+	ctx->key_len = keylen;
+	memcpy(ctx->key.des, key, keylen);
+	return 0;
+}
+
+static int n2_3des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			  unsigned int keylen)
+{
+	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
+	struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm);
+
+	ctx->enc_type = n2alg->enc_type;
+
+	if (keylen != (3 * DES_KEY_SIZE)) {
+		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	ctx->key_len = keylen;
+	memcpy(ctx->key.des3, key, keylen);
+	return 0;
+}
+
+static int n2_arc4_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			  unsigned int keylen)
+{
+	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
+	struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm);
+	u8 *s = ctx->key.arc4;
+	u8 *x = s + 256;
+	u8 *y = x + 1;
+	int i, j, k;
+
+	ctx->enc_type = n2alg->enc_type;
+
+	j = k = 0;
+	*x = 0;
+	*y = 0;
+	for (i = 0; i < 256; i++)
+		s[i] = i;
+	for (i = 0; i < 256; i++) {
+		u8 a = s[i];
+		j = (j + key[k] + a) & 0xff;
+		s[i] = s[j];
+		s[j] = a;
+		if (++k >= keylen)
+			k = 0;
+	}
+
+	return 0;
+}
+
+static inline int cipher_descriptor_len(int nbytes, unsigned int block_size)
+{
+	int this_len = nbytes;
+
+	this_len -= (nbytes & (block_size - 1));
+	return this_len > (1 << 16) ? (1 << 16) : this_len;
+}
+
+static int __n2_crypt_chunk(struct crypto_tfm *tfm, struct n2_crypto_chunk *cp,
+			    struct spu_queue *qp, bool encrypt)
+{
+	struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
+	struct cwq_initial_entry *ent;
+	bool in_place;
+	int i;
+
+	ent = spu_queue_alloc(qp, cp->arr_len);
+	if (!ent) {
+		pr_info("queue_alloc() of %d fails\n",
+			cp->arr_len);
+		return -EBUSY;
+	}
+
+	in_place = (cp->dest_paddr == cp->arr[0].src_paddr);
+
+	ent->control = control_word_base(cp->arr[0].src_len,
+					 0, ctx->enc_type, 0, 0,
+					 false, true, false, encrypt,
+					 OPCODE_ENCRYPT |
+					 (in_place ? OPCODE_INPLACE_BIT : 0));
+	ent->src_addr = cp->arr[0].src_paddr;
+	ent->auth_key_addr = 0UL;
+	ent->auth_iv_addr = 0UL;
+	ent->final_auth_state_addr = 0UL;
+	ent->enc_key_addr = __pa(&ctx->key);
+	ent->enc_iv_addr = cp->iv_paddr;
+	ent->dest_addr = (in_place ? 0UL : cp->dest_paddr);
+
+	for (i = 1; i < cp->arr_len; i++) {
+		ent = spu_queue_next(qp, ent);
+
+		ent->control = cp->arr[i].src_len - 1;
+		ent->src_addr = cp->arr[i].src_paddr;
+		ent->auth_key_addr = 0UL;
+		ent->auth_iv_addr = 0UL;
+		ent->final_auth_state_addr = 0UL;
+		ent->enc_key_addr = 0UL;
+		ent->enc_iv_addr = 0UL;
+		ent->dest_addr = 0UL;
+	}
+	ent->control |= CONTROL_END_OF_BLOCK;
+
+	return (spu_queue_submit(qp, ent) != HV_EOK) ? -EINVAL : 0;
+}
+
+static int n2_compute_chunks(struct ablkcipher_request *req)
+{
+	struct n2_request_context *rctx = ablkcipher_request_ctx(req);
+	struct ablkcipher_walk *walk = &rctx->walk;
+	struct n2_crypto_chunk *chunk;
+	unsigned long dest_prev;
+	unsigned int tot_len;
+	bool prev_in_place;
+	int err, nbytes;
+
+	ablkcipher_walk_init(walk, req->dst, req->src, req->nbytes);
+	err = ablkcipher_walk_phys(req, walk);
+	if (err)
+		return err;
+
+	INIT_LIST_HEAD(&rctx->chunk_list);
+
+	chunk = &rctx->chunk;
+	INIT_LIST_HEAD(&chunk->entry);
+
+	chunk->iv_paddr = 0UL;
+	chunk->arr_len = 0;
+	chunk->dest_paddr = 0UL;
+
+	prev_in_place = false;
+	dest_prev = ~0UL;
+	tot_len = 0;
+
+	while ((nbytes = walk->nbytes) != 0) {
+		unsigned long dest_paddr, src_paddr;
+		bool in_place;
+		int this_len;
+
+		src_paddr = (page_to_phys(walk->src.page) +
+			     walk->src.offset);
+		dest_paddr = (page_to_phys(walk->dst.page) +
+			      walk->dst.offset);
+		in_place = (src_paddr == dest_paddr);
+		this_len = cipher_descriptor_len(nbytes, walk->blocksize);
+
+		if (chunk->arr_len != 0) {
+			if (in_place != prev_in_place ||
+			    (!prev_in_place &&
+			     dest_paddr != dest_prev) ||
+			    chunk->arr_len == N2_CHUNK_ARR_LEN ||
+			    tot_len + this_len > (1 << 16)) {
+				chunk->dest_final = dest_prev;
+				list_add_tail(&chunk->entry,
+					      &rctx->chunk_list);
+				chunk = kzalloc(sizeof(*chunk), GFP_ATOMIC);
+				if (!chunk) {
+					err = -ENOMEM;
+					break;
+				}
+				INIT_LIST_HEAD(&chunk->entry);
+			}
+		}
+		if (chunk->arr_len == 0) {
+			chunk->dest_paddr = dest_paddr;
+			tot_len = 0;
+		}
+		chunk->arr[chunk->arr_len].src_paddr = src_paddr;
+		chunk->arr[chunk->arr_len].src_len = this_len;
+		chunk->arr_len++;
+
+		dest_prev = dest_paddr + this_len;
+		prev_in_place = in_place;
+		tot_len += this_len;
+
+		err = ablkcipher_walk_done(req, walk, nbytes - this_len);
+		if (err)
+			break;
+	}
+	if (!err && chunk->arr_len != 0) {
+		chunk->dest_final = dest_prev;
+		list_add_tail(&chunk->entry, &rctx->chunk_list);
+	}
+
+	return err;
+}
+
+static void n2_chunk_complete(struct ablkcipher_request *req, void *final_iv)
+{
+	struct n2_request_context *rctx = ablkcipher_request_ctx(req);
+	struct n2_crypto_chunk *c, *tmp;
+
+	if (final_iv)
+		memcpy(rctx->walk.iv, final_iv, rctx->walk.blocksize);
+
+	ablkcipher_walk_complete(&rctx->walk);
+	list_for_each_entry_safe(c, tmp, &rctx->chunk_list, entry) {
+		list_del(&c->entry);
+		if (unlikely(c != &rctx->chunk))
+			kfree(c);
+	}
+
+}
+
+static int n2_do_ecb(struct ablkcipher_request *req, bool encrypt)
+{
+	struct n2_request_context *rctx = ablkcipher_request_ctx(req);
+	struct crypto_tfm *tfm = req->base.tfm;
+	int err = n2_compute_chunks(req);
+	struct n2_crypto_chunk *c, *tmp;
+	unsigned long flags, hv_ret;
+	struct spu_queue *qp;
+
+	if (err)
+		return err;
+
+	qp = cpu_to_cwq[get_cpu()];
+	err = -ENODEV;
+	if (!qp)
+		goto out;
+
+	spin_lock_irqsave(&qp->lock, flags);
+
+	list_for_each_entry_safe(c, tmp, &rctx->chunk_list, entry) {
+		err = __n2_crypt_chunk(tfm, c, qp, encrypt);
+		if (err)
+			break;
+		list_del(&c->entry);
+		if (unlikely(c != &rctx->chunk))
+			kfree(c);
+	}
+	if (!err) {
+		hv_ret = wait_for_tail(qp);
+		if (hv_ret != HV_EOK)
+			err = -EINVAL;
+	}
+
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	put_cpu();
+
+out:
+	n2_chunk_complete(req, NULL);
+	return err;
+}
+
+static int n2_encrypt_ecb(struct ablkcipher_request *req)
+{
+	return n2_do_ecb(req, true);
+}
+
+static int n2_decrypt_ecb(struct ablkcipher_request *req)
+{
+	return n2_do_ecb(req, false);
+}
+
+static int n2_do_chaining(struct ablkcipher_request *req, bool encrypt)
+{
+	struct n2_request_context *rctx = ablkcipher_request_ctx(req);
+	struct crypto_tfm *tfm = req->base.tfm;
+	unsigned long flags, hv_ret, iv_paddr;
+	int err = n2_compute_chunks(req);
+	struct n2_crypto_chunk *c, *tmp;
+	struct spu_queue *qp;
+	void *final_iv_addr;
+
+	final_iv_addr = NULL;
+
+	if (err)
+		return err;
+
+	qp = cpu_to_cwq[get_cpu()];
+	err = -ENODEV;
+	if (!qp)
+		goto out;
+
+	spin_lock_irqsave(&qp->lock, flags);
+
+	if (encrypt) {
+		iv_paddr = __pa(rctx->walk.iv);
+		list_for_each_entry_safe(c, tmp, &rctx->chunk_list,
+					 entry) {
+			c->iv_paddr = iv_paddr;
+			err = __n2_crypt_chunk(tfm, c, qp, true);
+			if (err)
+				break;
+			iv_paddr = c->dest_final - rctx->walk.blocksize;
+			list_del(&c->entry);
+			if (unlikely(c != &rctx->chunk))
+				kfree(c);
+		}
+		final_iv_addr = __va(iv_paddr);
+	} else {
+		list_for_each_entry_safe_reverse(c, tmp, &rctx->chunk_list,
+						 entry) {
+			if (c == &rctx->chunk) {
+				iv_paddr = __pa(rctx->walk.iv);
+			} else {
+				iv_paddr = (tmp->arr[tmp->arr_len-1].src_paddr +
+					    tmp->arr[tmp->arr_len-1].src_len -
+					    rctx->walk.blocksize);
+			}
+			if (!final_iv_addr) {
+				unsigned long pa;
+
+				pa = (c->arr[c->arr_len-1].src_paddr +
+				      c->arr[c->arr_len-1].src_len -
+				      rctx->walk.blocksize);
+				final_iv_addr = rctx->temp_iv;
+				memcpy(rctx->temp_iv, __va(pa),
+				       rctx->walk.blocksize);
+			}
+			c->iv_paddr = iv_paddr;
+			err = __n2_crypt_chunk(tfm, c, qp, false);
+			if (err)
+				break;
+			list_del(&c->entry);
+			if (unlikely(c != &rctx->chunk))
+				kfree(c);
+		}
+	}
+	if (!err) {
+		hv_ret = wait_for_tail(qp);
+		if (hv_ret != HV_EOK)
+			err = -EINVAL;
+	}
+
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	put_cpu();
+
+out:
+	n2_chunk_complete(req, err ? NULL : final_iv_addr);
+	return err;
+}
+
+static int n2_encrypt_chaining(struct ablkcipher_request *req)
+{
+	return n2_do_chaining(req, true);
+}
+
+static int n2_decrypt_chaining(struct ablkcipher_request *req)
+{
+	return n2_do_chaining(req, false);
+}
+
+struct n2_cipher_tmpl {
+	const char		*name;
+	const char		*drv_name;
+	u8			block_size;
+	u8			enc_type;
+	struct ablkcipher_alg	ablkcipher;
+};
+
+static const struct n2_cipher_tmpl cipher_tmpls[] = {
+	/* ARC4: only ECB is supported (chaining bits ignored) */
+	{	.name		= "ecb(arc4)",
+		.drv_name	= "ecb-arc4",
+		.block_size	= 1,
+		.enc_type	= (ENC_TYPE_ALG_RC4_STREAM |
+				   ENC_TYPE_CHAINING_ECB),
+		.ablkcipher	= {
+			.min_keysize	= 1,
+			.max_keysize	= 256,
+			.setkey		= n2_arc4_setkey,
+			.encrypt	= n2_encrypt_ecb,
+			.decrypt	= n2_decrypt_ecb,
+		},
+	},
+
+	/* DES: ECB CBC and CFB are supported */
+	{	.name		= "ecb(des)",
+		.drv_name	= "ecb-des",
+		.block_size	= DES_BLOCK_SIZE,
+		.enc_type	= (ENC_TYPE_ALG_DES |
+				   ENC_TYPE_CHAINING_ECB),
+		.ablkcipher	= {
+			.min_keysize	= DES_KEY_SIZE,
+			.max_keysize	= DES_KEY_SIZE,
+			.setkey		= n2_des_setkey,
+			.encrypt	= n2_encrypt_ecb,
+			.decrypt	= n2_decrypt_ecb,
+		},
+	},
+	{	.name		= "cbc(des)",
+		.drv_name	= "cbc-des",
+		.block_size	= DES_BLOCK_SIZE,
+		.enc_type	= (ENC_TYPE_ALG_DES |
+				   ENC_TYPE_CHAINING_CBC),
+		.ablkcipher	= {
+			.ivsize		= DES_BLOCK_SIZE,
+			.min_keysize	= DES_KEY_SIZE,
+			.max_keysize	= DES_KEY_SIZE,
+			.setkey		= n2_des_setkey,
+			.encrypt	= n2_encrypt_chaining,
+			.decrypt	= n2_decrypt_chaining,
+		},
+	},
+	{	.name		= "cfb(des)",
+		.drv_name	= "cfb-des",
+		.block_size	= DES_BLOCK_SIZE,
+		.enc_type	= (ENC_TYPE_ALG_DES |
+				   ENC_TYPE_CHAINING_CFB),
+		.ablkcipher	= {
+			.min_keysize	= DES_KEY_SIZE,
+			.max_keysize	= DES_KEY_SIZE,
+			.setkey		= n2_des_setkey,
+			.encrypt	= n2_encrypt_chaining,
+			.decrypt	= n2_decrypt_chaining,
+		},
+	},
+
+	/* 3DES: ECB CBC and CFB are supported */
+	{	.name		= "ecb(des3_ede)",
+		.drv_name	= "ecb-3des",
+		.block_size	= DES_BLOCK_SIZE,
+		.enc_type	= (ENC_TYPE_ALG_3DES |
+				   ENC_TYPE_CHAINING_ECB),
+		.ablkcipher	= {
+			.min_keysize	= 3 * DES_KEY_SIZE,
+			.max_keysize	= 3 * DES_KEY_SIZE,
+			.setkey		= n2_3des_setkey,
+			.encrypt	= n2_encrypt_ecb,
+			.decrypt	= n2_decrypt_ecb,
+		},
+	},
+	{	.name		= "cbc(des3_ede)",
+		.drv_name	= "cbc-3des",
+		.block_size	= DES_BLOCK_SIZE,
+		.enc_type	= (ENC_TYPE_ALG_3DES |
+				   ENC_TYPE_CHAINING_CBC),
+		.ablkcipher	= {
+			.ivsize		= DES_BLOCK_SIZE,
+			.min_keysize	= 3 * DES_KEY_SIZE,
+			.max_keysize	= 3 * DES_KEY_SIZE,
+			.setkey		= n2_3des_setkey,
+			.encrypt	= n2_encrypt_chaining,
+			.decrypt	= n2_decrypt_chaining,
+		},
+	},
+	{	.name		= "cfb(des3_ede)",
+		.drv_name	= "cfb-3des",
+		.block_size	= DES_BLOCK_SIZE,
+		.enc_type	= (ENC_TYPE_ALG_3DES |
+				   ENC_TYPE_CHAINING_CFB),
+		.ablkcipher	= {
+			.min_keysize	= 3 * DES_KEY_SIZE,
+			.max_keysize	= 3 * DES_KEY_SIZE,
+			.setkey		= n2_3des_setkey,
+			.encrypt	= n2_encrypt_chaining,
+			.decrypt	= n2_decrypt_chaining,
+		},
+	},
+	/* AES: ECB CBC and CTR are supported */
+	{	.name		= "ecb(aes)",
+		.drv_name	= "ecb-aes",
+		.block_size	= AES_BLOCK_SIZE,
+		.enc_type	= (ENC_TYPE_ALG_AES128 |
+				   ENC_TYPE_CHAINING_ECB),
+		.ablkcipher	= {
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
+			.setkey		= n2_aes_setkey,
+			.encrypt	= n2_encrypt_ecb,
+			.decrypt	= n2_decrypt_ecb,
+		},
+	},
+	{	.name		= "cbc(aes)",
+		.drv_name	= "cbc-aes",
+		.block_size	= AES_BLOCK_SIZE,
+		.enc_type	= (ENC_TYPE_ALG_AES128 |
+				   ENC_TYPE_CHAINING_CBC),
+		.ablkcipher	= {
+			.ivsize		= AES_BLOCK_SIZE,
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
+			.setkey		= n2_aes_setkey,
+			.encrypt	= n2_encrypt_chaining,
+			.decrypt	= n2_decrypt_chaining,
+		},
+	},
+	{	.name		= "ctr(aes)",
+		.drv_name	= "ctr-aes",
+		.block_size	= AES_BLOCK_SIZE,
+		.enc_type	= (ENC_TYPE_ALG_AES128 |
+				   ENC_TYPE_CHAINING_COUNTER),
+		.ablkcipher	= {
+			.ivsize		= AES_BLOCK_SIZE,
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
+			.setkey		= n2_aes_setkey,
+			.encrypt	= n2_encrypt_chaining,
+			.decrypt	= n2_encrypt_chaining,
+		},
+	},
+
+};
+#define NUM_CIPHER_TMPLS ARRAY_SIZE(cipher_tmpls)
+
+static LIST_HEAD(cipher_algs);
+
+struct n2_hash_tmpl {
+	const char	*name;
+	int		(*digest)(struct ahash_request *req);
+	u8		digest_size;
+	u8		block_size;
+};
+static const struct n2_hash_tmpl hash_tmpls[] = {
+	{ .name		= "md5",
+	  .digest	= n2_md5_async_digest,
+	  .digest_size	= MD5_DIGEST_SIZE,
+	  .block_size	= MD5_HMAC_BLOCK_SIZE },
+	{ .name		= "sha1",
+	  .digest	= n2_sha1_async_digest,
+	  .digest_size	= SHA1_DIGEST_SIZE,
+	  .block_size	= SHA1_BLOCK_SIZE },
+	{ .name		= "sha256",
+	  .digest	= n2_sha256_async_digest,
+	  .digest_size	= SHA256_DIGEST_SIZE,
+	  .block_size	= SHA256_BLOCK_SIZE },
+	{ .n