author | Tom Lendacky <thomas.lendacky@amd.com> | 2013-11-12 11:46:16 -0600
---|---|---
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2013-12-05 21:28:37 +0800
commit | 63b945091a070d8d4275dc0f7699ba22cd5f9435 |
tree | 720bd381770f1519531262f3659eccdf3c79e9bd | /drivers/crypto
parent | 8ec25c51291681bd68bdc290b35f2e61fa601c21 |
crypto: ccp - CCP device driver and interface support
These routines provide the device driver support for the AMD
Cryptographic Coprocessor (CCP).
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
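
The central entry point added here is ccp_enqueue_cmd(), whose contract is documented in ccp-dev.c below: the caller must supply a callback, -EINPROGRESS or -EBUSY means the cmd was queued, and a backlogged cmd first sees a callback with err == -EINPROGRESS before its final result. A minimal caller might look like the following sketch. This is illustrative only, not part of the patch: struct ccp_cmd and CCP_CMD_MAY_BACKLOG come from <linux/ccp.h>, which this diffstat (limited to drivers/crypto) does not show, and my_callback/my_submit are made-up names.

#include <linux/ccp.h>
#include <linux/completion.h>
#include <linux/errno.h>

static void my_callback(void *data, int err)
{
	if (err == -EINPROGRESS)
		return;		/* cmd advanced out of the backlog; final callback follows */

	/* any other err is the final result of the operation */
	complete(data);
}

static int my_submit(struct ccp_cmd *cmd)
{
	DECLARE_COMPLETION_ONSTACK(done);
	int ret;

	cmd->callback = my_callback;		/* mandatory, else -EINVAL */
	cmd->data = &done;
	cmd->flags |= CCP_CMD_MAY_BACKLOG;	/* accept -EBUSY + backlog */

	ret = ccp_enqueue_cmd(cmd);
	if ((ret != -EINPROGRESS) && (ret != -EBUSY))
		return ret;			/* not queued: e.g. -ENODEV */

	wait_for_completion(&done);
	return 0;
}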
Diffstat (limited to 'drivers/crypto')
-rw-r--r-- | drivers/crypto/ccp/ccp-dev.c | 582
-rw-r--r-- | drivers/crypto/ccp/ccp-dev.h | 272
-rw-r--r-- | drivers/crypto/ccp/ccp-ops.c | 2020
-rw-r--r-- | drivers/crypto/ccp/ccp-pci.c | 360
4 files changed, 3234 insertions, 0 deletions
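
Of the four files, ccp-pci.c is the bus glue that feeds the common layer: it fills in a ccp_device, supplies the irq hooks, and calls ccp_init(). A hypothetical bus front-end would follow the same shape using only the ccp-dev.h interface added below; my_probe, my_get_irq and my_free_irq are illustrative names and error handling is trimmed, so treat this as a sketch of the interface, not the PCI code itself.

#include "ccp-dev.h"

static int my_get_irq(struct ccp_device *ccp)
{
	/* bus-specific: request_irq(..., ccp_irq_handler, 0, "ccp", ccp->dev) */
	return 0;
}

static void my_free_irq(struct ccp_device *ccp)
{
	/* bus-specific: free_irq(...) */
}

static int my_probe(struct device *dev, void __iomem *io_map)
{
	struct ccp_device *ccp;
	int ret;

	ccp = ccp_alloc_struct(dev);		/* lists, locks, KSB bookkeeping */
	if (!ccp)
		return -ENOMEM;

	ccp->io_map = io_map;
	ccp->io_regs = io_map + IO_OFFSET;	/* registers sit at an offset into the BAR */
	ccp->get_irq = my_get_irq;		/* bus-specific irq setup/teardown hooks */
	ccp->free_irq = my_free_irq;
	dev_set_drvdata(dev, ccp);		/* ccp_irq_handler() recovers ccp this way */

	ret = ccp_init(ccp);			/* probe queues, start kthreads, register hwrng */
	if (ret)
		kfree(ccp);
	return ret;
}

/* On removal the bus glue calls ccp_destroy(ccp) and then frees the struct. */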
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
new file mode 100644
index 00000000000..de59df97017
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -0,0 +1,582 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/hw_random.h>
+#include <linux/cpu.h>
+#include <asm/cpu_device_id.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0.0");
+MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver");
+
+
+static struct ccp_device *ccp_dev;
+static inline struct ccp_device *ccp_get_device(void)
+{
+	return ccp_dev;
+}
+
+static inline void ccp_add_device(struct ccp_device *ccp)
+{
+	ccp_dev = ccp;
+}
+
+static inline void ccp_del_device(struct ccp_device *ccp)
+{
+	ccp_dev = NULL;
+}
+
+/**
+ * ccp_enqueue_cmd - queue an operation for processing by the CCP
+ *
+ * @cmd: ccp_cmd struct to be processed
+ *
+ * Queue a cmd to be processed by the CCP. If queueing the cmd
+ * would exceed the defined length of the cmd queue the cmd will
+ * only be queued if the CCP_CMD_MAY_BACKLOG flag is set and will
+ * result in a return code of -EBUSY.
+ *
+ * The callback routine specified in the ccp_cmd struct will be
+ * called to notify the caller of completion (if the cmd was not
+ * backlogged) or advancement out of the backlog. If the cmd has
+ * advanced out of the backlog the "err" value of the callback
+ * will be -EINPROGRESS. Any other "err" value during callback is
+ * the result of the operation.
+ *
+ * The cmd has been successfully queued if:
+ *   the return code is -EINPROGRESS or
+ *   the return code is -EBUSY and CCP_CMD_MAY_BACKLOG flag is set
+ */
+int ccp_enqueue_cmd(struct ccp_cmd *cmd)
+{
+	struct ccp_device *ccp = ccp_get_device();
+	unsigned long flags;
+	unsigned int i;
+	int ret;
+
+	if (!ccp)
+		return -ENODEV;
+
+	/* Caller must supply a callback routine */
+	if (!cmd->callback)
+		return -EINVAL;
+
+	cmd->ccp = ccp;
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	i = ccp->cmd_q_count;
+
+	if (ccp->cmd_count >= MAX_CMD_QLEN) {
+		ret = -EBUSY;
+		if (cmd->flags & CCP_CMD_MAY_BACKLOG)
+			list_add_tail(&cmd->entry, &ccp->backlog);
+	} else {
+		ret = -EINPROGRESS;
+		ccp->cmd_count++;
+		list_add_tail(&cmd->entry, &ccp->cmd);
+
+		/* Find an idle queue */
+		if (!ccp->suspending) {
+			for (i = 0; i < ccp->cmd_q_count; i++) {
+				if (ccp->cmd_q[i].active)
+					continue;
+
+				break;
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	/* If we found an idle queue, wake it up */
+	if (i < ccp->cmd_q_count)
+		wake_up_process(ccp->cmd_q[i].kthread);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ccp_enqueue_cmd);
+
+static void ccp_do_cmd_backlog(struct work_struct *work)
+{
+	struct ccp_cmd *cmd = container_of(work, struct ccp_cmd, work);
+	struct ccp_device *ccp = cmd->ccp;
+	unsigned long flags;
+	unsigned int i;
+
+	cmd->callback(cmd->data, -EINPROGRESS);
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	ccp->cmd_count++;
+	list_add_tail(&cmd->entry, &ccp->cmd);
+
+	/* Find an idle queue */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		if (ccp->cmd_q[i].active)
+			continue;
+
+		break;
+	}
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	/* If we found an idle queue, wake it up */
+	if (i < ccp->cmd_q_count)
+		wake_up_process(ccp->cmd_q[i].kthread);
+}
+
+static struct ccp_cmd *ccp_dequeue_cmd(struct ccp_cmd_queue *cmd_q)
+{
+	struct ccp_device *ccp = cmd_q->ccp;
+	struct ccp_cmd *cmd = NULL;
+	struct ccp_cmd *backlog = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	cmd_q->active = 0;
+
+	if (ccp->suspending) {
+		cmd_q->suspended = 1;
+
+		spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+		wake_up_interruptible(&ccp->suspend_queue);
+
+		return NULL;
+	}
+
+	if (ccp->cmd_count) {
+		cmd_q->active = 1;
+
+		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
+		list_del(&cmd->entry);
+
+		ccp->cmd_count--;
+	}
+
+	if (!list_empty(&ccp->backlog)) {
+		backlog = list_first_entry(&ccp->backlog, struct ccp_cmd,
+					   entry);
+		list_del(&backlog->entry);
+	}
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	if (backlog) {
+		INIT_WORK(&backlog->work, ccp_do_cmd_backlog);
+		schedule_work(&backlog->work);
+	}
+
+	return cmd;
+}
+
+static void ccp_do_cmd_complete(struct work_struct *work)
+{
+	struct ccp_cmd *cmd = container_of(work, struct ccp_cmd, work);
+
+	cmd->callback(cmd->data, cmd->ret);
+}
+
+static int ccp_cmd_queue_thread(void *data)
+{
+	struct ccp_cmd_queue *cmd_q = (struct ccp_cmd_queue *)data;
+	struct ccp_cmd *cmd;
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		schedule();
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		cmd = ccp_dequeue_cmd(cmd_q);
+		if (!cmd)
+			continue;
+
+		__set_current_state(TASK_RUNNING);
+
+		/* Execute the command */
+		cmd->ret = ccp_run_cmd(cmd_q, cmd);
+
+		/* Schedule the completion callback */
+		INIT_WORK(&cmd->work, ccp_do_cmd_complete);
+		schedule_work(&cmd->work);
+	}
+
+	__set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
+static int ccp_trng_read(struct hwrng *rng, void *data,
+			 size_t max, bool wait)
+{
+	struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng);
+	u32 trng_value;
+	int len = min_t(int, sizeof(trng_value), max);
+
+	/*
+	 * Locking is provided by the caller so we can update device
+	 * hwrng-related fields safely
+	 */
+	trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG);
+	if (!trng_value) {
+		/* Zero is returned if no data is available or if a
+		 * bad-entropy error is present. Assume an error if
+		 * we exceed TRNG_RETRIES reads of zero.
+		 */
+		if (ccp->hwrng_retries++ > TRNG_RETRIES)
+			return -EIO;
+
+		return 0;
+	}
+
+	/* Reset the counter and save the rng value */
+	ccp->hwrng_retries = 0;
+	memcpy(data, &trng_value, len);
+
+	return len;
+}
+
+/**
+ * ccp_alloc_struct - allocate and initialize the ccp_device struct
+ *
+ * @dev: device struct of the CCP
+ */
+struct ccp_device *ccp_alloc_struct(struct device *dev)
+{
+	struct ccp_device *ccp;
+
+	ccp = kzalloc(sizeof(*ccp), GFP_KERNEL);
+	if (ccp == NULL) {
+		dev_err(dev, "unable to allocate device struct\n");
+		return NULL;
+	}
+	ccp->dev = dev;
+
+	INIT_LIST_HEAD(&ccp->cmd);
+	INIT_LIST_HEAD(&ccp->backlog);
+
+	spin_lock_init(&ccp->cmd_lock);
+	mutex_init(&ccp->req_mutex);
+	mutex_init(&ccp->ksb_mutex);
+	ccp->ksb_count = KSB_COUNT;
+	ccp->ksb_start = 0;
+
+	return ccp;
+}
+
+/**
+ * ccp_init - initialize the CCP device
+ *
+ * @ccp: ccp_device struct
+ */
+int ccp_init(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	struct ccp_cmd_queue *cmd_q;
+	struct dma_pool *dma_pool;
+	char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
+	unsigned int qmr, qim, i;
+	int ret;
+
+	/* Find available queues */
+	qim = 0;
+	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
+	for (i = 0; i < MAX_HW_QUEUES; i++) {
+		if (!(qmr & (1 << i)))
+			continue;
+
+		/* Allocate a dma pool for this queue */
+		snprintf(dma_pool_name, sizeof(dma_pool_name), "ccp_q%d", i);
+		dma_pool = dma_pool_create(dma_pool_name, dev,
+					   CCP_DMAPOOL_MAX_SIZE,
+					   CCP_DMAPOOL_ALIGN, 0);
+		if (!dma_pool) {
+			dev_err(dev, "unable to allocate dma pool\n");
+			ret = -ENOMEM;
+			goto e_pool;
+		}
+
+		cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
+		ccp->cmd_q_count++;
+
+		cmd_q->ccp = ccp;
+		cmd_q->id = i;
+		cmd_q->dma_pool = dma_pool;
+
+		/* Reserve 2 KSB regions for the queue */
+		cmd_q->ksb_key = KSB_START + ccp->ksb_start++;
+		cmd_q->ksb_ctx = KSB_START + ccp->ksb_start++;
+		ccp->ksb_count -= 2;
+
+		/* Preset some register values and masks that are queue
+		 * number dependent
+		 */
+		cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE +
+				    (CMD_Q_STATUS_INCR * i);
+		cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE +
+					(CMD_Q_STATUS_INCR * i);
+		cmd_q->int_ok = 1 << (i * 2);
+		cmd_q->int_err = 1 << ((i * 2) + 1);
+
+		cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
+
+		init_waitqueue_head(&cmd_q->int_queue);
+
+		/* Build queue interrupt mask (two interrupts per queue) */
+		qim |= cmd_q->int_ok | cmd_q->int_err;
+
+		dev_dbg(dev, "queue #%u available\n", i);
+	}
+	if (ccp->cmd_q_count == 0) {
+		dev_notice(dev, "no command queues available\n");
+		ret = -EIO;
+		goto e_pool;
+	}
+	dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);
+
+	/* Disable and clear interrupts until ready */
+	iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		ioread32(cmd_q->reg_int_status);
+		ioread32(cmd_q->reg_status);
+	}
+	iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);
+
+	/* Request an irq */
+	ret = ccp->get_irq(ccp);
+	if (ret) {
+		dev_err(dev, "unable to allocate an IRQ\n");
+		goto e_pool;
+	}
+
+	/* Initialize the queues used to wait for KSB space and suspend */
+	init_waitqueue_head(&ccp->ksb_queue);
+	init_waitqueue_head(&ccp->suspend_queue);
+
+	/* Create a kthread for each queue */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		struct task_struct *kthread;
+
+		cmd_q = &ccp->cmd_q[i];
+
+		kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
+					 "ccp-q%u", cmd_q->id);
+		if (IS_ERR(kthread)) {
+			dev_err(dev, "error creating queue thread (%ld)\n",
+				PTR_ERR(kthread));
+			ret = PTR_ERR(kthread);
+			goto e_kthread;
+		}
+
+		cmd_q->kthread = kthread;
+		wake_up_process(kthread);
+	}
+
+	/* Register the RNG */
+	ccp->hwrng.name = "ccp-rng";
+	ccp->hwrng.read = ccp_trng_read;
+	ret = hwrng_register(&ccp->hwrng);
+	if (ret) {
+		dev_err(dev, "error registering hwrng (%d)\n", ret);
+		goto e_kthread;
+	}
+
+	/* Make the device struct available before enabling interrupts */
+	ccp_add_device(ccp);
+
+	/* Enable interrupts */
+	iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);
+
+	return 0;
+
+e_kthread:
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		if (ccp->cmd_q[i].kthread)
+			kthread_stop(ccp->cmd_q[i].kthread);
+
+	ccp->free_irq(ccp);
+
+e_pool:
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		dma_pool_destroy(ccp->cmd_q[i].dma_pool);
+
+	return ret;
+}
+
+/**
+ * ccp_destroy - tear down the CCP device
+ *
+ * @ccp: ccp_device struct
+ */
+void ccp_destroy(struct ccp_device *ccp)
+{
+	struct ccp_cmd_queue *cmd_q;
+	struct ccp_cmd *cmd;
+	unsigned int qim, i;
+
+	/* Remove general access to the device struct */
+	ccp_del_device(ccp);
+
+	/* Unregister the RNG */
+	hwrng_unregister(&ccp->hwrng);
+
+	/* Stop the queue kthreads */
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		if (ccp->cmd_q[i].kthread)
+			kthread_stop(ccp->cmd_q[i].kthread);
+
+	/* Build queue interrupt mask (two interrupt masks per queue) */
+	qim = 0;
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+		qim |= cmd_q->int_ok | cmd_q->int_err;
+	}
+
+	/* Disable and clear interrupts */
+	iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		ioread32(cmd_q->reg_int_status);
+		ioread32(cmd_q->reg_status);
+	}
+	iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);
+
+	ccp->free_irq(ccp);
+
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		dma_pool_destroy(ccp->cmd_q[i].dma_pool);
+
+	/* Flush the cmd and backlog queue */
+	while (!list_empty(&ccp->cmd)) {
+		/* Invoke the callback directly with an error code */
+		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
+		list_del(&cmd->entry);
+		cmd->callback(cmd->data, -ENODEV);
+	}
+	while (!list_empty(&ccp->backlog)) {
+		/* Invoke the callback directly with an error code */
+		cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
+		list_del(&cmd->entry);
+		cmd->callback(cmd->data, -ENODEV);
+	}
+}
+
+/**
+ * ccp_irq_handler - handle interrupts generated by the CCP device
+ *
+ * @irq: the irq associated with the interrupt
+ * @data: the data value supplied when the irq was created
+ */
+irqreturn_t ccp_irq_handler(int irq, void *data)
+{
+	struct device *dev = data;
+	struct ccp_device *ccp = dev_get_drvdata(dev);
+	struct ccp_cmd_queue *cmd_q;
+	u32 q_int, status;
+	unsigned int i;
+
+	status = ioread32(ccp->io_regs + IRQ_STATUS_REG);
+
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		q_int = status & (cmd_q->int_ok | cmd_q->int_err);
+		if (q_int) {
+			cmd_q->int_status = status;
+			cmd_q->q_status = ioread32(cmd_q->reg_status);
+			cmd_q->q_int_status =
+				ioread32(cmd_q->reg_int_status);
+
+			/* On error, only save the first error value */
+			if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error)
+				cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
+
+			cmd_q->int_rcvd = 1;
+
+			/* Acknowledge the interrupt and wake the kthread */
+			iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG);
+			wake_up_interruptible(&cmd_q->int_queue);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_PM
+bool ccp_queues_suspended(struct ccp_device *ccp)
+{
+	unsigned int suspended = 0;
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		if (ccp->cmd_q[i].suspended)
+			suspended++;
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	return ccp->cmd_q_count == suspended;
+}
+#endif
+
+static const struct x86_cpu_id ccp_support[] = {
+	{ X86_VENDOR_AMD, 22, },
+	{ },	/* x86_match_cpu() expects a zero-terminated table */
+};
+
+static int __init ccp_mod_init(void)
+{
+	struct cpuinfo_x86 *cpuinfo = &boot_cpu_data;
+
+	if (!x86_match_cpu(ccp_support))
+		return -ENODEV;
+
+	switch (cpuinfo->x86) {
+	case 22:
+		if ((cpuinfo->x86_model < 48) || (cpuinfo->x86_model > 63))
+			return -ENODEV;
+		return ccp_pci_init();
+	}
+
+	return -ENODEV;
+}
+
+static void __exit ccp_mod_exit(void)
+{
+	struct cpuinfo_x86 *cpuinfo = &boot_cpu_data;
+
+	switch (cpuinfo->x86) {
+	case 22:
+		ccp_pci_exit();
+		break;
+	}
+}
+
+module_init(ccp_mod_init);
+module_exit(ccp_mod_exit);
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
new file mode 100644
index 00000000000..7ec536e702e
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -0,0 +1,272 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CCP_DEV_H__
+#define __CCP_DEV_H__
+
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/dmapool.h>
+#include <linux/hw_random.h>
+
+
+#define IO_OFFSET			0x20000
+
+#define MAX_DMAPOOL_NAME_LEN		32
+
+#define MAX_HW_QUEUES			5
+#define MAX_CMD_QLEN			100
+
+#define TRNG_RETRIES			10
+
+
+/****** Register Mappings ******/
+#define Q_MASK_REG			0x000
+#define TRNG_OUT_REG			0x00c
+#define IRQ_MASK_REG			0x040
+#define IRQ_STATUS_REG			0x200
+
+#define DEL_CMD_Q_JOB			0x124
+#define DEL_Q_ACTIVE			0x00000200
+#define DEL_Q_ID_SHIFT			6
+
+#define CMD_REQ0			0x180
+#define CMD_REQ_INCR			0x04
+
+#define CMD_Q_STATUS_BASE		0x210
+#define CMD_Q_INT_STATUS_BASE		0x214
+#define CMD_Q_STATUS_INCR		0x20
+
+#define CMD_Q_CACHE			0x228
+#define CMD_Q_CACHE_INC			0x20
+
+#define CMD_Q_ERROR(__qs)		((__qs) & 0x0000003f)
+#define CMD_Q_DEPTH(__qs)		(((__qs) >> 12) & 0x0000000f)
+
+/****** REQ0 Related Values ******/
+#define REQ0_WAIT_FOR_WRITE		0x00000004
+#define REQ0_INT_ON_COMPLETE		0x00000002
+#define REQ0_STOP_ON_COMPLETE		0x00000001
+
+#define REQ0_CMD_Q_SHIFT		9
+#define REQ0_JOBID_SHIFT		3
+
+/****** REQ1 Related Values ******/
+#define REQ1_PROTECT_SHIFT		27
+#define REQ1_ENGINE_SHIFT		23
+#define REQ1_KEY_KSB_SHIFT		2
+
+#define REQ1_EOM			0x00000002
+#define REQ1_INIT			0x00000001
+
+/* AES Related Values */
+#define REQ1_AES_TYPE_SHIFT		21
+#define REQ1_AES_MODE_SHIFT		18
+#define REQ1_AES_ACTION_SHIFT		17
+#define REQ1_AES_CFB_SIZE_SHIFT		10
+
+/* XTS-AES Related Values */
+#define REQ1_XTS_AES_SIZE_SHIFT		10
+
+/* SHA Related Values */
+#define REQ1_SHA_TYPE_SHIFT		21
+
+/* RSA Related Values */
+#define REQ1_RSA_MOD_SIZE_SHIFT		10
+
+/* Pass-Through Related Values */
+#define REQ1_PT_BW_SHIFT		12
+#define REQ1_PT_BS_SHIFT		10
+
+/* ECC Related Values */
+#define REQ1_ECC_AFFINE_CONVERT		0x00200000
+#define REQ1_ECC_FUNCTION_SHIFT		18
+
+/****** REQ4 Related Values ******/
+#define REQ4_KSB_SHIFT			18
+#define REQ4_MEMTYPE_SHIFT		16
+
+/****** REQ6 Related Values ******/
+#define REQ6_MEMTYPE_SHIFT		16
+
+
+/****** Key Storage Block ******/
+#define KSB_START			77
+#define KSB_END				127
+#define KSB_COUNT			(KSB_END - KSB_START + 1)
+#define CCP_KSB_BITS			256
+#define CCP_KSB_BYTES			32
+
+#define CCP_JOBID_MASK			0x0000003f
+
+#define CCP_DMAPOOL_MAX_SIZE		64
+#define CCP_DMAPOOL_ALIGN		(1 << 5)
+
+#define CCP_REVERSE_BUF_SIZE		64
+
+#define CCP_AES_KEY_KSB_COUNT		1
+#define CCP_AES_CTX_KSB_COUNT		1
+
+#define CCP_XTS_AES_KEY_KSB_COUNT	1
+#define CCP_XTS_AES_CTX_KSB_COUNT	1
+
+#define CCP_SHA_KSB_COUNT		1
+
+#define CCP_RSA_MAX_WIDTH		4096
+
+#define CCP_PASSTHRU_BLOCKSIZE		256
+#define CCP_PASSTHRU_MASKSIZE		32
+#define CCP_PASSTHRU_KSB_COUNT		1
+
+#define CCP_ECC_MODULUS_BYTES		48	/* 384-bits */
+#define CCP_ECC_MAX_OPERANDS		6
+#define CCP_ECC_MAX_OUTPUTS		3
+#define CCP_ECC_SRC_BUF_SIZE		448
+#define CCP_ECC_DST_BUF_SIZE		192
+#define CCP_ECC_OPERAND_SIZE		64
+#define CCP_ECC_OUTPUT_SIZE		64
+#define CCP_ECC_RESULT_OFFSET		60
+#define CCP_ECC_RESULT_SUCCESS		0x0001
+
+
+struct ccp_device;
+struct ccp_cmd;
+
+struct ccp_cmd_queue {
+	struct ccp_device *ccp;
+
+	/* Queue identifier */
+	u32 id;
+
+	/* Queue dma pool */
+	struct dma_pool *dma_pool;
+
+	/* Queue reserved KSB regions */
+	u32 ksb_key;
+	u32 ksb_ctx;
+
+	/* Queue processing thread */
+	struct task_struct *kthread;
+	unsigned int active;
+	unsigned int suspended;
+
+	/* Number of free command slots available */
+	unsigned int free_slots;
+
+	/* Interrupt masks */
+	u32 int_ok;
+	u32 int_err;
+
+	/* Register addresses for queue */
+	void __iomem *reg_status;
+	void __iomem *reg_int_status;
+
+	/* Status values from job */
+	u32 int_status;
+	u32 q_status;
+	u32 q_int_status;
+	u32 cmd_error;
+
+	/* Interrupt wait queue */
+	wait_queue_head_t int_queue;
+	unsigned int int_rcvd;
+} ____cacheline_aligned;
+
+struct ccp_device {
+	struct device *dev;
+
+	/*
+	 * Bus specific device information
+	 */
+	void *dev_specific;
+	int (*get_irq)(struct ccp_device *ccp);
+	void (*free_irq)(struct ccp_device *ccp);
+
+	/*
+	 * I/O area used for device communication. The register mapping
+	 * starts at an offset into the mapped bar.
+	 * The CMD_REQx registers and the Delete_Cmd_Queue_Job register
+	 * need to be protected while a command queue thread is accessing
+	 * them.
+	 */
+	struct mutex req_mutex ____cacheline_aligned;
+	void __iomem *io_map;
+	void __iomem *io_regs;
+
+	/*
+	 * Master lists that all cmds are queued on. Because there can be
+	 * more than one CCP command queue that can process a cmd a separate
+	 * backlog list is needed so that the backlog completion call
+	 * completes before the cmd is available for execution.
+	 */
+	spinlock_t cmd_lock ____cacheline_aligned;
+	unsigned int cmd_count;
+	struct list_head cmd;
+	struct list_head backlog;
+
+	/*
+	 * The command queues. These represent the queues available on the
+	 * CCP that are available for processing cmds
+	 */
+	struct ccp_cmd_queue cmd_q[MAX_HW_QUEUES];
+	unsigned int cmd_q_count;
+
+	/*
+	 * Support for the CCP True RNG
+	 */
+	struct hwrng hwrng;
+	unsigned int hwrng_retries;
+
+	/*
+	 * A counter used to generate job-ids for cmds submitted to the CCP
+	 */
+	atomic_t current_id ____cacheline_aligned;
+
+	/*
+	 * The CCP uses key storage blocks (KSB) to maintain context for certain
+	 * operations. To prevent multiple cmds from using the same KSB range
+	 * a command queue reserves a KSB range for the duration of the cmd.
+	 * Each queue will, however, reserve 2 KSB blocks for operations that
+	 * only require single KSB entries (eg. AES context/iv and key) in order
+	 * to avoid allocation contention. This will reserve at most 10 KSB
+	 * entries, leaving 41 KSB entries available for dynamic allocation.
+	 */
+	struct mutex ksb_mutex ____cacheline_aligned;
+	DECLARE_BITMAP(ksb, KSB_COUNT);
+	wait_queue_head_t ksb_queue;
+	unsigned int ksb_avail;
+	unsigned int ksb_count;
+	u32 ksb_start;
+
+	/* Suspend support */
+	unsigned int suspending;
+	wait_queue_head_t suspend_queue;
+};
+
+
+int ccp_pci_init(void);
+void ccp_pci_exit(void);
+
+struct ccp_device *ccp_alloc_struct(struct device *dev);
+int ccp_init(struct ccp_device *ccp);
+void ccp_destroy(struct ccp_device *ccp);
+bool ccp_queues_suspended(struct ccp_device *ccp);
+
+irqreturn_t ccp_irq_handler(int irq, void *data);
+
+int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd);
+
+#endif
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
new file mode 100644
index 00000000000..4be09103754
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -0,0 +1,2020 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/ccp.h>
+#include <linux/scatterlist.h>
+#include <crypto/scatterwalk.h>
+
+#include "ccp-dev.h"
+
+
+enum ccp_memtype {
+	CCP_MEMTYPE_SYSTEM = 0,
+	CCP_MEMTYPE_KSB,
+	CCP_MEMTYPE_LOCAL,
+	CCP_MEMTYPE__LAST,
+};
+
+struct ccp_dma_info {
+	dma_addr_t address;
+	unsigned int offset;
+	unsigned int length;
+	enum dma_data_direction dir;
+};
+
+struct ccp_dm_workarea {
+	struct device *dev;
+	struct dma_pool *dma_pool;
+	unsigned int length;
+
+	u8 *address;
+	struct ccp_dma_info dma;
+};
+
+struct ccp_sg_workarea {
+	struct scatterlist *sg;
+	unsigned int nents;
+	unsigned int length;
+
+	struct scatterlist *dma_sg;
+	struct device *dma_dev;
+	unsigned int dma_count;
+	enum dma_data_direction dma_dir;
+
+	u32 sg_used;
+
+	u32 bytes_left;
+};
+
+struct ccp_data {
+	struct ccp_sg_workarea sg_wa;
+	struct ccp_dm_workarea dm_wa;
+};
+
+struct ccp_mem {
+	enum ccp_memtype type;
+	union {
+		struct ccp_dma_info dma;
+		u32 ksb;
+	} u;
+};
+
+struct ccp_aes_op {
+	enum ccp_aes_type type;
+	enum ccp_aes_mode mode;
+	enum ccp_aes_action action;
+};
+
+struct ccp_xts_aes_op {
+	enum ccp_aes_action action;
+	enum ccp_xts_aes_unit_size unit_size;
+};
+
+struct ccp_sha_op {
+	enum ccp_sha_type type;
+	u64 msg_bits;
+};
+
+struct ccp_rsa_op {
+	u32 mod_size;
+	u32 input_len;
+};
+
+struct ccp_passthru_op {
+	enum ccp_passthru_bitwise bit_mod;
+	enum ccp_passthru_byteswap byte_swap;
+};
+
+struct ccp_ecc_op {
+	enum ccp_ecc_function function;
+};
+
+struct ccp_op {
+	struct ccp_cmd_queue *cmd_q;
+
+	u32 jobid;
+	u32 ioc;
+	u32 soc;
+	u32 ksb_key;
+	u32 ksb_ctx;
+	u32 init;
+	u32 eom;
+
+	struct ccp_mem src;
+	struct ccp_mem dst;
+
+	union {
+		struct ccp_aes_op aes;
+		struct ccp_xts_aes_op xts;
+		struct ccp_sha_op sha;
+		struct ccp_rsa_op rsa;
+		struct ccp_passthru_op passthru;
+		struct ccp_ecc_op ecc;
+	} u;
+};
+
+/* The CCP cannot perform zero-length sha operations so the caller
+ * is required to buffer data for the final operation. However, a
+ * sha operation for a message with a total length of zero is valid
+ * so known values are required to supply the result.
+ */
+static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = {
+	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
+	0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
+	0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = {
+	0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9,
+	0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4,
+	0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a,
+	0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = {
+	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
+	0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
+	0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
+	0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
+};
+
+static u32 ccp_addr_lo(struct ccp_dma_info *info)
+{
+	return lower_32_bits(info->address + info->offset);
+}
+
+static u32 ccp_addr_hi(struct ccp_dma_info *info)
+{
+	return upper_32_bits(info->address + info->offset) & 0x0000ffff;
+}
+
+static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
+{
+	struct ccp_cmd_queue *cmd_q = op->cmd_q;
+	struct ccp_device *ccp = cmd_q->ccp;
+	void __iomem *cr_addr;
+	u32 cr0, cmd;
+	unsigned int i;
+	int ret = 0;
+
+	/* We could read a status register to see how many free slots
+	 * are actually available, but reading that register resets it
+	 * and you could lose some error information.
+	 */
+	cmd_q->free_slots--;
+
+	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
+	      | (op->jobid << REQ0_JOBID_SHIFT)
+	      | REQ0_WAIT_FOR_WRITE;
+
+	if (op->soc)
+		cr0 |= REQ0_STOP_ON_COMPLETE
+		       | REQ0_INT_ON_COMPLETE;
+
+	if (op->ioc || !cmd_q->free_slots)
+		cr0 |= REQ0_INT_ON_COMPLETE;
+
+	/* Start at CMD_REQ1 */
+	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
+
+	mutex_lock(&ccp->req_mutex);
+
+	/* Write CMD_REQ1 through CMD_REQx first */
+	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
+		iowrite32(*(cr + i), cr_addr);
+
+	/* Tell the CCP to start */
+	wmb();
+	iowrite32(cr0, ccp->io_regs + CMD_REQ0);
+
+	mutex_unlock(&ccp->req_mutex);
+
+	if (cr0 & REQ0_INT_ON_COMPLETE) {
+		/* Wait for the job to complete */
+		ret = wait_event_interruptible(cmd_q->int_queue,
+					       cmd_q->int_rcvd);
+		if (ret || cmd_q->cmd_error) {
+			/* On error delete all related jobs from the queue */
+			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
+			      | op->jobid;
+
+			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
+
+			if (!ret)
+				ret = -EIO;
+		} else if (op->soc) {
+			/* Delete just head job from the queue on SoC */
+			cmd = DEL_Q_ACTIVE
+			      | (cmd_q->id << DEL_Q_ID_SHIFT)
+			      | op->jobid;
+
+			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
+		}
+
+		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
+
+		cmd_q->int_rcvd = 0;
+	}
+
+	return ret;
+}
+
+static int ccp_perform_aes(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* Fill out the register contents for REQ1 through REQ6 */
+	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
+		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
+		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
+		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
+		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
+	cr[1] = op->src.u.dma.length - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->src.u.dma);
+	cr[4] = ccp_addr_lo(&op->dst.u.dma);
+	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->dst.u.dma);
+
+	if (op->u.aes.mode == CCP_AES_MODE_CFB)
+		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);
+
+	if (op->eom)
+		cr[0] |= REQ1_EOM;
+
+	if (op->init)
+		cr[0] |= REQ1_INIT;
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+static int ccp_perform_xts_aes(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* Fill out the register contents for REQ1 through REQ6 */
+	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
+		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
+		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
+		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
+	cr[1] = op->src.u.dma.length - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->src.u.dma);
+	cr[4] = ccp_addr_lo(&op->dst.u.dma);
+	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->dst.u.dma);
+
+	if (op->eom)
+		cr[0] |= REQ1_EOM;
+
+	if (op->init)
+		cr[0] |= REQ1_INIT;
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+static int ccp_perform_sha(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* Fill out the register contents for REQ1 through REQ6 */
+	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
+		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
+		| REQ1_INIT;
+	cr[1] = op->src.u.dma.length - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->src.u.dma);
+
+	if (op->eom) {
+		cr[0] |= REQ1_EOM;
+		cr[4] = lower_32_bits(op->u.sha.msg_bits);
+		cr[5] = upper_32_bits(op->u.sha.msg_bits);
+	} else {
+		cr[4] = 0;
+		cr[5] = 0;
+	}
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+static int ccp_perform_rsa(struct ccp_op *op)
+{
+	u32 c