diff options
Diffstat (limited to 'drivers/crypto/nx')
| -rw-r--r-- | drivers/crypto/nx/Kconfig | 26 | ||||
| -rw-r--r-- | drivers/crypto/nx/Makefile | 5 | ||||
| -rw-r--r-- | drivers/crypto/nx/nx-842.c | 1617 | ||||
| -rw-r--r-- | drivers/crypto/nx/nx-aes-cbc.c | 59 | ||||
| -rw-r--r-- | drivers/crypto/nx/nx-aes-ccm.c | 285 | ||||
| -rw-r--r-- | drivers/crypto/nx/nx-aes-ctr.c | 54 | ||||
| -rw-r--r-- | drivers/crypto/nx/nx-aes-ecb.c | 52 | ||||
| -rw-r--r-- | drivers/crypto/nx/nx-aes-gcm.c | 300 | ||||
| -rw-r--r-- | drivers/crypto/nx/nx-aes-xcbc.c | 205 | ||||
| -rw-r--r-- | drivers/crypto/nx/nx-sha256.c | 128 | ||||
| -rw-r--r-- | drivers/crypto/nx/nx-sha512.c | 134 | ||||
| -rw-r--r-- | drivers/crypto/nx/nx.c | 94 | ||||
| -rw-r--r-- | drivers/crypto/nx/nx.h | 3 |
13 files changed, 2565 insertions, 397 deletions
diff --git a/drivers/crypto/nx/Kconfig b/drivers/crypto/nx/Kconfig new file mode 100644 index 00000000000..f82616621ae --- /dev/null +++ b/drivers/crypto/nx/Kconfig @@ -0,0 +1,26 @@ +config CRYPTO_DEV_NX_ENCRYPT + tristate "Encryption acceleration support" + depends on PPC64 && IBMVIO + default y + select CRYPTO_AES + select CRYPTO_CBC + select CRYPTO_ECB + select CRYPTO_CCM + select CRYPTO_GCM + select CRYPTO_AUTHENC + select CRYPTO_XCBC + select CRYPTO_SHA256 + select CRYPTO_SHA512 + help + Support for Power7+ in-Nest encryption acceleration. This + module supports acceleration for AES and SHA2 algorithms. If you + choose 'M' here, this module will be called nx_crypto. + +config CRYPTO_DEV_NX_COMPRESS + tristate "Compression acceleration support" + depends on PPC64 && IBMVIO + default y + help + Support for Power7+ in-Nest compression acceleration. This + module supports acceleration for AES and SHA2 algorithms. If you + choose 'M' here, this module will be called nx_compress. diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile index 411ce59c80d..bb770ea45ce 100644 --- a/drivers/crypto/nx/Makefile +++ b/drivers/crypto/nx/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_CRYPTO_DEV_NX) += nx-crypto.o +obj-$(CONFIG_CRYPTO_DEV_NX_ENCRYPT) += nx-crypto.o nx-crypto-objs := nx.o \ nx_debugfs.o \ nx-aes-cbc.o \ @@ -9,3 +9,6 @@ nx-crypto-objs := nx.o \ nx-aes-xcbc.o \ nx-sha256.o \ nx-sha512.o + +obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o +nx-compress-objs := nx-842.o diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c new file mode 100644 index 00000000000..502edf0a293 --- /dev/null +++ b/drivers/crypto/nx/nx-842.c @@ -0,0 +1,1617 @@ +/* + * Driver for IBM Power 842 compression accelerator + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) IBM Corporation, 2012 + * + * Authors: Robert Jennings <rcj@linux.vnet.ibm.com> + * Seth Jennings <sjenning@linux.vnet.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/nx842.h> +#include <linux/of.h> +#include <linux/slab.h> + +#include <asm/page.h> +#include <asm/vio.h> + +#include "nx_csbcpb.h" /* struct nx_csbcpb */ + +#define MODULE_NAME "nx-compress" +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>"); +MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors"); + +#define SHIFT_4K 12 +#define SHIFT_64K 16 +#define SIZE_4K (1UL << SHIFT_4K) +#define SIZE_64K (1UL << SHIFT_64K) + +/* IO buffer must be 128 byte aligned */ +#define IO_BUFFER_ALIGN 128 + +struct nx842_header { + int blocks_nr; /* number of compressed blocks */ + int offset; /* offset of the first block (from beginning of header) */ + int sizes[0]; /* size of compressed blocks */ +}; + +static inline int nx842_header_size(const struct nx842_header *hdr) +{ + return sizeof(struct nx842_header) + + hdr->blocks_nr * sizeof(hdr->sizes[0]); +} + +/* Macros for fields within nx_csbcpb */ +/* Check the valid bit within the csbcpb valid field */ +#define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7)) + +/* CE macros operate on the completion_extension field bits in the csbcpb. + * CE0 0=full completion, 1=partial completion + * CE1 0=CE0 indicates completion, 1=termination (output may be modified) + * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */ +#define NX842_CSBCPB_CE0(x) (x & BIT_MASK(7)) +#define NX842_CSBCPB_CE1(x) (x & BIT_MASK(6)) +#define NX842_CSBCPB_CE2(x) (x & BIT_MASK(5)) + +/* The NX unit accepts data only on 4K page boundaries */ +#define NX842_HW_PAGE_SHIFT SHIFT_4K +#define NX842_HW_PAGE_SIZE (ASM_CONST(1) << NX842_HW_PAGE_SHIFT) +#define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1)) + +enum nx842_status { + UNAVAILABLE, + AVAILABLE +}; + +struct ibm_nx842_counters { + atomic64_t comp_complete; + atomic64_t comp_failed; + atomic64_t decomp_complete; + atomic64_t decomp_failed; + atomic64_t swdecomp; + atomic64_t comp_times[32]; + atomic64_t decomp_times[32]; +}; + +static struct nx842_devdata { + struct vio_dev *vdev; + struct device *dev; + struct ibm_nx842_counters *counters; + unsigned int max_sg_len; + unsigned int max_sync_size; + unsigned int max_sync_sg; + enum nx842_status status; +} __rcu *devdata; +static DEFINE_SPINLOCK(devdata_mutex); + +#define NX842_COUNTER_INC(_x) \ +static inline void nx842_inc_##_x( \ + const struct nx842_devdata *dev) { \ + if (dev) \ + atomic64_inc(&dev->counters->_x); \ +} +NX842_COUNTER_INC(comp_complete); +NX842_COUNTER_INC(comp_failed); +NX842_COUNTER_INC(decomp_complete); +NX842_COUNTER_INC(decomp_failed); +NX842_COUNTER_INC(swdecomp); + +#define NX842_HIST_SLOTS 16 + +static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time) +{ + int bucket = fls(time); + + if (bucket) + bucket = min((NX842_HIST_SLOTS - 1), bucket - 1); + + atomic64_inc(×[bucket]); +} + +/* NX unit operation flags */ +#define NX842_OP_COMPRESS 0x0 +#define NX842_OP_CRC 0x1 +#define NX842_OP_DECOMPRESS 0x2 +#define NX842_OP_COMPRESS_CRC (NX842_OP_COMPRESS | NX842_OP_CRC) +#define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC) +#define NX842_OP_ASYNC (1<<23) +#define NX842_OP_NOTIFY (1<<22) +#define NX842_OP_NOTIFY_INT(x) ((x & 0xff)<<8) + +static unsigned long nx842_get_desired_dma(struct vio_dev *viodev) +{ + /* No use of DMA mappings within the driver. */ + return 0; +} + +struct nx842_slentry { + unsigned long ptr; /* Real address (use __pa()) */ + unsigned long len; +}; + +/* pHyp scatterlist entry */ +struct nx842_scatterlist { + int entry_nr; /* number of slentries */ + struct nx842_slentry *entries; /* ptr to array of slentries */ +}; + +/* Does not include sizeof(entry_nr) in the size */ +static inline unsigned long nx842_get_scatterlist_size( + struct nx842_scatterlist *sl) +{ + return sl->entry_nr * sizeof(struct nx842_slentry); +} + +static inline unsigned long nx842_get_pa(void *addr) +{ + if (is_vmalloc_addr(addr)) + return page_to_phys(vmalloc_to_page(addr)) + + offset_in_page(addr); + else + return __pa(addr); +} + +static int nx842_build_scatterlist(unsigned long buf, int len, + struct nx842_scatterlist *sl) +{ + unsigned long nextpage; + struct nx842_slentry *entry; + + sl->entry_nr = 0; + + entry = sl->entries; + while (len) { + entry->ptr = nx842_get_pa((void *)buf); + nextpage = ALIGN(buf + 1, NX842_HW_PAGE_SIZE); + if (nextpage < buf + len) { + /* we aren't at the end yet */ + if (IS_ALIGNED(buf, NX842_HW_PAGE_SIZE)) + /* we are in the middle (or beginning) */ + entry->len = NX842_HW_PAGE_SIZE; + else + /* we are at the beginning */ + entry->len = nextpage - buf; + } else { + /* at the end */ + entry->len = len; + } + + len -= entry->len; + buf += entry->len; + sl->entry_nr++; + entry++; + } + + return 0; +} + +/* + * Working memory for software decompression + */ +struct sw842_fifo { + union { + char f8[256][8]; + char f4[512][4]; + }; + char f2[256][2]; + unsigned char f84_full; + unsigned char f2_full; + unsigned char f8_count; + unsigned char f2_count; + unsigned int f4_count; +}; + +/* + * Working memory for crypto API + */ +struct nx842_workmem { + char bounce[PAGE_SIZE]; /* bounce buffer for decompression input */ + union { + /* hardware working memory */ + struct { + /* scatterlist */ + char slin[SIZE_4K]; + char slout[SIZE_4K]; + /* coprocessor status/parameter block */ + struct nx_csbcpb csbcpb; + }; + /* software working memory */ + struct sw842_fifo swfifo; /* software decompression fifo */ + }; +}; + +int nx842_get_workmem_size(void) +{ + return sizeof(struct nx842_workmem) + NX842_HW_PAGE_SIZE; +} +EXPORT_SYMBOL_GPL(nx842_get_workmem_size); + +int nx842_get_workmem_size_aligned(void) +{ + return sizeof(struct nx842_workmem); +} +EXPORT_SYMBOL_GPL(nx842_get_workmem_size_aligned); + +static int nx842_validate_result(struct device *dev, + struct cop_status_block *csb) +{ + /* The csb must be valid after returning from vio_h_cop_sync */ + if (!NX842_CSBCBP_VALID_CHK(csb->valid)) { + dev_err(dev, "%s: cspcbp not valid upon completion.\n", + __func__); + dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n", + csb->valid, + csb->crb_seq_number, + csb->completion_code, + csb->completion_extension); + dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n", + csb->processed_byte_count, + (unsigned long)csb->address); + return -EIO; + } + + /* Check return values from the hardware in the CSB */ + switch (csb->completion_code) { + case 0: /* Completed without error */ + break; + case 64: /* Target bytes > Source bytes during compression */ + case 13: /* Output buffer too small */ + dev_dbg(dev, "%s: Compression output larger than input\n", + __func__); + return -ENOSPC; + case 66: /* Input data contains an illegal template field */ + case 67: /* Template indicates data past the end of the input stream */ + dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n", + __func__, csb->completion_code); + return -EINVAL; + default: + dev_dbg(dev, "%s: Unspecified error (code:%d)\n", + __func__, csb->completion_code); + return -EIO; + } + + /* Hardware sanity check */ + if (!NX842_CSBCPB_CE2(csb->completion_extension)) { + dev_err(dev, "%s: No error returned by hardware, but " + "data returned is unusable, contact support.\n" + "(Additional info: csbcbp->processed bytes " + "does not specify processed bytes for the " + "target buffer.)\n", __func__); + return -EIO; + } + + return 0; +} + +/** + * nx842_compress - Compress data using the 842 algorithm + * + * Compression provide by the NX842 coprocessor on IBM Power systems. + * The input buffer is compressed and the result is stored in the + * provided output buffer. + * + * Upon return from this function @outlen contains the length of the + * compressed data. If there is an error then @outlen will be 0 and an + * error will be specified by the return code from this function. + * + * @in: Pointer to input buffer, must be page aligned + * @inlen: Length of input buffer, must be PAGE_SIZE + * @out: Pointer to output buffer + * @outlen: Length of output buffer + * @wrkmem: ptr to buffer for working memory, size determined by + * nx842_get_workmem_size() + * + * Returns: + * 0 Success, output of length @outlen stored in the buffer at @out + * -ENOMEM Unable to allocate internal buffers + * -ENOSPC Output buffer is to small + * -EMSGSIZE XXX Difficult to describe this limitation + * -EIO Internal error + * -ENODEV Hardware unavailable + */ +int nx842_compress(const unsigned char *in, unsigned int inlen, + unsigned char *out, unsigned int *outlen, void *wmem) +{ + struct nx842_header *hdr; + struct nx842_devdata *local_devdata; + struct device *dev = NULL; + struct nx842_workmem *workmem; + struct nx842_scatterlist slin, slout; + struct nx_csbcpb *csbcpb; + int ret = 0, max_sync_size, i, bytesleft, size, hdrsize; + unsigned long inbuf, outbuf, padding; + struct vio_pfo_op op = { + .done = NULL, + .handle = 0, + .timeout = 0, + }; + unsigned long start_time = get_tb(); + + /* + * Make sure input buffer is 64k page aligned. This is assumed since + * this driver is designed for page compression only (for now). This + * is very nice since we can now use direct DDE(s) for the input and + * the alignment is guaranteed. + */ + inbuf = (unsigned long)in; + if (!IS_ALIGNED(inbuf, PAGE_SIZE) || inlen != PAGE_SIZE) + return -EINVAL; + + rcu_read_lock(); + local_devdata = rcu_dereference(devdata); + if (!local_devdata || !local_devdata->dev) { + rcu_read_unlock(); + return -ENODEV; + } + max_sync_size = local_devdata->max_sync_size; + dev = local_devdata->dev; + + /* Create the header */ + hdr = (struct nx842_header *)out; + hdr->blocks_nr = PAGE_SIZE / max_sync_size; + hdrsize = nx842_header_size(hdr); + outbuf = (unsigned long)out + hdrsize; + bytesleft = *outlen - hdrsize; + + /* Init scatterlist */ + workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem, + NX842_HW_PAGE_SIZE); + slin.entries = (struct nx842_slentry *)workmem->slin; + slout.entries = (struct nx842_slentry *)workmem->slout; + + /* Init operation */ + op.flags = NX842_OP_COMPRESS; + csbcpb = &workmem->csbcpb; + memset(csbcpb, 0, sizeof(*csbcpb)); + op.csbcpb = nx842_get_pa(csbcpb); + op.out = nx842_get_pa(slout.entries); + + for (i = 0; i < hdr->blocks_nr; i++) { + /* + * Aligning the output blocks to 128 bytes does waste space, + * but it prevents the need for bounce buffers and memory + * copies. It also simplifies the code a lot. In the worst + * case (64k page, 4k max_sync_size), you lose up to + * (128*16)/64k = ~3% the compression factor. For 64k + * max_sync_size, the loss would be at most 128/64k = ~0.2%. + */ + padding = ALIGN(outbuf, IO_BUFFER_ALIGN) - outbuf; + outbuf += padding; + bytesleft -= padding; + if (i == 0) + /* save offset into first block in header */ + hdr->offset = padding + hdrsize; + + if (bytesleft <= 0) { + ret = -ENOSPC; + goto unlock; + } + + /* + * NOTE: If the default max_sync_size is changed from 4k + * to 64k, remove the "likely" case below, since a + * scatterlist will always be needed. + */ + if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) { + /* Create direct DDE */ + op.in = nx842_get_pa((void *)inbuf); + op.inlen = max_sync_size; + + } else { + /* Create indirect DDE (scatterlist) */ + nx842_build_scatterlist(inbuf, max_sync_size, &slin); + op.in = nx842_get_pa(slin.entries); + op.inlen = -nx842_get_scatterlist_size(&slin); + } + + /* + * If max_sync_size != NX842_HW_PAGE_SIZE, an indirect + * DDE is required for the outbuf. + * If max_sync_size == NX842_HW_PAGE_SIZE, outbuf must + * also be page aligned (1 in 128/4k=32 chance) in order + * to use a direct DDE. + * This is unlikely, just use an indirect DDE always. + */ + nx842_build_scatterlist(outbuf, + min(bytesleft, max_sync_size), &slout); + /* op.out set before loop */ + op.outlen = -nx842_get_scatterlist_size(&slout); + + /* Send request to pHyp */ + ret = vio_h_cop_sync(local_devdata->vdev, &op); + + /* Check for pHyp error */ + if (ret) { + dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", + __func__, ret, op.hcall_err); + ret = -EIO; + goto unlock; + } + + /* Check for hardware error */ + ret = nx842_validate_result(dev, &csbcpb->csb); + if (ret && ret != -ENOSPC) + goto unlock; + + /* Handle incompressible data */ + if (unlikely(ret == -ENOSPC)) { + if (bytesleft < max_sync_size) { + /* + * Not enough space left in the output buffer + * to store uncompressed block + */ + goto unlock; + } else { + /* Store incompressible block */ + memcpy((void *)outbuf, (void *)inbuf, + max_sync_size); + hdr->sizes[i] = -max_sync_size; + outbuf += max_sync_size; + bytesleft -= max_sync_size; + /* Reset ret, incompressible data handled */ + ret = 0; + } + } else { + /* Normal case, compression was successful */ + size = csbcpb->csb.processed_byte_count; + dev_dbg(dev, "%s: processed_bytes=%d\n", + __func__, size); + hdr->sizes[i] = size; + outbuf += size; + bytesleft -= size; + } + + inbuf += max_sync_size; + } + + *outlen = (unsigned int)(outbuf - (unsigned long)out); + +unlock: + if (ret) + nx842_inc_comp_failed(local_devdata); + else { + nx842_inc_comp_complete(local_devdata); + ibm_nx842_incr_hist(local_devdata->counters->comp_times, + (get_tb() - start_time) / tb_ticks_per_usec); + } + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(nx842_compress); + +static int sw842_decompress(const unsigned char *, int, unsigned char *, int *, + const void *); + +/** + * nx842_decompress - Decompress data using the 842 algorithm + * + * Decompression provide by the NX842 coprocessor on IBM Power systems. + * The input buffer is decompressed and the result is stored in the + * provided output buffer. The size allocated to the output buffer is + * provided by the caller of this function in @outlen. Upon return from + * this function @outlen contains the length of the decompressed data. + * If there is an error then @outlen will be 0 and an error will be + * specified by the return code from this function. + * + * @in: Pointer to input buffer, will use bounce buffer if not 128 byte + * aligned + * @inlen: Length of input buffer + * @out: Pointer to output buffer, must be page aligned + * @outlen: Length of output buffer, must be PAGE_SIZE + * @wrkmem: ptr to buffer for working memory, size determined by + * nx842_get_workmem_size() + * + * Returns: + * 0 Success, output of length @outlen stored in the buffer at @out + * -ENODEV Hardware decompression device is unavailable + * -ENOMEM Unable to allocate internal buffers + * -ENOSPC Output buffer is to small + * -EINVAL Bad input data encountered when attempting decompress + * -EIO Internal error + */ +int nx842_decompress(const unsigned char *in, unsigned int inlen, + unsigned char *out, unsigned int *outlen, void *wmem) +{ + struct nx842_header *hdr; + struct nx842_devdata *local_devdata; + struct device *dev = NULL; + struct nx842_workmem *workmem; + struct nx842_scatterlist slin, slout; + struct nx_csbcpb *csbcpb; + int ret = 0, i, size, max_sync_size; + unsigned long inbuf, outbuf; + struct vio_pfo_op op = { + .done = NULL, + .handle = 0, + .timeout = 0, + }; + unsigned long start_time = get_tb(); + + /* Ensure page alignment and size */ + outbuf = (unsigned long)out; + if (!IS_ALIGNED(outbuf, PAGE_SIZE) || *outlen != PAGE_SIZE) + return -EINVAL; + + rcu_read_lock(); + local_devdata = rcu_dereference(devdata); + if (local_devdata) + dev = local_devdata->dev; + + /* Get header */ + hdr = (struct nx842_header *)in; + + workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem, + NX842_HW_PAGE_SIZE); + + inbuf = (unsigned long)in + hdr->offset; + if (likely(!IS_ALIGNED(inbuf, IO_BUFFER_ALIGN))) { + /* Copy block(s) into bounce buffer for alignment */ + memcpy(workmem->bounce, in + hdr->offset, inlen - hdr->offset); + inbuf = (unsigned long)workmem->bounce; + } + + /* Init scatterlist */ + slin.entries = (struct nx842_slentry *)workmem->slin; + slout.entries = (struct nx842_slentry *)workmem->slout; + + /* Init operation */ + op.flags = NX842_OP_DECOMPRESS; + csbcpb = &workmem->csbcpb; + memset(csbcpb, 0, sizeof(*csbcpb)); + op.csbcpb = nx842_get_pa(csbcpb); + + /* + * max_sync_size may have changed since compression, + * so we can't read it from the device info. We need + * to derive it from hdr->blocks_nr. + */ + max_sync_size = PAGE_SIZE / hdr->blocks_nr; + + for (i = 0; i < hdr->blocks_nr; i++) { + /* Skip padding */ + inbuf = ALIGN(inbuf, IO_BUFFER_ALIGN); + + if (hdr->sizes[i] < 0) { + /* Negative sizes indicate uncompressed data blocks */ + size = abs(hdr->sizes[i]); + memcpy((void *)outbuf, (void *)inbuf, size); + outbuf += size; + inbuf += size; + continue; + } + + if (!dev) + goto sw; + + /* + * The better the compression, the more likely the "likely" + * case becomes. + */ + if (likely((inbuf & NX842_HW_PAGE_MASK) == + ((inbuf + hdr->sizes[i] - 1) & NX842_HW_PAGE_MASK))) { + /* Create direct DDE */ + op.in = nx842_get_pa((void *)inbuf); + op.inlen = hdr->sizes[i]; + } else { + /* Create indirect DDE (scatterlist) */ + nx842_build_scatterlist(inbuf, hdr->sizes[i] , &slin); + op.in = nx842_get_pa(slin.entries); + op.inlen = -nx842_get_scatterlist_size(&slin); + } + + /* + * NOTE: If the default max_sync_size is changed from 4k + * to 64k, remove the "likely" case below, since a + * scatterlist will always be needed. + */ + if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) { + /* Create direct DDE */ + op.out = nx842_get_pa((void *)outbuf); + op.outlen = max_sync_size; + } else { + /* Create indirect DDE (scatterlist) */ + nx842_build_scatterlist(outbuf, max_sync_size, &slout); + op.out = nx842_get_pa(slout.entries); + op.outlen = -nx842_get_scatterlist_size(&slout); + } + + /* Send request to pHyp */ + ret = vio_h_cop_sync(local_devdata->vdev, &op); + + /* Check for pHyp error */ + if (ret) { + dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", + __func__, ret, op.hcall_err); + dev = NULL; + goto sw; + } + + /* Check for hardware error */ + ret = nx842_validate_result(dev, &csbcpb->csb); + if (ret) { + dev = NULL; + goto sw; + } + + /* HW decompression success */ + inbuf += hdr->sizes[i]; + outbuf += csbcpb->csb.processed_byte_count; + continue; + +sw: + /* software decompression */ + size = max_sync_size; + ret = sw842_decompress( + (unsigned char *)inbuf, hdr->sizes[i], + (unsigned char *)outbuf, &size, wmem); + if (ret) + pr_debug("%s: sw842_decompress failed with %d\n", + __func__, ret); + + if (ret) { + if (ret != -ENOSPC && ret != -EINVAL && + ret != -EMSGSIZE) + ret = -EIO; + goto unlock; + } + + /* SW decompression success */ + inbuf += hdr->sizes[i]; + outbuf += size; + } + + *outlen = (unsigned int)(outbuf - (unsigned long)out); + +unlock: + if (ret) + /* decompress fail */ + nx842_inc_decomp_failed(local_devdata); + else { + if (!dev) + /* software decompress */ + nx842_inc_swdecomp(local_devdata); + nx842_inc_decomp_complete(local_devdata); + ibm_nx842_incr_hist(local_devdata->counters->decomp_times, + (get_tb() - start_time) / tb_ticks_per_usec); + } + + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(nx842_decompress); + +/** + * nx842_OF_set_defaults -- Set default (disabled) values for devdata + * + * @devdata - struct nx842_devdata to update + * + * Returns: + * 0 on success + * -ENOENT if @devdata ptr is NULL + */ +static int nx842_OF_set_defaults(struct nx842_devdata *devdata) +{ + if (devdata) { + devdata->max_sync_size = 0; + devdata->max_sync_sg = 0; + devdata->max_sg_len = 0; + devdata->status = UNAVAILABLE; + return 0; + } else + return -ENOENT; +} + +/** + * nx842_OF_upd_status -- Update the device info from OF status prop + * + * The status property indicates if the accelerator is enabled. If the + * device is in the OF tree it indicates that the hardware is present. + * The status field indicates if the device is enabled when the status + * is 'okay'. Otherwise the device driver will be disabled. + * + * @devdata - struct nx842_devdata to update + * @prop - struct property point containing the maxsyncop for the update + * + * Returns: + * 0 - Device is available + * -EINVAL - Device is not available + */ +static int nx842_OF_upd_status(struct nx842_devdata *devdata, + struct property *prop) { + int ret = 0; + const char *status = (const char *)prop->value; + + if (!strncmp(status, "okay", (size_t)prop->length)) { + devdata->status = AVAILABLE; + } else { + dev_info(devdata->dev, "%s: status '%s' is not 'okay'\n", + __func__, status); + devdata->status = UNAVAILABLE; + } + + return ret; +} + +/** + * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop + * + * Definition of the 'ibm,max-sg-len' OF property: + * This field indicates the maximum byte length of a scatter list + * for the platform facility. It is a single cell encoded as with encode-int. + * + * Example: + * # od -x ibm,max-sg-len + * 0000000 0000 0ff0 + * + * In this example, the maximum byte length of a scatter list is + * 0x0ff0 (4,080). + * + * @devdata - struct nx842_devdata to update + * @prop - struct property point containing the maxsyncop for the update + * + * Returns: + * 0 on success + * -EINVAL on failure + */ +static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata, + struct property *prop) { + int ret = 0; + const int *maxsglen = prop->value; + + if (prop->length != sizeof(*maxsglen)) { + dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__); + dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__, + prop->length, sizeof(*maxsglen)); + ret = -EINVAL; + } else { + devdata->max_sg_len = (unsigned int)min(*maxsglen, + (int)NX842_HW_PAGE_SIZE); + } + + return ret; +} + +/** + * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop + * + * Definition of the 'ibm,max-sync-cop' OF property: + * Two series of cells. The first series of cells represents the maximums + * that can be synchronously compressed. The second series of cells + * represents the maximums that can be synchronously decompressed. + * 1. The first cell in each series contains the count of the number of + * data length, scatter list elements pairs that follow – each being + * of the form + * a. One cell data byte length + * b. One cell total number of scatter list elements + * + * Example: + * # od -x ibm,max-sync-cop + * 0000000 0000 0001 0000 1000 0000 01fe 0000 0001 + * 0000020 0000 1000 0000 01fe + * + * In this example, compression supports 0x1000 (4,096) data byte length + * and 0x1fe (510) total scatter list elements. Decompression supports + * 0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list + * elements. + * + * @devdata - struct nx842_devdata to update + * @prop - struct property point containing the maxsyncop for the update + * + * Returns: + * 0 on success + * -EINVAL on failure + */ +static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata, + struct property *prop) { + int ret = 0; + const struct maxsynccop_t { + int comp_elements; + int comp_data_limit; + int comp_sg_limit; + int decomp_elements; + int decomp_data_limit; + int decomp_sg_limit; + } *maxsynccop; + + if (prop->length != sizeof(*maxsynccop)) { + dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__); + dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length, + sizeof(*maxsynccop)); + ret = -EINVAL; + goto out; + } + + maxsynccop = (const struct maxsynccop_t *)prop->value; + + /* Use one limit rather than separate limits for compression and + * decompression. Set a maximum for this so as not to exceed the + * size that the header can support and round the value down to + * the hardware page size (4K) */ + devdata->max_sync_size = + (unsigned int)min(maxsynccop->comp_data_limit, + maxsynccop->decomp_data_limit); + + devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size, + SIZE_64K); + + if (devdata->max_sync_size < SIZE_4K) { + dev_err(devdata->dev, "%s: hardware max data size (%u) is " + "less than the driver minimum, unable to use " + "the hardware device\n", + __func__, devdata->max_sync_size); + ret = -EINVAL; + goto out; + } + + devdata->max_sync_sg = (unsigned int)min(maxsynccop->comp_sg_limit, + maxsynccop->decomp_sg_limit); + if (devdata->max_sync_sg < 1) { + dev_err(devdata->dev, "%s: hardware max sg size (%u) is " + "less than the driver minimum, unable to use " + "the hardware device\n", + __func__, devdata->max_sync_sg); + ret = -EINVAL; + goto out; + } + +out: + return ret; +} + +/** + * + * nx842_OF_upd -- Handle OF properties updates for the device. + * + * Set all properties from the OF tree. Optionally, a new property + * can be provided by the @new_prop pointer to overwrite an existing value. + * The device will remain disabled until all values are valid, this function + * will return an error for updates unless all values are valid. + * + * @new_prop: If not NULL, this property is being updated. If NULL, update + * all properties from the current values in the OF tree. + * + * Returns: + * 0 - Success + * -ENOMEM - Could not allocate memory for new devdata structure + * -EINVAL - property value not found, new_prop is not a recognized + * property for the device or property value is not valid. + * -ENODEV - Device is not available + */ +static int nx842_OF_upd(struct property *new_prop) +{ + struct nx842_devdata *old_devdata = NULL; + struct nx842_devdata *new_devdata = NULL; + struct device_node *of_node = NULL; + struct property *status = NULL; + struct property *maxsglen = NULL; + struct property *maxsyncop = NULL; + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&devdata_mutex, flags); + old_devdata = rcu_dereference_check(devdata, + lockdep_is_held(&devdata_mutex)); + if (old_devdata) + of_node = old_devdata->dev->of_node; + + if (!old_devdata || !of_node) { + pr_err("%s: device is not available\n", __func__); + spin_unlock_irqrestore(&devdata_mutex, flags); + return -ENODEV; + } + + new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); + if (!new_devdata) { + dev_err(old_devdata->dev, "%s: Could not allocate memory for device data\n", __func__); + ret = -ENOMEM; + goto error_out; + } + + memcpy(new_devdata, old_devdata, sizeof(*old_devdata)); + new_devdata->counters = old_devdata->counters; + + /* Set ptrs for existing properties */ + status = of_find_property(of_node, "status", NULL); + maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL); + maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL); + if (!status || !maxsglen || !maxsyncop) { + dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__); + ret = -EINVAL; + goto error_out; + } + + /* Set ptr to new property if provided */ + if (new_prop) { + /* Single property */ + if (!strncmp(new_prop->name, "status", new_prop->length)) { + status = new_prop; + + } else if (!strncmp(new_prop->name, "ibm,max-sg-len", + new_prop->length)) { + maxsglen = new_prop; + + } else if (!strncmp(new_prop->name, "ibm,max-sync-cop", + new_prop->length)) { + maxsyncop = new_prop; + + } else { + /* + * Skip the update, the property being updated + * has no impact. + */ + goto out; + } + } + + /* Perform property updates */ + ret = nx842_OF_upd_status(new_devdata, status); + if (ret) + goto error_out; + + ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen); + if (ret) + goto error_out; + + ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop); + if (ret) + goto error_out; + +out: + dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n", + __func__, new_devdata->max_sync_size, + old_devdata->max_sync_size); + dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n", + __func__, new_devdata->max_sync_sg, + old_devdata->max_sync_sg); + dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n", + __func__, new_devdata->max_sg_len, + old_devdata->max_sg_len); + + rcu_assign_pointer(devdata, new_devdata); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + dev_set_drvdata(new_devdata->dev, new_devdata); + kfree(old_devdata); + return 0; + +error_out: + if (new_devdata) { + dev_info(old_devdata->dev, "%s: device disabled\n", __func__); + nx842_OF_set_defaults(new_devdata); + rcu_assign_pointer(devdata, new_devdata); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + dev_set_drvdata(new_devdata->dev, new_devdata); + kfree(old_devdata); + } else { + dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__); + spin_unlock_irqrestore(&devdata_mutex, flags); + } + + if (!ret) + ret = -EINVAL; + return ret; +} + +/** + * nx842_OF_notifier - Process updates to OF properties for the device + * + * @np: notifier block + * @action: notifier action + * @update: struct pSeries_reconfig_prop_update pointer if action is + * PSERIES_UPDATE_PROPERTY + * + * Returns: + * NOTIFY_OK on success + * NOTIFY_BAD encoded with error number on failure, use + * notifier_to_errno() to decode this value + */ +static int nx842_OF_notifier(struct notifier_block *np, unsigned long action, + void *update) +{ + struct of_prop_reconfig *upd = update; + struct nx842_devdata *local_devdata; + struct device_node *node = NULL; + + rcu_read_lock(); + local_devdata = rcu_dereference(devdata); + if (local_devdata) + node = local_devdata->dev->of_node; + + if (local_devdata && + action == OF_RECONFIG_UPDATE_PROPERTY && + !strcmp(upd->dn->name, node->name)) { + rcu_read_unlock(); + nx842_OF_upd(upd->prop); + } else + rcu_read_unlock(); + + return NOTIFY_OK; +} + +static struct notifier_block nx842_of_nb = { + .notifier_call = nx842_OF_notifier, +}; + +#define nx842_counter_read(_name) \ +static ssize_t nx842_##_name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) { \ + struct nx842_devdata *local_devdata; \ + int p = 0; \ + rcu_read_lock(); \ + local_devdata = rcu_dereference(devdata); \ + if (local_devdata) \ + p = snprintf(buf, PAGE_SIZE, "%ld\n", \ + atomic64_read(&local_devdata->counters->_name)); \ + rcu_read_unlock(); \ + return p; \ +} + +#define NX842DEV_COUNTER_ATTR_RO(_name) \ + nx842_counter_read(_name); \ + static struct device_attribute dev_attr_##_name = __ATTR(_name, \ + 0444, \ + nx842_##_name##_show,\ + NULL); + +NX842DEV_COUNTER_ATTR_RO(comp_complete); +NX842DEV_COUNTER_ATTR_RO(comp_failed); +NX842DEV_COUNTER_ATTR_RO(decomp_complete); +NX842DEV_COUNTER_ATTR_RO(decomp_failed); +NX842DEV_COUNTER_ATTR_RO(swdecomp); + +static ssize_t nx842_timehist_show(struct device *, + struct device_attribute *, char *); + +static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444, + nx842_timehist_show, NULL); +static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times, + 0444, nx842_timehist_show, NULL); + +static ssize_t nx842_timehist_show(struct device *dev, + struct device_attribute *attr, char *buf) { + char *p = buf; + struct nx842_devdata *local_devdata; + atomic64_t *times; + int bytes_remain = PAGE_SIZE; + int bytes; + int i; + + rcu_read_lock(); + local_devdata = rcu_dereference(devdata); + if (!local_devdata) { + rcu_read_unlock(); + return 0; + } + + if (attr == &dev_attr_comp_times) + times = local_devdata->counters->comp_times; + else if (attr == &dev_attr_decomp_times) + times = local_devdata->counters->decomp_times; + else { + rcu_read_unlock(); + return 0; + } + + for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) { + bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n", + i ? (2<<(i-1)) : 0, (2<<i)-1, + atomic64_read(×[i])); + bytes_remain -= bytes; + p += bytes; + } + /* The last bucket holds everything over + * 2<<(NX842_HIST_SLOTS - 2) us */ + bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n", + 2<<(NX842_HIST_SLOTS - 2), + atomic64_read(×[(NX842_HIST_SLOTS - 1)])); + p += bytes; + + rcu_read_unlock(); + return p - buf; +} + +static struct attribute *nx842_sysfs_entries[] = { + &dev_attr_comp_complete.attr, + &dev_attr_comp_failed.attr, + &dev_attr_decomp_complete.attr, + &dev_attr_decomp_failed.attr, + &dev_attr_swdecomp.attr, + &dev_attr_comp_times.attr, + &dev_attr_decomp_times.attr, + NULL, +}; + +static struct attribute_group nx842_attribute_group = { + .name = NULL, /* put in device directory */ + .attrs = nx842_sysfs_entries, +}; + +static int __init nx842_probe(struct vio_dev *viodev, + const struct vio_device_id *id) +{ + struct nx842_devdata *old_devdata, *new_devdata = NULL; + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&devdata_mutex, flags); + old_devdata = rcu_dereference_check(devdata, + lockdep_is_held(&devdata_mutex)); + + if (old_devdata && old_devdata->vdev != NULL) { + dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__); + ret = -1; + goto error_unlock; + } + + dev_set_drvdata(&viodev->dev, NULL); + + new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); + if (!new_devdata) { + dev_err(&viodev->dev, "%s: Could not allocate memory for device data\n", __func__); + ret = -ENOMEM; + goto error_unlock; + } + + new_devdata->counters = kzalloc(sizeof(*new_devdata->counters), + GFP_NOFS); + if (!new_devdata->counters) { + dev_err(&viodev->dev, "%s: Could not allocate memory for performance counters\n", __func__); + ret = -ENOMEM; + goto error_unlock; + } + + new_devdata->vdev = viodev; + new_devdata->dev = &viodev->dev; + nx842_OF_set_defaults(new_devdata); + + rcu_assign_pointer(devdata, new_devdata); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + kfree(old_devdata); + + of_reconfig_notifier_register(&nx842_of_nb); + + ret = nx842_OF_upd(NULL); + if (ret && ret != -ENODEV) { + dev_err(&viodev->dev, "could not parse device tree. %d\n", ret); + ret = -1; + goto error; + } + + rcu_read_lock(); + dev_set_drvdata(&viodev->dev, rcu_dereference(devdata)); + rcu_read_unlock(); + + if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) { + dev_err(&viodev->dev, "could not create sysfs device attributes\n"); + ret = -1; + goto error; + } + + return 0; + +error_unlock: + spin_unlock_irqrestore(&devdata_mutex, flags); + if (new_devdata) + kfree(new_devdata->counters); + kfree(new_devdata); +error: + return ret; +} + +static int __exit nx842_remove(struct vio_dev *viodev) +{ + struct nx842_devdata *old_devdata; + unsigned long flags; + + pr_info("Removing IBM Power 842 compression device\n"); + sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group); + + spin_lock_irqsave(&devdata_mutex, flags); + old_devdata = rcu_dereference_check(devdata, + lockdep_is_held(&devdata_mutex)); + of_reconfig_notifier_unregister(&nx842_of_nb); + RCU_INIT_POINTER(devdata, NULL); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + dev_set_drvdata(&viodev->dev, NULL); + if (old_devdata) + kfree(old_devdata->counters); + kfree(old_devdata); + return 0; +} + +static struct vio_device_id nx842_driver_ids[] = { + {"ibm,compression-v1", "ibm,compression"}, + {"", ""}, +}; + +static struct vio_driver nx842_driver = { + .name = MODULE_NAME, + .probe = nx842_probe, + .remove = nx842_remove, + .get_desired_dma = nx842_get_desired_dma, + .id_table = nx842_driver_ids, +}; + +static int __init nx842_init(void) +{ + struct nx842_devdata *new_devdata; + pr_info("Registering IBM Power 842 compression driver\n"); + + RCU_INIT_POINTER(devdata, NULL); + new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL); + if (!new_devdata) { + pr_err("Could not allocate memory for device data\n"); + return -ENOMEM; + } + new_devdata->status = UNAVAILABLE; + RCU_INIT_POINTER(devdata, new_devdata); + + return vio_register_driver(&nx842_driver); +} + +module_init(nx842_init); + +static void __exit nx842_exit(void) +{ + struct nx842_devdata *old_devdata; + unsigned long flags; + + pr_info("Exiting IBM Power 842 compression driver\n"); + spin_lock_irqsave(&devdata_mutex, flags); + old_devdata = rcu_dereference_check(devdata, + lockdep_is_held(&devdata_mutex)); + RCU_INIT_POINTER(devdata, NULL); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + if (old_devdata) + dev_set_drvdata(old_devdata->dev, NULL); + kfree(old_devdata); + vio_unregister_driver(&nx842_driver); +} + +module_exit(nx842_exit); + +/********************************* + * 842 software decompressor +*********************************/ +typedef int (*sw842_template_op)(const char **, int *, unsigned char **, + struct sw842_fifo *); + +static int sw842_data8(const char **, int *, unsigned char **, + struct sw842_fifo *); +static int sw842_data4(const char **, int *, unsigned char **, + struct sw842_fifo *); +static int sw842_data2(const char **, int *, unsigned char **, + struct sw842_fifo *); +static int sw842_ptr8(const char **, int *, unsigned char **, + struct sw842_fifo *); +static int sw842_ptr4(const char **, int *, unsigned char **, + struct sw842_fifo *); +static int sw842_ptr2(const char **, int *, unsigned char **, + struct sw842_fifo *); + +/* special templates */ +#define SW842_TMPL_REPEAT 0x1B +#define SW842_TMPL_ZEROS 0x1C +#define SW842_TMPL_EOF 0x1E + +static sw842_template_op sw842_tmpl_ops[26][4] = { + { sw842_data8, NULL}, /* 0 (00000) */ + { sw842_data4, sw842_data2, sw842_ptr2, NULL}, + { sw842_data4, sw842_ptr2, sw842_data2, NULL}, + { sw842_data4, sw842_ptr2, sw842_ptr2, NULL}, + { sw842_data4, sw842_ptr4, NULL}, + { sw842_data2, sw842_ptr2, sw842_data4, NULL}, + { sw842_data2, sw842_ptr2, sw842_data2, sw842_ptr2}, + { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_data2}, + { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_ptr2,}, + { sw842_data2, sw842_ptr2, sw842_ptr4, NULL}, + { sw842_ptr2, sw842_data2, sw842_data4, NULL}, /* 10 (01010) */ + { sw842_ptr2, sw842_data4, sw842_ptr2, NULL}, + { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_data2}, + { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_ptr2}, + { sw842_ptr2, sw842_data2, sw842_ptr4, NULL}, + { sw842_ptr2, sw842_ptr2, sw842_data4, NULL}, + { sw842_ptr2, sw842_ptr2, sw842_data2, sw842_ptr2}, + { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_data2}, + { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_ptr2}, + { sw842_ptr2, sw842_ptr2, sw842_ptr4, NULL}, + { sw842_ptr4, sw842_data4, NULL}, /* 20 (10100) */ + { sw842_ptr4, sw842_data2, sw842_ptr2, NULL}, + { sw842_ptr4, sw842_ptr2, sw842_data2, NULL}, + { sw842_ptr4, sw842_ptr2, sw842_ptr2, NULL}, + { sw842_ptr4, sw842_ptr4, NULL}, + { sw842_ptr8, NULL} +}; + +/* Software decompress helpers */ + +static uint8_t sw842_get_byte(const char *buf, int bit) +{ + uint8_t tmpl; + uint16_t tmp; + tmp = htons(*(uint16_t *)(buf)); + tmp = (uint16_t)(tmp << bit); + tmp = ntohs(tmp); + memcpy(&tmpl, &tmp, 1); + return tmpl; +} + +static uint8_t sw842_get_template(const char **buf, int *bit) +{ + uint8_t byte; + byte = sw842_get_byte(*buf, *bit); + byte = byte >> 3; + byte &= 0x1F; + *buf += (*bit + 5) / 8; + *bit = (*bit + 5) % 8; + return byte; +} + +/* repeat_count happens to be 5-bit too (like the template) */ +static uint8_t sw842_get_repeat_count(const char **buf, int *bit) +{ + uint8_t byte; + byte = sw842_get_byte(*buf, *bit); + byte = byte >> 2; + byte &= 0x3F; + *buf += (*bit + 6) / 8; + *bit = (*bit + 6) % 8; + return byte; +} + +static uint8_t sw842_get_ptr2(const char **buf, int *bit) +{ + uint8_t ptr; + ptr = sw842_get_byte(*buf, *bit); + (*buf)++; + return ptr; +} + +static uint16_t sw842_get_ptr4(const char **buf, int *bit, + struct sw842_fifo *fifo) +{ + uint16_t ptr; + ptr = htons(*(uint16_t *)(*buf)); + ptr = (uint16_t)(ptr << *bit); + ptr = ptr >> 7; + ptr &= 0x01FF; + *buf += (*bit + 9) / 8; + *bit = (*bit + 9) % 8; + return ptr; +} + +static uint8_t sw842_get_ptr8(const char **buf, int *bit, + struct sw842_fifo *fifo) +{ + return sw842_get_ptr2(buf, bit); +} + +/* Software decompress template ops */ + +static int sw842_data8(const char **inbuf, int *inbit, + unsigned char **outbuf, struct sw842_fifo *fifo) +{ + int ret; + + ret = sw842_data4(inbuf, inbit, outbuf, fifo); + if (ret) + return ret; + ret = sw842_data4(inbuf, inbit, outbuf, fifo); + return ret; +} + +static int sw842_data4(const char **inbuf, int *inbit, + unsigned char **outbuf, struct sw842_fifo *fifo) +{ + int ret; + + ret = sw842_data2(inbuf, inbit, outbuf, fifo); + if (ret) + return ret; + ret = sw842_data2(inbuf, inbit, outbuf, fifo); + return ret; +} + +static int sw842_data2(const char **inbuf, int *inbit, + unsigned char **outbuf, struct sw842_fifo *fifo) +{ + **outbuf = sw842_get_byte(*inbuf, *inbit); + (*inbuf)++; + (*outbuf)++; + **outbuf = sw842_get_byte(*inbuf, *inbit); + (*inbuf)++; + (*outbuf)++; + return 0; +} + +static int sw842_ptr8(const char **inbuf, int *inbit, + unsigned char **outbuf, struct sw842_fifo *fifo) +{ + uint8_t ptr; + ptr = sw842_get_ptr8(inbuf, inbit, fifo); + if (!fifo->f84_full && (ptr >= fifo->f8_count)) + return 1; + memcpy(*outbuf, fifo->f8[ptr], 8); + *outbuf += 8; + return 0; +} + +static int sw842_ptr4(const char **inbuf, int *inbit, + unsigned char **outbuf, struct sw842_fifo *fifo) +{ + uint16_t ptr; + ptr = sw842_get_ptr4(inbuf, inbit, fifo); + if (!fifo->f84_full && (ptr >= fifo->f4_count)) + return 1; + memcpy(*outbuf, fifo->f4[ptr], 4); + *outbuf += 4; + return 0; +} + +static int sw842_ptr2(const char **inbuf, int *inbit, + unsigned char **outbuf, struct sw842_fifo *fifo) +{ + uint8_t ptr; + ptr = sw842_get_ptr2(inbuf, inbit); + if (!fifo->f2_full && (ptr >= fifo->f2_count)) + return 1; + memcpy(*outbuf, fifo->f2[ptr], 2); + *outbuf += 2; + return 0; +} + +static void sw842_copy_to_fifo(const char *buf, struct sw842_fifo *fifo) +{ + unsigned char initial_f2count = fifo->f2_count; + + memcpy(fifo->f8[fifo->f8_count], buf, 8); + fifo->f4_count += 2; + fifo->f8_count += 1; + + if (!fifo->f84_full && fifo->f4_count >= 512) { + fifo->f84_full = 1; + fifo->f4_count /= 512; + } + + memcpy(fifo->f2[fifo->f2_count++], buf, 2); + memcpy(fifo->f2[fifo->f2_count++], buf + 2, 2); + memcpy(fifo->f2[fifo->f2_count++], buf + 4, 2); + memcpy(fifo->f2[fifo->f2_count++], buf + 6, 2); + if (fifo->f2_count < initial_f2count) + fifo->f2_full = 1; +} + +static int sw842_decompress(const unsigned char *src, int srclen, + unsigned char *dst, int *destlen, + const void *wrkmem) +{ + uint8_t tmpl; + const char *inbuf; + int inbit = 0; + unsigned char *outbuf, *outbuf_end, *origbuf, *prevbuf; + const char *inbuf_end; + sw842_template_op op; + int opindex; + int i, repeat_count; + struct sw842_fifo *fifo; + int ret = 0; + + fifo = &((struct nx842_workmem *)(wrkmem))->swfifo; + memset(fifo, 0, sizeof(*fifo)); + + origbuf = NULL; + inbuf = src; + inbuf_end = src + srclen; + outbuf = dst; + outbuf_end = dst + *destlen; + + while ((tmpl = sw842_get_template(&inbuf, &inbit)) != SW842_TMPL_EOF) { + if (inbuf >= inbuf_end) { + ret = -EINVAL; + goto out; + } + + opindex = 0; + prevbuf = origbuf; + origbuf = outbuf; + switch (tmpl) { + case SW842_TMPL_REPEAT: + if (prevbuf == NULL) { + ret = -EINVAL; + goto out; + } + + repeat_count = sw842_get_repeat_count(&inbuf, + &inbit) + 1; + + /* Did the repeat count advance past the end of input */ + if (inbuf > inbuf_end) { + ret = -EINVAL; + goto out; + } + + for (i = 0; i < repeat_count; i++) { + /* Would this overflow the output buffer */ + if ((outbuf + 8) > outbuf_end) { + ret = -ENOSPC; + goto out; + } + + memcpy(outbuf, prevbuf, 8); + sw842_copy_to_fifo(outbuf, fifo); + outbuf += 8; + } + break; + + case SW842_TMPL_ZEROS: + /* Would this overflow the output buffer */ + if ((outbuf + 8) > outbuf_end) { + ret = -ENOSPC; + goto out; + } + + memset(outbuf, 0, 8); + sw842_copy_to_fifo(outbuf, fifo); + outbuf += 8; + break; + + default: + if (tmpl > 25) { + ret = -EINVAL; + goto out; + } + + /* Does this go past the end of the input buffer */ + if ((inbuf + 2) > inbuf_end) { + ret = -EINVAL; + goto out; + } + + /* Would this overflow the output buffer */ + if ((outbuf + 8) > outbuf_end) { + ret = -ENOSPC; + goto out; + } + + while (opindex < 4 && + (op = sw842_tmpl_ops[tmpl][opindex++]) + != NULL) { + ret = (*op)(&inbuf, &inbit, &outbuf, fifo); + if (ret) { + ret = -EINVAL; + goto out; + } + sw842_copy_to_fifo(origbuf, fifo); + } + } + } + +out: + if (!ret) + *destlen = (unsigned int)(outbuf - dst); + else + *destlen = 0; + + return ret; +} diff --git a/drivers/crypto/nx/nx-aes-cbc.c b/drivers/crypto/nx/nx-aes-cbc.c index 69ed796ee32..cc00b52306b 100644 --- a/drivers/crypto/nx/nx-aes-cbc.c +++ b/drivers/crypto/nx/nx-aes-cbc.c @@ -70,35 +70,52 @@ static int cbc_aes_nx_crypt(struct blkcipher_desc *desc, { struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc->tfm); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; + unsigned long irq_flags; + unsigned int processed = 0, to_process; + u32 max_sg_len; int rc; - if (nbytes > nx_ctx->ap->databytelen) - return -EINVAL; + spin_lock_irqsave(&nx_ctx->lock, irq_flags); + + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx->ap->sglen); if (enc) NX_CPB_FDM(csbcpb) |= NX_FDM_ENDE_ENCRYPT; else NX_CPB_FDM(csbcpb) &= ~NX_FDM_ENDE_ENCRYPT; - rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, - csbcpb->cpb.aes_cbc.iv); - if (rc) - goto out; - - if (!nx_ctx->op.inlen || !nx_ctx->op.outlen) { - rc = -EINVAL; - goto out; - } - - rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, - desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP); - if (rc) - goto out; - - atomic_inc(&(nx_ctx->stats->aes_ops)); - atomic64_add(csbcpb->csb.processed_byte_count, - &(nx_ctx->stats->aes_bytes)); + do { + to_process = min_t(u64, nbytes - processed, + nx_ctx->ap->databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + to_process = to_process & ~(AES_BLOCK_SIZE - 1); + + rc = nx_build_sg_lists(nx_ctx, desc, dst, src, to_process, + processed, csbcpb->cpb.aes_cbc.iv); + if (rc) + goto out; + + if (!nx_ctx->op.inlen || !nx_ctx->op.outlen) { + rc = -EINVAL; + goto out; + } + + rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, + desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; + + memcpy(desc->info, csbcpb->cpb.aes_cbc.cv, AES_BLOCK_SIZE); + atomic_inc(&(nx_ctx->stats->aes_ops)); + atomic64_add(csbcpb->csb.processed_byte_count, + &(nx_ctx->stats->aes_bytes)); + + processed += to_process; + } while (processed < nbytes); out: + spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); return rc; } @@ -126,8 +143,8 @@ struct crypto_alg nx_cbc_aes_alg = { .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), .cra_type = &crypto_blkcipher_type, + .cra_alignmask = 0xf, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(nx_cbc_aes_alg.cra_list), .cra_init = nx_crypto_ctx_aes_cbc_init, .cra_exit = nx_crypto_ctx_exit, .cra_blkcipher = { diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c index 7aeac678b9c..5ecd4c2414a 100644 --- a/drivers/crypto/nx/nx-aes-ccm.c +++ b/drivers/crypto/nx/nx-aes-ccm.c @@ -179,13 +179,26 @@ static int generate_pat(u8 *iv, struct nx_sg *nx_insg = nx_ctx->in_sg; struct nx_sg *nx_outsg = nx_ctx->out_sg; unsigned int iauth_len = 0; - struct vio_pfo_op *op = NULL; u8 tmp[16], *b1 = NULL, *b0 = NULL, *result = NULL; int rc; /* zero the ctr value */ memset(iv + 15 - iv[0], 0, iv[0] + 1); + /* page 78 of nx_wb.pdf has, + * Note: RFC3610 allows the AAD data to be up to 2^64 -1 bytes + * in length. If a full message is used, the AES CCA implementation + * restricts the maximum AAD length to 2^32 -1 bytes. + * If partial messages are used, the implementation supports + * 2^64 -1 bytes maximum AAD length. + * + * However, in the cryptoapi's aead_request structure, + * assoclen is an unsigned int, thus it cannot hold a length + * value greater than 2^32 - 1. + * Thus the AAD is further constrained by this and is never + * greater than 2^32. + */ + if (!req->assoclen) { b0 = nx_ctx->csbcpb->cpb.aes_ccm.in_pat_or_b0; } else if (req->assoclen <= 14) { @@ -195,7 +208,46 @@ static int generate_pat(u8 *iv, b0 = nx_ctx->csbcpb->cpb.aes_ccm.in_pat_or_b0; b1 = nx_ctx->priv.ccm.iauth_tag; iauth_len = req->assoclen; + } else if (req->assoclen <= 65280) { + /* if associated data is less than (2^16 - 2^8), we construct + * B1 differently and feed in the associated data to a CCA + * operation */ + b0 = nx_ctx->csbcpb_aead->cpb.aes_cca.b0; + b1 = nx_ctx->csbcpb_aead->cpb.aes_cca.b1; + iauth_len = 14; + } else { + b0 = nx_ctx->csbcpb_aead->cpb.aes_cca.b0; + b1 = nx_ctx->csbcpb_aead->cpb.aes_cca.b1; + iauth_len = 10; + } + + /* generate B0 */ + rc = generate_b0(iv, req->assoclen, authsize, nbytes, b0); + if (rc) + return rc; + + /* generate B1: + * add control info for associated data + * RFC 3610 and NIST Special Publication 800-38C + */ + if (b1) { + memset(b1, 0, 16); + if (req->assoclen <= 65280) { + *(u16 *)b1 = (u16)req->assoclen; + scatterwalk_map_and_copy(b1 + 2, req->assoc, 0, + iauth_len, SCATTERWALK_FROM_SG); + } else { + *(u16 *)b1 = (u16)(0xfffe); + *(u32 *)&b1[2] = (u32)req->assoclen; + scatterwalk_map_and_copy(b1 + 6, req->assoc, 0, + iauth_len, SCATTERWALK_FROM_SG); + } + } + /* now copy any remaining AAD to scatterlist and call nx... */ + if (!req->assoclen) { + return rc; + } else if (req->assoclen <= 14) { nx_insg = nx_build_sg_list(nx_insg, b1, 16, nx_ctx->ap->sglen); nx_outsg = nx_build_sg_list(nx_outsg, tmp, 16, nx_ctx->ap->sglen); @@ -210,56 +262,74 @@ static int generate_pat(u8 *iv, NX_CPB_FDM(nx_ctx->csbcpb) |= NX_FDM_ENDE_ENCRYPT; NX_CPB_FDM(nx_ctx->csbcpb) |= NX_FDM_INTERMEDIATE; - op = &nx_ctx->op; result = nx_ctx->csbcpb->cpb.aes_ccm.out_pat_or_mac; - } else if (req->assoclen <= 65280) { - /* if associated data is less than (2^16 - 2^8), we construct - * B1 differently and feed in the associated data to a CCA - * operation */ - b0 = nx_ctx->csbcpb_aead->cpb.aes_cca.b0; - b1 = nx_ctx->csbcpb_aead->cpb.aes_cca.b1; - iauth_len = 14; - /* remaining assoc data must have scatterlist built for it */ - nx_insg = nx_walk_and_build(nx_insg, nx_ctx->ap->sglen, - req->assoc, iauth_len, - req->assoclen - iauth_len); - nx_ctx->op_aead.inlen = (nx_ctx->in_sg - nx_insg) * - sizeof(struct nx_sg); + rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, + req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + return rc; + + atomic_inc(&(nx_ctx->stats->aes_ops)); + atomic64_add(req->assoclen, &(nx_ctx->stats->aes_bytes)); - op = &nx_ctx->op_aead; - result = nx_ctx->csbcpb_aead->cpb.aes_cca.out_pat_or_b0; } else { - /* if associated data is less than (2^32), we construct B1 - * differently yet again and feed in the associated data to a - * CCA operation */ - pr_err("associated data len is %u bytes (returning -EINVAL)\n", - req->assoclen); - rc = -EINVAL; - } + u32 max_sg_len; + unsigned int processed = 0, to_process; + + /* page_limit: number of sg entries that fit on one page */ + max_sg_len = min_t(u32, + nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx->ap->sglen); + + processed += iauth_len; + + do { + to_process = min_t(u32, req->assoclen - processed, + nx_ctx->ap->databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + + if ((to_process + processed) < req->assoclen) { + NX_CPB_FDM(nx_ctx->csbcpb_aead) |= + NX_FDM_INTERMEDIATE; + } else { + NX_CPB_FDM(nx_ctx->csbcpb_aead) &= + ~NX_FDM_INTERMEDIATE; + } + + nx_insg = nx_walk_and_build(nx_ctx->in_sg, + nx_ctx->ap->sglen, + req->assoc, processed, + to_process); + + nx_ctx->op_aead.inlen = (nx_ctx->in_sg - nx_insg) * + sizeof(struct nx_sg); - rc = generate_b0(iv, req->assoclen, authsize, nbytes, b0); - if (rc) - goto done; + result = nx_ctx->csbcpb_aead->cpb.aes_cca.out_pat_or_b0; - if (b1) { - memset(b1, 0, 16); - *(u16 *)b1 = (u16)req->assoclen; + rc = nx_hcall_sync(nx_ctx, &nx_ctx->op_aead, + req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + return rc; - scatterwalk_map_and_copy(b1 + 2, req->assoc, 0, - iauth_len, SCATTERWALK_FROM_SG); + memcpy(nx_ctx->csbcpb_aead->cpb.aes_cca.b0, + nx_ctx->csbcpb_aead->cpb.aes_cca.out_pat_or_b0, + AES_BLOCK_SIZE); - rc = nx_hcall_sync(nx_ctx, op, - req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP); - if (rc) - goto done; + NX_CPB_FDM(nx_ctx->csbcpb_aead) |= NX_FDM_CONTINUATION; - atomic_inc(&(nx_ctx->stats->aes_ops)); - atomic64_add(req->assoclen, &(nx_ctx->stats->aes_bytes)); + atomic_inc(&(nx_ctx->stats->aes_ops)); + atomic64_add(req->assoclen, + &(nx_ctx->stats->aes_bytes)); - memcpy(out, result, AES_BLOCK_SIZE); + processed += to_process; + } while (processed < req->assoclen); + + result = nx_ctx->csbcpb_aead->cpb.aes_cca.out_pat_or_b0; } -done: + + memcpy(out, result, AES_BLOCK_SIZE); + return rc; } @@ -271,10 +341,12 @@ static int ccm_nx_decrypt(struct aead_request *req, unsigned int nbytes = req->cryptlen; unsigned int authsize = crypto_aead_authsize(crypto_aead_reqtfm(req)); struct nx_ccm_priv *priv = &nx_ctx->priv.ccm; + unsigned long irq_flags; + unsigned int processed = 0, to_process; + u32 max_sg_len; int rc = -1; - if (nbytes > nx_ctx->ap->databytelen) - return -EINVAL; + spin_lock_irqsave(&nx_ctx->lock, irq_flags); nbytes -= authsize; @@ -288,26 +360,61 @@ static int ccm_nx_decrypt(struct aead_request *req, if (rc) goto out; - rc = nx_build_sg_lists(nx_ctx, desc, req->dst, req->src, nbytes, - csbcpb->cpb.aes_ccm.iv_or_ctr); - if (rc) - goto out; + /* page_limit: number of sg entries that fit on one page */ + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx->ap->sglen); - NX_CPB_FDM(nx_ctx->csbcpb) &= ~NX_FDM_ENDE_ENCRYPT; - NX_CPB_FDM(nx_ctx->csbcpb) &= ~NX_FDM_INTERMEDIATE; + do { + + /* to_process: the AES_BLOCK_SIZE data chunk to process in this + * update. This value is bound by sg list limits. + */ + to_process = min_t(u64, nbytes - processed, + nx_ctx->ap->databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + + if ((to_process + processed) < nbytes) + NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; + else + NX_CPB_FDM(csbcpb) &= ~NX_FDM_INTERMEDIATE; + + NX_CPB_FDM(nx_ctx->csbcpb) &= ~NX_FDM_ENDE_ENCRYPT; + + rc = nx_build_sg_lists(nx_ctx, desc, req->dst, req->src, + to_process, processed, + csbcpb->cpb.aes_ccm.iv_or_ctr); + if (rc) + goto out; - rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, + rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP); - if (rc) - goto out; + if (rc) + goto out; - atomic_inc(&(nx_ctx->stats->aes_ops)); - atomic64_add(csbcpb->csb.processed_byte_count, - &(nx_ctx->stats->aes_bytes)); + /* for partial completion, copy following for next + * entry into loop... + */ + memcpy(desc->info, csbcpb->cpb.aes_ccm.out_ctr, AES_BLOCK_SIZE); + memcpy(csbcpb->cpb.aes_ccm.in_pat_or_b0, + csbcpb->cpb.aes_ccm.out_pat_or_mac, AES_BLOCK_SIZE); + memcpy(csbcpb->cpb.aes_ccm.in_s0, + csbcpb->cpb.aes_ccm.out_s0, AES_BLOCK_SIZE); + + NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; + + /* update stats */ + atomic_inc(&(nx_ctx->stats->aes_ops)); + atomic64_add(csbcpb->csb.processed_byte_count, + &(nx_ctx->stats->aes_bytes)); + + processed += to_process; + } while (processed < nbytes); rc = memcmp(csbcpb->cpb.aes_ccm.out_pat_or_mac, priv->oauth_tag, authsize) ? -EBADMSG : 0; out: + spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); return rc; } @@ -318,38 +425,76 @@ static int ccm_nx_encrypt(struct aead_request *req, struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; unsigned int nbytes = req->cryptlen; unsigned int authsize = crypto_aead_authsize(crypto_aead_reqtfm(req)); + unsigned long irq_flags; + unsigned int processed = 0, to_process; + u32 max_sg_len; int rc = -1; - if (nbytes > nx_ctx->ap->databytelen) - return -EINVAL; + spin_lock_irqsave(&nx_ctx->lock, irq_flags); rc = generate_pat(desc->info, req, nx_ctx, authsize, nbytes, csbcpb->cpb.aes_ccm.in_pat_or_b0); if (rc) goto out; - rc = nx_build_sg_lists(nx_ctx, desc, req->dst, req->src, nbytes, - csbcpb->cpb.aes_ccm.iv_or_ctr); - if (rc) - goto out; + /* page_limit: number of sg entries that fit on one page */ + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx->ap->sglen); + + do { + /* to process: the AES_BLOCK_SIZE data chunk to process in this + * update. This value is bound by sg list limits. + */ + to_process = min_t(u64, nbytes - processed, + nx_ctx->ap->databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + + if ((to_process + processed) < nbytes) + NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; + else + NX_CPB_FDM(csbcpb) &= ~NX_FDM_INTERMEDIATE; + + NX_CPB_FDM(csbcpb) |= NX_FDM_ENDE_ENCRYPT; + + rc = nx_build_sg_lists(nx_ctx, desc, req->dst, req->src, + to_process, processed, + csbcpb->cpb.aes_ccm.iv_or_ctr); + if (rc) + goto out; - NX_CPB_FDM(csbcpb) |= NX_FDM_ENDE_ENCRYPT; - NX_CPB_FDM(csbcpb) &= ~NX_FDM_INTERMEDIATE; + rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, + req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; - rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, - req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP); - if (rc) - goto out; + /* for partial completion, copy following for next + * entry into loop... + */ + memcpy(desc->info, csbcpb->cpb.aes_ccm.out_ctr, AES_BLOCK_SIZE); + memcpy(csbcpb->cpb.aes_ccm.in_pat_or_b0, + csbcpb->cpb.aes_ccm.out_pat_or_mac, AES_BLOCK_SIZE); + memcpy(csbcpb->cpb.aes_ccm.in_s0, + csbcpb->cpb.aes_ccm.out_s0, AES_BLOCK_SIZE); - atomic_inc(&(nx_ctx->stats->aes_ops)); - atomic64_add(csbcpb->csb.processed_byte_count, - &(nx_ctx->stats->aes_bytes)); + NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; + + /* update stats */ + atomic_inc(&(nx_ctx->stats->aes_ops)); + atomic64_add(csbcpb->csb.processed_byte_count, + &(nx_ctx->stats->aes_bytes)); + + processed += to_process; + + } while (processed < nbytes); /* copy out the auth tag */ scatterwalk_map_and_copy(csbcpb->cpb.aes_ccm.out_pat_or_mac, req->dst, nbytes, authsize, SCATTERWALK_TO_SG); + out: + spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); return rc; } @@ -430,7 +575,6 @@ struct crypto_alg nx_ccm_aes_alg = { .cra_ctxsize = sizeof(struct nx_crypto_ctx), .cra_type = &crypto_aead_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(nx_ccm_aes_alg.cra_list), .cra_init = nx_crypto_ctx_aes_ccm_init, .cra_exit = nx_crypto_ctx_exit, .cra_aead = { @@ -453,7 +597,6 @@ struct crypto_alg nx_ccm4309_aes_alg = { .cra_ctxsize = sizeof(struct nx_crypto_ctx), .cra_type = &crypto_nivaead_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(nx_ccm4309_aes_alg.cra_list), .cra_init = nx_crypto_ctx_aes_ccm_init, .cra_exit = nx_crypto_ctx_exit, .cra_aead = { diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c index 52d4eb05e8f..a37d009dc75 100644 --- a/drivers/crypto/nx/nx-aes-ctr.c +++ b/drivers/crypto/nx/nx-aes-ctr.c @@ -88,30 +88,48 @@ static int ctr_aes_nx_crypt(struct blkcipher_desc *desc, { struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc->tfm); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; + unsigned long irq_flags; + unsigned int processed = 0, to_process; + u32 max_sg_len; int rc; - if (nbytes > nx_ctx->ap->databytelen) - return -EINVAL; + spin_lock_irqsave(&nx_ctx->lock, irq_flags); - rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, - csbcpb->cpb.aes_ctr.iv); - if (rc) - goto out; + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx->ap->sglen); - if (!nx_ctx->op.inlen || !nx_ctx->op.outlen) { - rc = -EINVAL; - goto out; - } + do { + to_process = min_t(u64, nbytes - processed, + nx_ctx->ap->databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + to_process = to_process & ~(AES_BLOCK_SIZE - 1); + + rc = nx_build_sg_lists(nx_ctx, desc, dst, src, to_process, + processed, csbcpb->cpb.aes_ctr.iv); + if (rc) + goto out; + + if (!nx_ctx->op.inlen || !nx_ctx->op.outlen) { + rc = -EINVAL; + goto out; + } + + rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, + desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; + + memcpy(desc->info, csbcpb->cpb.aes_cbc.cv, AES_BLOCK_SIZE); - rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, - desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP); - if (rc) - goto out; + atomic_inc(&(nx_ctx->stats->aes_ops)); + atomic64_add(csbcpb->csb.processed_byte_count, + &(nx_ctx->stats->aes_bytes)); - atomic_inc(&(nx_ctx->stats->aes_ops)); - atomic64_add(csbcpb->csb.processed_byte_count, - &(nx_ctx->stats->aes_bytes)); + processed += to_process; + } while (processed < nbytes); out: + spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); return rc; } @@ -141,7 +159,6 @@ struct crypto_alg nx_ctr_aes_alg = { .cra_ctxsize = sizeof(struct nx_crypto_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(nx_ctr_aes_alg.cra_list), .cra_init = nx_crypto_ctx_aes_ctr_init, .cra_exit = nx_crypto_ctx_exit, .cra_blkcipher = { @@ -163,7 +180,6 @@ struct crypto_alg nx_ctr3686_aes_alg = { .cra_ctxsize = sizeof(struct nx_crypto_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(nx_ctr3686_aes_alg.cra_list), .cra_init = nx_crypto_ctx_aes_ctr_init, .cra_exit = nx_crypto_ctx_exit, .cra_blkcipher = { diff --git a/drivers/crypto/nx/nx-aes-ecb.c b/drivers/crypto/nx/nx-aes-ecb.c index 7b77bc2d1df..85a8d23cf29 100644 --- a/drivers/crypto/nx/nx-aes-ecb.c +++ b/drivers/crypto/nx/nx-aes-ecb.c @@ -70,34 +70,52 @@ static int ecb_aes_nx_crypt(struct blkcipher_desc *desc, { struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc->tfm); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; + unsigned long irq_flags; + unsigned int processed = 0, to_process; + u32 max_sg_len; int rc; - if (nbytes > nx_ctx->ap->databytelen) - return -EINVAL; + spin_lock_irqsave(&nx_ctx->lock, irq_flags); + + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx->ap->sglen); if (enc) NX_CPB_FDM(csbcpb) |= NX_FDM_ENDE_ENCRYPT; else NX_CPB_FDM(csbcpb) &= ~NX_FDM_ENDE_ENCRYPT; - rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, NULL); - if (rc) - goto out; + do { + to_process = min_t(u64, nbytes - processed, + nx_ctx->ap->databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + to_process = to_process & ~(AES_BLOCK_SIZE - 1); - if (!nx_ctx->op.inlen || !nx_ctx->op.outlen) { - rc = -EINVAL; - goto out; - } + rc = nx_build_sg_lists(nx_ctx, desc, dst, src, to_process, + processed, NULL); + if (rc) + goto out; + + if (!nx_ctx->op.inlen || !nx_ctx->op.outlen) { + rc = -EINVAL; + goto out; + } + + rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, + desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; + + atomic_inc(&(nx_ctx->stats->aes_ops)); + atomic64_add(csbcpb->csb.processed_byte_count, + &(nx_ctx->stats->aes_bytes)); - rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, - desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP); - if (rc) - goto out; + processed += to_process; + } while (processed < nbytes); - atomic_inc(&(nx_ctx->stats->aes_ops)); - atomic64_add(csbcpb->csb.processed_byte_count, - &(nx_ctx->stats->aes_bytes)); out: + spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); return rc; } @@ -123,10 +141,10 @@ struct crypto_alg nx_ecb_aes_alg = { .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = AES_BLOCK_SIZE, + .cra_alignmask = 0xf, .cra_ctxsize = sizeof(struct nx_crypto_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(nx_ecb_aes_alg.cra_list), .cra_init = nx_crypto_ctx_aes_ecb_init, .cra_exit = nx_crypto_ctx_exit, .cra_blkcipher = { diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c index 9ab1c7341da..025d9a8d5b1 100644 --- a/drivers/crypto/nx/nx-aes-gcm.c +++ b/drivers/crypto/nx/nx-aes-gcm.c @@ -125,38 +125,187 @@ static int nx_gca(struct nx_crypto_ctx *nx_ctx, struct aead_request *req, u8 *out) { + int rc; struct nx_csbcpb *csbcpb_aead = nx_ctx->csbcpb_aead; - int rc = -EINVAL; struct scatter_walk walk; struct nx_sg *nx_sg = nx_ctx->in_sg; + unsigned int nbytes = req->assoclen; + unsigned int processed = 0, to_process; + u32 max_sg_len; - if (req->assoclen > nx_ctx->ap->databytelen) - goto out; - - if (req->assoclen <= AES_BLOCK_SIZE) { + if (nbytes <= AES_BLOCK_SIZE) { scatterwalk_start(&walk, req->assoc); - scatterwalk_copychunks(out, &walk, req->assoclen, - SCATTERWALK_FROM_SG); + scatterwalk_copychunks(out, &walk, nbytes, SCATTERWALK_FROM_SG); scatterwalk_done(&walk, SCATTERWALK_FROM_SG, 0); - - rc = 0; - goto out; + return 0; } - nx_sg = nx_walk_and_build(nx_sg, nx_ctx->ap->sglen, req->assoc, 0, - req->assoclen); - nx_ctx->op_aead.inlen = (nx_ctx->in_sg - nx_sg) * sizeof(struct nx_sg); + NX_CPB_FDM(csbcpb_aead) &= ~NX_FDM_CONTINUATION; + + /* page_limit: number of sg entries that fit on one page */ + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx->ap->sglen); + + do { + /* + * to_process: the data chunk to process in this update. + * This value is bound by sg list limits. + */ + to_process = min_t(u64, nbytes - processed, + nx_ctx->ap->databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + + if ((to_process + processed) < nbytes) + NX_CPB_FDM(csbcpb_aead) |= NX_FDM_INTERMEDIATE; + else + NX_CPB_FDM(csbcpb_aead) &= ~NX_FDM_INTERMEDIATE; + + nx_sg = nx_walk_and_build(nx_ctx->in_sg, nx_ctx->ap->sglen, + req->assoc, processed, to_process); + nx_ctx->op_aead.inlen = (nx_ctx->in_sg - nx_sg) + * sizeof(struct nx_sg); + + rc = nx_hcall_sync(nx_ctx, &nx_ctx->op_aead, + req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + return rc; + + memcpy(csbcpb_aead->cpb.aes_gca.in_pat, + csbcpb_aead->cpb.aes_gca.out_pat, + AES_BLOCK_SIZE); + NX_CPB_FDM(csbcpb_aead) |= NX_FDM_CONTINUATION; + + atomic_inc(&(nx_ctx->stats->aes_ops)); + atomic64_add(req->assoclen, &(nx_ctx->stats->aes_bytes)); + + processed += to_process; + } while (processed < nbytes); + + memcpy(out, csbcpb_aead->cpb.aes_gca.out_pat, AES_BLOCK_SIZE); + + return rc; +} + +static int gmac(struct aead_request *req, struct blkcipher_desc *desc) +{ + int rc; + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; + struct nx_sg *nx_sg; + unsigned int nbytes = req->assoclen; + unsigned int processed = 0, to_process; + u32 max_sg_len; + + /* Set GMAC mode */ + csbcpb->cpb.hdr.mode = NX_MODE_AES_GMAC; + + NX_CPB_FDM(csbcpb) &= ~NX_FDM_CONTINUATION; + + /* page_limit: number of sg entries that fit on one page */ + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx->ap->sglen); + + /* Copy IV */ + memcpy(csbcpb->cpb.aes_gcm.iv_or_cnt, desc->info, AES_BLOCK_SIZE); + + do { + /* + * to_process: the data chunk to process in this update. + * This value is bound by sg list limits. + */ + to_process = min_t(u64, nbytes - processed, + nx_ctx->ap->databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + + if ((to_process + processed) < nbytes) + NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; + else + NX_CPB_FDM(csbcpb) &= ~NX_FDM_INTERMEDIATE; + + nx_sg = nx_walk_and_build(nx_ctx->in_sg, nx_ctx->ap->sglen, + req->assoc, processed, to_process); + nx_ctx->op.inlen = (nx_ctx->in_sg - nx_sg) + * sizeof(struct nx_sg); + + csbcpb->cpb.aes_gcm.bit_length_data = 0; + csbcpb->cpb.aes_gcm.bit_length_aad = 8 * nbytes; + + rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, + req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; + + memcpy(csbcpb->cpb.aes_gcm.in_pat_or_aad, + csbcpb->cpb.aes_gcm.out_pat_or_mac, AES_BLOCK_SIZE); + memcpy(csbcpb->cpb.aes_gcm.in_s0, + csbcpb->cpb.aes_gcm.out_s0, AES_BLOCK_SIZE); + + NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; + + atomic_inc(&(nx_ctx->stats->aes_ops)); + atomic64_add(req->assoclen, &(nx_ctx->stats->aes_bytes)); + + processed += to_process; + } while (processed < nbytes); + +out: + /* Restore GCM mode */ + csbcpb->cpb.hdr.mode = NX_MODE_AES_GCM; + return rc; +} + +static int gcm_empty(struct aead_request *req, struct blkcipher_desc *desc, + int enc) +{ + int rc; + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; + char out[AES_BLOCK_SIZE]; + struct nx_sg *in_sg, *out_sg; + + /* For scenarios where the input message is zero length, AES CTR mode + * may be used. Set the source data to be a single block (16B) of all + * zeros, and set the input IV value to be the same as the GMAC IV + * value. - nx_wb 4.8.1.3 */ + + /* Change to ECB mode */ + csbcpb->cpb.hdr.mode = NX_MODE_AES_ECB; + memcpy(csbcpb->cpb.aes_ecb.key, csbcpb->cpb.aes_gcm.key, + sizeof(csbcpb->cpb.aes_ecb.key)); + if (enc) + NX_CPB_FDM(csbcpb) |= NX_FDM_ENDE_ENCRYPT; + else + NX_CPB_FDM(csbcpb) &= ~NX_FDM_ENDE_ENCRYPT; + + /* Encrypt the counter/IV */ + in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *) desc->info, + AES_BLOCK_SIZE, nx_ctx->ap->sglen); + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *) out, sizeof(out), + nx_ctx->ap->sglen); + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); - rc = nx_hcall_sync(nx_ctx, &nx_ctx->op_aead, - req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP); + rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, + desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP); if (rc) goto out; - atomic_inc(&(nx_ctx->stats->aes_ops)); - atomic64_add(req->assoclen, &(nx_ctx->stats->aes_bytes)); - memcpy(out, csbcpb_aead->cpb.aes_gca.out_pat, AES_BLOCK_SIZE); + /* Copy out the auth tag */ + memcpy(csbcpb->cpb.aes_gcm.out_pat_or_mac, out, + crypto_aead_authsize(crypto_aead_reqtfm(req))); out: + /* Restore XCBC mode */ + csbcpb->cpb.hdr.mode = NX_MODE_AES_GCM; + + /* + * ECB key uses the same region that GCM AAD and counter, so it's safe + * to just fill it with zeroes. + */ + memset(csbcpb->cpb.aes_ecb.key, 0, sizeof(csbcpb->cpb.aes_ecb.key)); + return rc; } @@ -166,88 +315,104 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc) struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; struct blkcipher_desc desc; unsigned int nbytes = req->cryptlen; + unsigned int processed = 0, to_process; + unsigned long irq_flags; + u32 max_sg_len; int rc = -EINVAL; - if (nbytes > nx_ctx->ap->databytelen) - goto out; + spin_lock_irqsave(&nx_ctx->lock, irq_flags); desc.info = nx_ctx->priv.gcm.iv; /* initialize the counter */ *(u32 *)(desc.info + NX_GCM_CTR_OFFSET) = 1; - /* For scenarios where the input message is zero length, AES CTR mode - * may be used. Set the source data to be a single block (16B) of all - * zeros, and set the input IV value to be the same as the GMAC IV - * value. - nx_wb 4.8.1.3 */ if (nbytes == 0) { - char src[AES_BLOCK_SIZE] = {}; - struct scatterlist sg; - - desc.tfm = crypto_alloc_blkcipher("ctr(aes)", 0, 0); - if (IS_ERR(desc.tfm)) { - rc = -ENOMEM; + if (req->assoclen == 0) + rc = gcm_empty(req, &desc, enc); + else + rc = gmac(req, &desc); + if (rc) goto out; - } - - crypto_blkcipher_setkey(desc.tfm, csbcpb->cpb.aes_gcm.key, - NX_CPB_KEY_SIZE(csbcpb) == NX_KS_AES_128 ? 16 : - NX_CPB_KEY_SIZE(csbcpb) == NX_KS_AES_192 ? 24 : 32); - - sg_init_one(&sg, src, AES_BLOCK_SIZE); - if (enc) - crypto_blkcipher_encrypt_iv(&desc, req->dst, &sg, - AES_BLOCK_SIZE); else - crypto_blkcipher_decrypt_iv(&desc, req->dst, &sg, - AES_BLOCK_SIZE); - crypto_free_blkcipher(desc.tfm); - - rc = 0; - goto out; + goto mac; } - desc.tfm = (struct crypto_blkcipher *)req->base.tfm; - + /* Process associated data */ csbcpb->cpb.aes_gcm.bit_length_aad = req->assoclen * 8; - if (req->assoclen) { rc = nx_gca(nx_ctx, req, csbcpb->cpb.aes_gcm.in_pat_or_aad); if (rc) goto out; } - if (enc) + /* Set flags for encryption */ + NX_CPB_FDM(csbcpb) &= ~NX_FDM_CONTINUATION; + if (enc) { NX_CPB_FDM(csbcpb) |= NX_FDM_ENDE_ENCRYPT; - else - nbytes -= AES_BLOCK_SIZE; + } else { + NX_CPB_FDM(csbcpb) &= ~NX_FDM_ENDE_ENCRYPT; + nbytes -= crypto_aead_authsize(crypto_aead_reqtfm(req)); + } - csbcpb->cpb.aes_gcm.bit_length_data = nbytes * 8; + /* page_limit: number of sg entries that fit on one page */ + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx->ap->sglen); + + do { + /* + * to_process: the data chunk to process in this update. + * This value is bound by sg list limits. + */ + to_process = min_t(u64, nbytes - processed, + nx_ctx->ap->databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + + if ((to_process + processed) < nbytes) + NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; + else + NX_CPB_FDM(csbcpb) &= ~NX_FDM_INTERMEDIATE; - rc = nx_build_sg_lists(nx_ctx, &desc, req->dst, req->src, nbytes, - csbcpb->cpb.aes_gcm.iv_or_cnt); - if (rc) - goto out; + csbcpb->cpb.aes_gcm.bit_length_data = nbytes * 8; + desc.tfm = (struct crypto_blkcipher *) req->base.tfm; + rc = nx_build_sg_lists(nx_ctx, &desc, req->dst, + req->src, to_process, processed, + csbcpb->cpb.aes_gcm.iv_or_cnt); + if (rc) + goto out; - rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, - req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP); - if (rc) - goto out; + rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, + req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; - atomic_inc(&(nx_ctx->stats->aes_ops)); - atomic64_add(csbcpb->csb.processed_byte_count, - &(nx_ctx->stats->aes_bytes)); + memcpy(desc.info, csbcpb->cpb.aes_gcm.out_cnt, AES_BLOCK_SIZE); + memcpy(csbcpb->cpb.aes_gcm.in_pat_or_aad, + csbcpb->cpb.aes_gcm.out_pat_or_mac, AES_BLOCK_SIZE); + memcpy(csbcpb->cpb.aes_gcm.in_s0, + csbcpb->cpb.aes_gcm.out_s0, AES_BLOCK_SIZE); + + NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; + + atomic_inc(&(nx_ctx->stats->aes_ops)); + atomic64_add(csbcpb->csb.processed_byte_count, + &(nx_ctx->stats->aes_bytes)); + + processed += to_process; + } while (processed < nbytes); +mac: if (enc) { /* copy out the auth tag */ scatterwalk_map_and_copy(csbcpb->cpb.aes_gcm.out_pat_or_mac, req->dst, nbytes, crypto_aead_authsize(crypto_aead_reqtfm(req)), SCATTERWALK_TO_SG); - } else if (req->assoclen) { + } else { u8 *itag = nx_ctx->priv.gcm.iauth_tag; u8 *otag = csbcpb->cpb.aes_gcm.out_pat_or_mac; - scatterwalk_map_and_copy(itag, req->dst, nbytes, + scatterwalk_map_and_copy(itag, req->src, nbytes, crypto_aead_authsize(crypto_aead_reqtfm(req)), SCATTERWALK_FROM_SG); rc = memcmp(itag, otag, @@ -255,6 +420,7 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc) -EBADMSG : 0; } out: + spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); return rc; } @@ -316,7 +482,6 @@ struct crypto_alg nx_gcm_aes_alg = { .cra_ctxsize = sizeof(struct nx_crypto_ctx), .cra_type = &crypto_aead_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(nx_gcm_aes_alg.cra_list), .cra_init = nx_crypto_ctx_aes_gcm_init, .cra_exit = nx_crypto_ctx_exit, .cra_aead = { @@ -338,7 +503,6 @@ struct crypto_alg nx_gcm4106_aes_alg = { .cra_ctxsize = sizeof(struct nx_crypto_ctx), .cra_type = &crypto_nivaead_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(nx_gcm4106_aes_alg.cra_list), .cra_init = nx_crypto_ctx_aes_gcm_init, .cra_exit = nx_crypto_ctx_exit, .cra_aead = { diff --git a/drivers/crypto/nx/nx-aes-xcbc.c b/drivers/crypto/nx/nx-aes-xcbc.c index 93923e4628c..03c4bf57d06 100644 --- a/drivers/crypto/nx/nx-aes-xcbc.c +++ b/drivers/crypto/nx/nx-aes-xcbc.c @@ -56,6 +56,77 @@ static int nx_xcbc_set_key(struct crypto_shash *desc, return 0; } +/* + * Based on RFC 3566, for a zero-length message: + * + * n = 1 + * K1 = E(K, 0x01010101010101010101010101010101) + * K3 = E(K, 0x03030303030303030303030303030303) + * E[0] = 0x00000000000000000000000000000000 + * M[1] = 0x80000000000000000000000000000000 (0 length message with padding) + * E[1] = (K1, M[1] ^ E[0] ^ K3) + * Tag = M[1] + */ +static int nx_xcbc_empty(struct shash_desc *desc, u8 *out) +{ + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); + struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; + struct nx_sg *in_sg, *out_sg; + u8 keys[2][AES_BLOCK_SIZE]; + u8 key[32]; + int rc = 0; + + /* Change to ECB mode */ + csbcpb->cpb.hdr.mode = NX_MODE_AES_ECB; + memcpy(key, csbcpb->cpb.aes_xcbc.key, AES_BLOCK_SIZE); + memcpy(csbcpb->cpb.aes_ecb.key, key, AES_BLOCK_SIZE); + NX_CPB_FDM(csbcpb) |= NX_FDM_ENDE_ENCRYPT; + + /* K1 and K3 base patterns */ + memset(keys[0], 0x01, sizeof(keys[0])); + memset(keys[1], 0x03, sizeof(keys[1])); + + /* Generate K1 and K3 encrypting the patterns */ + in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *) keys, sizeof(keys), + nx_ctx->ap->sglen); + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *) keys, sizeof(keys), + nx_ctx->ap->sglen); + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + + rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, + desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; + atomic_inc(&(nx_ctx->stats->aes_ops)); + + /* XOr K3 with the padding for a 0 length message */ + keys[1][0] ^= 0x80; + + /* Encrypt the final result */ + memcpy(csbcpb->cpb.aes_ecb.key, keys[0], AES_BLOCK_SIZE); + in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *) keys[1], sizeof(keys[1]), + nx_ctx->ap->sglen); + out_sg = nx_build_sg_list(nx_ctx->out_sg, out, AES_BLOCK_SIZE, + nx_ctx->ap->sglen); + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + + rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, + desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; + atomic_inc(&(nx_ctx->stats->aes_ops)); + +out: + /* Restore XCBC mode */ + csbcpb->cpb.hdr.mode = NX_MODE_AES_XCBC_MAC; + memcpy(csbcpb->cpb.aes_xcbc.key, key, AES_BLOCK_SIZE); + NX_CPB_FDM(csbcpb) &= ~NX_FDM_ENDE_ENCRYPT; + + return rc; +} + static int nx_xcbc_init(struct shash_desc *desc) { struct xcbc_state *sctx = shash_desc_ctx(desc); @@ -88,76 +159,99 @@ static int nx_xcbc_update(struct shash_desc *desc, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; struct nx_sg *in_sg; - u32 to_process, leftover; + u32 to_process, leftover, total; + u32 max_sg_len; + unsigned long irq_flags; int rc = 0; - if (NX_CPB_FDM(csbcpb) & NX_FDM_CONTINUATION) { - /* we've hit the nx chip previously and we're updating again, - * so copy over the partial digest */ - memcpy(csbcpb->cpb.aes_xcbc.cv, - csbcpb->cpb.aes_xcbc.out_cv_mac, AES_BLOCK_SIZE); - } + spin_lock_irqsave(&nx_ctx->lock, irq_flags); + + + total = sctx->count + len; /* 2 cases for total data len: * 1: <= AES_BLOCK_SIZE: copy into state, return 0 * 2: > AES_BLOCK_SIZE: process X blocks, copy in leftover */ - if (len + sctx->count <= AES_BLOCK_SIZE) { + if (total <= AES_BLOCK_SIZE) { memcpy(sctx->buffer + sctx->count, data, len); sctx->count += len; goto out; } - /* to_process: the AES_BLOCK_SIZE data chunk to process in this - * update */ - to_process = (sctx->count + len) & ~(AES_BLOCK_SIZE - 1); - leftover = (sctx->count + len) & (AES_BLOCK_SIZE - 1); - - /* the hardware will not accept a 0 byte operation for this algorithm - * and the operation MUST be finalized to be correct. So if we happen - * to get an update that falls on a block sized boundary, we must - * save off the last block to finalize with later. */ - if (!leftover) { - to_process -= AES_BLOCK_SIZE; - leftover = AES_BLOCK_SIZE; - } - - if (sctx->count) { - in_sg = nx_build_sg_list(nx_ctx->in_sg, sctx->buffer, - sctx->count, nx_ctx->ap->sglen); - in_sg = nx_build_sg_list(in_sg, (u8 *)data, - to_process - sctx->count, - nx_ctx->ap->sglen); + in_sg = nx_ctx->in_sg; + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx->ap->sglen); + + do { + + /* to_process: the AES_BLOCK_SIZE data chunk to process in this + * update */ + to_process = min_t(u64, total, nx_ctx->ap->databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + to_process = to_process & ~(AES_BLOCK_SIZE - 1); + leftover = total - to_process; + + /* the hardware will not accept a 0 byte operation for this + * algorithm and the operation MUST be finalized to be correct. + * So if we happen to get an update that falls on a block sized + * boundary, we must save off the last block to finalize with + * later. */ + if (!leftover) { + to_process -= AES_BLOCK_SIZE; + leftover = AES_BLOCK_SIZE; + } + + if (sctx->count) { + in_sg = nx_build_sg_list(nx_ctx->in_sg, + (u8 *) sctx->buffer, + sctx->count, + max_sg_len); + } + in_sg = nx_build_sg_list(in_sg, + (u8 *) data, + to_process - sctx->count, + max_sg_len); nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); - } else { - in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)data, to_process, - nx_ctx->ap->sglen); - nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * - sizeof(struct nx_sg); - } - NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; + /* we've hit the nx chip previously and we're updating again, + * so copy over the partial digest */ + if (NX_CPB_FDM(csbcpb) & NX_FDM_CONTINUATION) { + memcpy(csbcpb->cpb.aes_xcbc.cv, + csbcpb->cpb.aes_xcbc.out_cv_mac, + AES_BLOCK_SIZE); + } + + NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; + if (!nx_ctx->op.inlen || !nx_ctx->op.outlen) { + rc = -EINVAL; + goto out; + } + + rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, + desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; - if (!nx_ctx->op.inlen || !nx_ctx->op.outlen) { - rc = -EINVAL; - goto out; - } + atomic_inc(&(nx_ctx->stats->aes_ops)); - rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, - desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP); - if (rc) - goto out; + /* everything after the first update is continuation */ + NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; - atomic_inc(&(nx_ctx->stats->aes_ops)); + total -= to_process; + data += to_process - sctx->count; + sctx->count = 0; + in_sg = nx_ctx->in_sg; + } while (leftover > AES_BLOCK_SIZE); /* copy the leftover back into the state struct */ - memcpy(sctx->buffer, data + len - leftover, leftover); + memcpy(sctx->buffer, data, leftover); sctx->count = leftover; - /* everything after the first update is continuation */ - NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; out: + spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); return rc; } @@ -167,21 +261,23 @@ static int nx_xcbc_final(struct shash_desc *desc, u8 *out) struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; struct nx_sg *in_sg, *out_sg; + unsigned long irq_flags; int rc = 0; + spin_lock_irqsave(&nx_ctx->lock, irq_flags); + if (NX_CPB_FDM(csbcpb) & NX_FDM_CONTINUATION) { /* we've hit the nx chip previously, now we're finalizing, * so copy over the partial digest */ memcpy(csbcpb->cpb.aes_xcbc.cv, csbcpb->cpb.aes_xcbc.out_cv_mac, AES_BLOCK_SIZE); } else if (sctx->count == 0) { - /* we've never seen an update, so this is a 0 byte op. The - * hardware cannot handle a 0 byte op, so just copy out the - * known 0 byte result. This is cheaper than allocating a - * software context to do a 0 byte op */ - u8 data[] = { 0x75, 0xf0, 0x25, 0x1d, 0x52, 0x8a, 0xc0, 0x1c, - 0x45, 0x73, 0xdf, 0xd5, 0x84, 0xd7, 0x9f, 0x29 }; - memcpy(out, data, sizeof(data)); + /* + * we've never seen an update, so this is a 0 byte op. The + * hardware cannot handle a 0 byte op, so just ECB to + * generate the hash. + */ + rc = nx_xcbc_empty(desc, out); goto out; } @@ -211,6 +307,7 @@ static int nx_xcbc_final(struct shash_desc *desc, u8 *out) memcpy(out, csbcpb->cpb.aes_xcbc.out_cv_mac, AES_BLOCK_SIZE); out: + spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); return rc; } diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c index 9767315f8c0..da0b24a7633 100644 --- a/drivers/crypto/nx/nx-sha256.c +++ b/drivers/crypto/nx/nx-sha256.c @@ -55,70 +55,91 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; struct nx_sg *in_sg; - u64 to_process, leftover; + u64 to_process, leftover, total; + u32 max_sg_len; + unsigned long irq_flags; int rc = 0; - if (NX_CPB_FDM(csbcpb) & NX_FDM_CONTINUATION) { - /* we've hit the nx chip previously and we're updating again, - * so copy over the partial digest */ - memcpy(csbcpb->cpb.sha256.input_partial_digest, - csbcpb->cpb.sha256.message_digest, SHA256_DIGEST_SIZE); - } + spin_lock_irqsave(&nx_ctx->lock, irq_flags); /* 2 cases for total data len: - * 1: <= SHA256_BLOCK_SIZE: copy into state, return 0 - * 2: > SHA256_BLOCK_SIZE: process X blocks, copy in leftover + * 1: < SHA256_BLOCK_SIZE: copy into state, return 0 + * 2: >= SHA256_BLOCK_SIZE: process X blocks, copy in leftover */ - if (len + sctx->count <= SHA256_BLOCK_SIZE) { + total = sctx->count + len; + if (total < SHA256_BLOCK_SIZE) { memcpy(sctx->buf + sctx->count, data, len); sctx->count += len; goto out; } - /* to_process: the SHA256_BLOCK_SIZE data chunk to process in this - * update */ - to_process = (sctx->count + len) & ~(SHA256_BLOCK_SIZE - 1); - leftover = (sctx->count + len) & (SHA256_BLOCK_SIZE - 1); - - if (sctx->count) { - in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)sctx->buf, - sctx->count, nx_ctx->ap->sglen); - in_sg = nx_build_sg_list(in_sg, (u8 *)data, + in_sg = nx_ctx->in_sg; + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx->ap->sglen); + + do { + /* + * to_process: the SHA256_BLOCK_SIZE data chunk to process in + * this update. This value is also restricted by the sg list + * limits. + */ + to_process = min_t(u64, total, nx_ctx->ap->databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + to_process = to_process & ~(SHA256_BLOCK_SIZE - 1); + leftover = total - to_process; + + if (sctx->count) { + in_sg = nx_build_sg_list(nx_ctx->in_sg, + (u8 *) sctx->buf, + sctx->count, max_sg_len); + } + in_sg = nx_build_sg_list(in_sg, (u8 *) data, to_process - sctx->count, - nx_ctx->ap->sglen); - nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * - sizeof(struct nx_sg); - } else { - in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)data, - to_process, nx_ctx->ap->sglen); + max_sg_len); nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); - } - NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; + if (NX_CPB_FDM(csbcpb) & NX_FDM_CONTINUATION) { + /* + * we've hit the nx chip previously and we're updating + * again, so copy over the partial digest. + */ + memcpy(csbcpb->cpb.sha256.input_partial_digest, + csbcpb->cpb.sha256.message_digest, + SHA256_DIGEST_SIZE); + } - if (!nx_ctx->op.inlen || !nx_ctx->op.outlen) { - rc = -EINVAL; - goto out; - } + NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; + if (!nx_ctx->op.inlen || !nx_ctx->op.outlen) { + rc = -EINVAL; + goto out; + } - rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, - desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP); - if (rc) - goto out; + rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, + desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; - atomic_inc(&(nx_ctx->stats->sha256_ops)); + atomic_inc(&(nx_ctx->stats->sha256_ops)); + csbcpb->cpb.sha256.message_bit_length += (u64) + (csbcpb->cpb.sha256.spbc * 8); - /* copy the leftover back into the state struct */ - memcpy(sctx->buf, data + len - leftover, leftover); - sctx->count = leftover; + /* everything after the first update is continuation */ + NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; - csbcpb->cpb.sha256.message_bit_length += (u64) - (csbcpb->cpb.sha256.spbc * 8); + total -= to_process; + data += to_process - sctx->count; + sctx->count = 0; + in_sg = nx_ctx->in_sg; + } while (leftover >= SHA256_BLOCK_SIZE); - /* everything after the first update is continuation */ - NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; + /* copy the leftover back into the state struct */ + if (leftover) + memcpy(sctx->buf, data, leftover); + sctx->count = leftover; out: + spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); return rc; } @@ -128,8 +149,14 @@ static int nx_sha256_final(struct shash_desc *desc, u8 *out) struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; struct nx_sg *in_sg, *out_sg; + u32 max_sg_len; + unsigned long irq_flags; int rc; + spin_lock_irqsave(&nx_ctx->lock, irq_flags); + + max_sg_len = min_t(u32, nx_driver.of.max_sg_len, nx_ctx->ap->sglen); + if (NX_CPB_FDM(csbcpb) & NX_FDM_CONTINUATION) { /* we've hit the nx chip previously, now we're finalizing, * so copy over the partial digest */ @@ -144,9 +171,9 @@ static int nx_sha256_final(struct shash_desc *desc, u8 *out) csbcpb->cpb.sha256.message_bit_length += (u64)(sctx->count * 8); in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)sctx->buf, - sctx->count, nx_ctx->ap->sglen); + sctx->count, max_sg_len); out_sg = nx_build_sg_list(nx_ctx->out_sg, out, SHA256_DIGEST_SIZE, - nx_ctx->ap->sglen); + max_sg_len); nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); @@ -162,10 +189,11 @@ static int nx_sha256_final(struct shash_desc *desc, u8 *out) atomic_inc(&(nx_ctx->stats->sha256_ops)); - atomic64_add(csbcpb->cpb.sha256.message_bit_length, + atomic64_add(csbcpb->cpb.sha256.message_bit_length / 8, &(nx_ctx->stats->sha256_bytes)); memcpy(out, csbcpb->cpb.sha256.message_digest, SHA256_DIGEST_SIZE); out: + spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); return rc; } @@ -175,6 +203,9 @@ static int nx_sha256_export(struct shash_desc *desc, void *out) struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; struct sha256_state *octx = out; + unsigned long irq_flags; + + spin_lock_irqsave(&nx_ctx->lock, irq_flags); octx->count = sctx->count + (csbcpb->cpb.sha256.message_bit_length / 8); @@ -197,6 +228,7 @@ static int nx_sha256_export(struct shash_desc *desc, void *out) octx->state[7] = SHA256_H7; } + spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); return 0; } @@ -206,6 +238,9 @@ static int nx_sha256_import(struct shash_desc *desc, const void *in) struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; const struct sha256_state *ictx = in; + unsigned long irq_flags; + + spin_lock_irqsave(&nx_ctx->lock, irq_flags); memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); @@ -220,6 +255,7 @@ static int nx_sha256_import(struct shash_desc *desc, const void *in) NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; } + spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); return 0; } diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c index 3177b8c3d5f..4ae5b0f221d 100644 --- a/drivers/crypto/nx/nx-sha512.c +++ b/drivers/crypto/nx/nx-sha512.c @@ -55,72 +55,93 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; struct nx_sg *in_sg; - u64 to_process, leftover, spbc_bits; + u64 to_process, leftover, total, spbc_bits; + u32 max_sg_len; + unsigned long irq_flags; int rc = 0; - if (NX_CPB_FDM(csbcpb) & NX_FDM_CONTINUATION) { - /* we've hit the nx chip previously and we're updating again, - * so copy over the partial digest */ - memcpy(csbcpb->cpb.sha512.input_partial_digest, - csbcpb->cpb.sha512.message_digest, SHA512_DIGEST_SIZE); - } + spin_lock_irqsave(&nx_ctx->lock, irq_flags); /* 2 cases for total data len: - * 1: <= SHA512_BLOCK_SIZE: copy into state, return 0 - * 2: > SHA512_BLOCK_SIZE: process X blocks, copy in leftover + * 1: < SHA512_BLOCK_SIZE: copy into state, return 0 + * 2: >= SHA512_BLOCK_SIZE: process X blocks, copy in leftover */ - if ((u64)len + sctx->count[0] <= SHA512_BLOCK_SIZE) { + total = sctx->count[0] + len; + if (total < SHA512_BLOCK_SIZE) { memcpy(sctx->buf + sctx->count[0], data, len); sctx->count[0] += len; goto out; } - /* to_process: the SHA512_BLOCK_SIZE data chunk to process in this - * update */ - to_process = (sctx->count[0] + len) & ~(SHA512_BLOCK_SIZE - 1); - leftover = (sctx->count[0] + len) & (SHA512_BLOCK_SIZE - 1); - - if (sctx->count[0]) { - in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)sctx->buf, - sctx->count[0], nx_ctx->ap->sglen); - in_sg = nx_build_sg_list(in_sg, (u8 *)data, + in_sg = nx_ctx->in_sg; + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx->ap->sglen); + + do { + /* + * to_process: the SHA512_BLOCK_SIZE data chunk to process in + * this update. This value is also restricted by the sg list + * limits. + */ + to_process = min_t(u64, total, nx_ctx->ap->databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + to_process = to_process & ~(SHA512_BLOCK_SIZE - 1); + leftover = total - to_process; + + if (sctx->count[0]) { + in_sg = nx_build_sg_list(nx_ctx->in_sg, + (u8 *) sctx->buf, + sctx->count[0], max_sg_len); + } + in_sg = nx_build_sg_list(in_sg, (u8 *) data, to_process - sctx->count[0], - nx_ctx->ap->sglen); - nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * - sizeof(struct nx_sg); - } else { - in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)data, - to_process, nx_ctx->ap->sglen); + max_sg_len); nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); - } - NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; + if (NX_CPB_FDM(csbcpb) & NX_FDM_CONTINUATION) { + /* + * we've hit the nx chip previously and we're updating + * again, so copy over the partial digest. + */ + memcpy(csbcpb->cpb.sha512.input_partial_digest, + csbcpb->cpb.sha512.message_digest, + SHA512_DIGEST_SIZE); + } - if (!nx_ctx->op.inlen || !nx_ctx->op.outlen) { - rc = -EINVAL; - goto out; - } - - rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, - desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP); - if (rc) - goto out; + NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; + if (!nx_ctx->op.inlen || !nx_ctx->op.outlen) { + rc = -EINVAL; + goto out; + } + + rc = nx_hcall_sync(nx_ctx, &nx_ctx->op, + desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; + + atomic_inc(&(nx_ctx->stats->sha512_ops)); + spbc_bits = csbcpb->cpb.sha512.spbc * 8; + csbcpb->cpb.sha512.message_bit_length_lo += spbc_bits; + if (csbcpb->cpb.sha512.message_bit_length_lo < spbc_bits) + csbcpb->cpb.sha512.message_bit_length_hi++; + + /* everything after the first update is continuation */ + NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; - atomic_inc(&(nx_ctx->stats->sha512_ops)); + total -= to_process; + data += to_process - sctx->count[0]; + sctx->count[0] = 0; + in_sg = nx_ctx->in_sg; + } while (leftover >= SHA512_BLOCK_SIZE); /* copy the leftover back into the state struct */ - memcpy(sctx->buf, data + len - leftover, leftover); + if (leftover) + memcpy(sctx->buf, data, leftover); sctx->count[0] = leftover; - - spbc_bits = csbcpb->cpb.sha512.spbc * 8; - csbcpb->cpb.sha512.message_bit_length_lo += spbc_bits; - if (csbcpb->cpb.sha512.message_bit_length_lo < spbc_bits) - csbcpb->cpb.sha512.message_bit_length_hi++; - - /* everything after the first update is continuation */ - NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; out: + spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); return rc; } @@ -130,9 +151,15 @@ static int nx_sha512_final(struct shash_desc *desc, u8 *out) struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; struct nx_sg *in_sg, *out_sg; + u32 max_sg_len; u64 count0; + unsigned long irq_flags; int rc; + spin_lock_irqsave(&nx_ctx->lock, irq_flags); + + max_sg_len = min_t(u32, nx_driver.of.max_sg_len, nx_ctx->ap->sglen); + if (NX_CPB_FDM(csbcpb) & NX_FDM_CONTINUATION) { /* we've hit the nx chip previously, now we're finalizing, * so copy over the partial digest */ @@ -151,9 +178,9 @@ static int nx_sha512_final(struct shash_desc *desc, u8 *out) csbcpb->cpb.sha512.message_bit_length_hi++; in_sg = nx_build_sg_list(nx_ctx->in_sg, sctx->buf, sctx->count[0], - nx_ctx->ap->sglen); + max_sg_len); out_sg = nx_build_sg_list(nx_ctx->out_sg, out, SHA512_DIGEST_SIZE, - nx_ctx->ap->sglen); + max_sg_len); nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); @@ -168,11 +195,12 @@ static int nx_sha512_final(struct shash_desc *desc, u8 *out) goto out; atomic_inc(&(nx_ctx->stats->sha512_ops)); - atomic64_add(csbcpb->cpb.sha512.message_bit_length_lo, + atomic64_add(csbcpb->cpb.sha512.message_bit_length_lo / 8, &(nx_ctx->stats->sha512_bytes)); memcpy(out, csbcpb->cpb.sha512.message_digest, SHA512_DIGEST_SIZE); out: + spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); return rc; } @@ -182,6 +210,9 @@ static int nx_sha512_export(struct shash_desc *desc, void *out) struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; struct sha512_state *octx = out; + unsigned long irq_flags; + + spin_lock_irqsave(&nx_ctx->lock, irq_flags); /* move message_bit_length (128 bits) into count and convert its value * to bytes */ @@ -213,6 +244,7 @@ static int nx_sha512_export(struct shash_desc *desc, void *out) octx->state[7] = SHA512_H7; } + spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); return 0; } @@ -222,6 +254,9 @@ static int nx_sha512_import(struct shash_desc *desc, const void *in) struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; const struct sha512_state *ictx = in; + unsigned long irq_flags; + + spin_lock_irqsave(&nx_ctx->lock, irq_flags); memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); sctx->count[0] = ictx->count[0] & 0x3f; @@ -239,6 +274,7 @@ static int nx_sha512_import(struct shash_desc *desc, const void *in) NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; } + spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); return 0; } diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index d7f179cc2e9..5533fe31c90 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -33,8 +33,6 @@ #include <linux/scatterlist.h> #include <linux/device.h> #include <linux/of.h> -#include <asm/pSeries_reconfig.h> -#include <asm/abs_addr.h> #include <asm/hvcall.h> #include <asm/vio.h> @@ -63,8 +61,7 @@ int nx_hcall_sync(struct nx_crypto_ctx *nx_ctx, do { rc = vio_h_cop_sync(viodev, op); - } while ((rc == -EBUSY && !may_sleep && retries--) || - (rc == -EBUSY && may_sleep && cond_resched())); + } while (rc == -EBUSY && !may_sleep && retries--); if (rc) { dev_dbg(&viodev->dev, "vio_h_cop_sync failed: rc: %d " @@ -104,10 +101,10 @@ struct nx_sg *nx_build_sg_list(struct nx_sg *sg_head, /* determine the start and end for this address range - slightly * different if this is in VMALLOC_REGION */ if (is_vmalloc_addr(start_addr)) - sg_addr = phys_to_abs(page_to_phys(vmalloc_to_page(start_addr))) + sg_addr = page_to_phys(vmalloc_to_page(start_addr)) + offset_in_page(sg_addr); else - sg_addr = virt_to_abs(sg_addr); + sg_addr = __pa(sg_addr); end_addr = sg_addr + len; @@ -116,13 +113,29 @@ struct nx_sg *nx_build_sg_list(struct nx_sg *sg_head, * have been described (or @sgmax elements have been written), the * loop ends. min_t is used to ensure @end_addr falls on the same page * as sg_addr, if not, we need to create another nx_sg element for the - * data on the next page */ + * data on the next page. + * + * Also when using vmalloc'ed data, every time that a system page + * boundary is crossed the physical address needs to be re-calculated. + */ for (sg = sg_head; sg_len < len; sg++) { + u64 next_page; + sg->addr = sg_addr; - sg_addr = min_t(u64, NX_PAGE_NUM(sg_addr + NX_PAGE_SIZE), end_addr); - sg->len = sg_addr - sg->addr; + sg_addr = min_t(u64, NX_PAGE_NUM(sg_addr + NX_PAGE_SIZE), + end_addr); + + next_page = (sg->addr & PAGE_MASK) + PAGE_SIZE; + sg->len = min_t(u64, sg_addr, next_page) - sg->addr; sg_len += sg->len; + if (sg_addr >= next_page && + is_vmalloc_addr(start_addr + sg_len)) { + sg_addr = page_to_phys(vmalloc_to_page( + start_addr + sg_len)); + end_addr = sg_addr + len - sg_len; + } + if ((sg - sg_head) == sgmax) { pr_err("nx: scatter/gather list overflow, pid: %d\n", current->pid); @@ -198,6 +211,8 @@ struct nx_sg *nx_walk_and_build(struct nx_sg *nx_dst, * @dst: destination scatterlist * @src: source scatterlist * @nbytes: length of data described in the scatterlists + * @offset: number of bytes to fast-forward past at the beginning of + * scatterlists. * @iv: destination for the iv data, if the algorithm requires it * * This is common code shared by all the AES algorithms. It uses the block @@ -209,48 +224,27 @@ int nx_build_sg_lists(struct nx_crypto_ctx *nx_ctx, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes, + unsigned int offset, u8 *iv) { struct nx_sg *nx_insg = nx_ctx->in_sg; struct nx_sg *nx_outsg = nx_ctx->out_sg; - struct blkcipher_walk walk; - int rc; - - blkcipher_walk_init(&walk, dst, src, nbytes); - rc = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); - if (rc) - goto out; if (iv) - memcpy(iv, walk.iv, AES_BLOCK_SIZE); - - while (walk.nbytes) { - nx_insg = nx_build_sg_list(nx_insg, walk.src.virt.addr, - walk.nbytes, nx_ctx->ap->sglen); - nx_outsg = nx_build_sg_list(nx_outsg, walk.dst.virt.addr, - walk.nbytes, nx_ctx->ap->sglen); - - rc = blkcipher_walk_done(desc, &walk, 0); - if (rc) - break; - } + memcpy(iv, desc->info, AES_BLOCK_SIZE); - if (walk.nbytes) { - nx_insg = nx_build_sg_list(nx_insg, walk.src.virt.addr, - walk.nbytes, nx_ctx->ap->sglen); - nx_outsg = nx_build_sg_list(nx_outsg, walk.dst.virt.addr, - walk.nbytes, nx_ctx->ap->sglen); - - rc = 0; - } + nx_insg = nx_walk_and_build(nx_insg, nx_ctx->ap->sglen, src, + offset, nbytes); + nx_outsg = nx_walk_and_build(nx_outsg, nx_ctx->ap->sglen, dst, + offset, nbytes); /* these lengths should be negative, which will indicate to phyp that * the input and output parameters are scatterlists, not linear * buffers */ nx_ctx->op.inlen = (nx_ctx->in_sg - nx_insg) * sizeof(struct nx_sg); nx_ctx->op.outlen = (nx_ctx->out_sg - nx_outsg) * sizeof(struct nx_sg); -out: - return rc; + + return 0; } /** @@ -261,21 +255,22 @@ out: */ void nx_ctx_init(struct nx_crypto_ctx *nx_ctx, unsigned int function) { + spin_lock_init(&nx_ctx->lock); memset(nx_ctx->kmem, 0, nx_ctx->kmem_len); nx_ctx->csbcpb->csb.valid |= NX_CSB_VALID_BIT; nx_ctx->op.flags = function; - nx_ctx->op.csbcpb = virt_to_abs(nx_ctx->csbcpb); - nx_ctx->op.in = virt_to_abs(nx_ctx->in_sg); - nx_ctx->op.out = virt_to_abs(nx_ctx->out_sg); + nx_ctx->op.csbcpb = __pa(nx_ctx->csbcpb); + nx_ctx->op.in = __pa(nx_ctx->in_sg); + nx_ctx->op.out = __pa(nx_ctx->out_sg); if (nx_ctx->csbcpb_aead) { nx_ctx->csbcpb_aead->csb.valid |= NX_CSB_VALID_BIT; nx_ctx->op_aead.flags = function; - nx_ctx->op_aead.csbcpb = virt_to_abs(nx_ctx->csbcpb_aead); - nx_ctx->op_aead.in = virt_to_abs(nx_ctx->in_sg); - nx_ctx->op_aead.out = virt_to_abs(nx_ctx->out_sg); + nx_ctx->op_aead.csbcpb = __pa(nx_ctx->csbcpb_aead); + nx_ctx->op_aead.in = __pa(nx_ctx->in_sg); + nx_ctx->op_aead.out = __pa(nx_ctx->out_sg); } } @@ -456,6 +451,8 @@ static int nx_register_algs(void) if (rc) goto out; + nx_driver.of.status = NX_OKAY; + rc = crypto_register_alg(&nx_ecb_aes_alg); if (rc) goto out; @@ -500,8 +497,6 @@ static int nx_register_algs(void) if (rc) goto out_unreg_s512; - nx_driver.of.status = NX_OKAY; - goto out; out_unreg_s512: @@ -636,8 +631,7 @@ void nx_crypto_ctx_exit(struct crypto_tfm *tfm) nx_ctx->out_sg = NULL; } -static int __devinit nx_probe(struct vio_dev *viodev, - const struct vio_device_id *id) +static int nx_probe(struct vio_dev *viodev, const struct vio_device_id *id) { dev_dbg(&viodev->dev, "driver probed: %s resource id: 0x%x\n", viodev->name, viodev->resource_id); @@ -655,7 +649,7 @@ static int __devinit nx_probe(struct vio_dev *viodev, return nx_register_algs(); } -static int __devexit nx_remove(struct vio_dev *viodev) +static int nx_remove(struct vio_dev *viodev) { dev_dbg(&viodev->dev, "entering nx_remove for UA 0x%x\n", viodev->unit_address); @@ -691,7 +685,7 @@ static void __exit nx_fini(void) vio_unregister_driver(&nx_driver.viodriver); } -static struct vio_device_id nx_crypto_driver_ids[] __devinitdata = { +static struct vio_device_id nx_crypto_driver_ids[] = { { "ibm,sym-encryption-v1", "ibm,sym-encryption" }, { "", "" } }; diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h index 3232b182dd2..befda07ca1d 100644 --- a/drivers/crypto/nx/nx.h +++ b/drivers/crypto/nx/nx.h @@ -117,6 +117,7 @@ struct nx_ctr_priv { }; struct nx_crypto_ctx { + spinlock_t lock; /* synchronize access to the context */ void *kmem; /* unaligned, kmalloc'd buffer */ size_t kmem_len; /* length of kmem */ struct nx_csbcpb *csbcpb; /* aligned page given to phyp @ hcall time */ @@ -155,7 +156,7 @@ int nx_hcall_sync(struct nx_crypto_ctx *ctx, struct vio_pfo_op *op, struct nx_sg *nx_build_sg_list(struct nx_sg *, u8 *, unsigned int, u32); int nx_build_sg_lists(struct nx_crypto_ctx *, struct blkcipher_desc *, struct scatterlist *, struct scatterlist *, unsigned int, - u8 *); + unsigned int, u8 *); struct nx_sg *nx_walk_and_build(struct nx_sg *, unsigned int, struct scatterlist *, unsigned int, unsigned int); |
