aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-05-02 14:53:12 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-05-02 14:53:12 -0700
commit797994f81a8b2bdca2eecffa415c1e7a89a4f961 (patch)
tree1383dc469c26ad37fdf960f682d9a48c782935c5
parentc8d8566952fda026966784a62f324c8352f77430 (diff)
parent3862de1f6c442d53bd828d39f86d07d933a70605 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu: - XTS mode optimisation for twofish/cast6/camellia/aes on x86 - AVX2/x86_64 implementation for blowfish/twofish/serpent/camellia - SSSE3/AVX/AVX2 optimisations for sha256/sha512 - Added driver for SAHARA2 crypto accelerator - Fix for GMAC when used in non-IPsec secnarios - Added generic CMAC implementation (including IPsec glue) - IP update for crypto/atmel - Support for more than one device in hwrng/timeriomem - Added Broadcom BCM2835 RNG driver - Misc fixes * git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (59 commits) crypto: caam - fix job ring cleanup code crypto: camellia - add AVX2/AES-NI/x86_64 assembler implementation of camellia cipher crypto: serpent - add AVX2/x86_64 assembler implementation of serpent cipher crypto: twofish - add AVX2/x86_64 assembler implementation of twofish cipher crypto: blowfish - add AVX2/x86_64 implementation of blowfish cipher crypto: tcrypt - add async cipher speed tests for blowfish crypto: testmgr - extend camellia test-vectors for camellia-aesni/avx2 crypto: aesni_intel - fix Kconfig problem with CRYPTO_GLUE_HELPER_X86 crypto: aesni_intel - add more optimized XTS mode for x86-64 crypto: x86/camellia-aesni-avx - add more optimized XTS code crypto: cast6-avx: use new optimized XTS code crypto: x86/twofish-avx - use optimized XTS code crypto: x86 - add more optimized XTS-mode for serpent-avx xfrm: add rfc4494 AES-CMAC-96 support crypto: add CMAC support to CryptoAPI crypto: testmgr - add empty test vectors for null ciphers crypto: testmgr - add AES GMAC test vectors crypto: gcm - fix rfc4543 to handle async crypto correctly crypto: gcm - make GMAC work when dst and src are different hwrng: timeriomem - added devicetree hooks ...
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt15
-rw-r--r--Documentation/devicetree/bindings/hwrng/timeriomem_rng.txt18
-rw-r--r--Documentation/devicetree/bindings/rng/brcm,bcm2835.txt13
-rw-r--r--Documentation/hw_random.txt2
-rw-r--r--arch/arm/mach-at91/at91sam9g45_devices.c14
-rw-r--r--arch/x86/crypto/Makefile57
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S117
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c80
-rw-r--r--arch/x86/crypto/blowfish-avx2-asm_64.S449
-rw-r--r--arch/x86/crypto/blowfish_avx2_glue.c585
-rw-r--r--arch/x86/crypto/blowfish_glue.c32
-rw-r--r--arch/x86/crypto/camellia-aesni-avx-asm_64.S180
-rw-r--r--arch/x86/crypto/camellia-aesni-avx2-asm_64.S1368
-rw-r--r--arch/x86/crypto/camellia_aesni_avx2_glue.c586
-rw-r--r--arch/x86/crypto/camellia_aesni_avx_glue.c104
-rw-r--r--arch/x86/crypto/cast6-avx-x86_64-asm_64.S48
-rw-r--r--arch/x86/crypto/cast6_avx_glue.c91
-rw-r--r--arch/x86/crypto/crc32-pclmul_asm.S6
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S10
-rw-r--r--arch/x86/crypto/glue_helper-asm-avx.S61
-rw-r--r--arch/x86/crypto/glue_helper-asm-avx2.S180
-rw-r--r--arch/x86/crypto/glue_helper.c97
-rw-r--r--arch/x86/crypto/serpent-avx-x86_64-asm_64.S45
-rw-r--r--arch/x86/crypto/serpent-avx2-asm_64.S800
-rw-r--r--arch/x86/crypto/serpent_avx2_glue.c562
-rw-r--r--arch/x86/crypto/serpent_avx_glue.c145
-rw-r--r--arch/x86/crypto/sha256-avx-asm.S496
-rw-r--r--arch/x86/crypto/sha256-avx2-asm.S772
-rw-r--r--arch/x86/crypto/sha256-ssse3-asm.S506
-rw-r--r--arch/x86/crypto/sha256_ssse3_glue.c275
-rw-r--r--arch/x86/crypto/sha512-avx-asm.S423
-rw-r--r--arch/x86/crypto/sha512-avx2-asm.S743
-rw-r--r--arch/x86/crypto/sha512-ssse3-asm.S421
-rw-r--r--arch/x86/crypto/sha512_ssse3_glue.c282
-rw-r--r--arch/x86/crypto/twofish-avx-x86_64-asm_64.S48
-rw-r--r--arch/x86/crypto/twofish-avx2-asm_64.S600
-rw-r--r--arch/x86/crypto/twofish_avx2_glue.c584
-rw-r--r--arch/x86/crypto/twofish_avx_glue.c101
-rw-r--r--arch/x86/include/asm/cpufeature.h1
-rw-r--r--arch/x86/include/asm/crypto/blowfish.h43
-rw-r--r--arch/x86/include/asm/crypto/camellia.h19
-rw-r--r--arch/x86/include/asm/crypto/glue_helper.h24
-rw-r--r--arch/x86/include/asm/crypto/serpent-avx.h29
-rw-r--r--arch/x86/include/asm/crypto/twofish.h18
-rw-r--r--crypto/Kconfig133
-rw-r--r--crypto/Makefile1
-rw-r--r--crypto/cmac.c315
-rw-r--r--crypto/crypto_user.c4
-rw-r--r--crypto/gcm.c116
-rw-r--r--crypto/sha256_generic.c11
-rw-r--r--crypto/sha512_generic.c13
-rw-r--r--crypto/tcrypt.c30
-rw-r--r--crypto/testmgr.c95
-rw-r--r--crypto/testmgr.h1314
-rw-r--r--drivers/char/hw_random/Kconfig12
-rw-r--r--drivers/char/hw_random/Makefile1
-rw-r--r--drivers/char/hw_random/bcm2835-rng.c113
-rw-r--r--drivers/char/hw_random/exynos-rng.c3
-rw-r--r--drivers/char/hw_random/mxc-rnga.c21
-rw-r--r--drivers/char/hw_random/timeriomem-rng.c190
-rw-r--r--drivers/crypto/Kconfig18
-rw-r--r--drivers/crypto/Makefile1
-rw-r--r--drivers/crypto/atmel-aes.c471
-rw-r--r--drivers/crypto/atmel-sha-regs.h7
-rw-r--r--drivers/crypto/atmel-sha.c586
-rw-r--r--drivers/crypto/atmel-tdes-regs.h2
-rw-r--r--drivers/crypto/atmel-tdes.c394
-rw-r--r--drivers/crypto/bfin_crc.c6
-rw-r--r--drivers/crypto/caam/Kconfig2
-rw-r--r--drivers/crypto/caam/caamalg.c6
-rw-r--r--drivers/crypto/caam/caamhash.c4
-rw-r--r--drivers/crypto/caam/ctrl.c3
-rw-r--r--drivers/crypto/caam/error.c10
-rw-r--r--drivers/crypto/caam/intern.h1
-rw-r--r--drivers/crypto/caam/jr.c4
-rw-r--r--drivers/crypto/caam/key_gen.c2
-rw-r--r--drivers/crypto/caam/key_gen.h2
-rw-r--r--drivers/crypto/caam/regs.h4
-rw-r--r--drivers/crypto/omap-aes.c15
-rw-r--r--drivers/crypto/omap-sham.c15
-rw-r--r--drivers/crypto/picoxcell_crypto.c4
-rw-r--r--drivers/crypto/sahara.c1070
-rw-r--r--drivers/crypto/ux500/hash/hash_core.c6
-rw-r--r--include/crypto/sha.h5
-rw-r--r--include/linux/platform_data/atmel-aes.h22
-rw-r--r--include/linux/platform_data/crypto-atmel.h22
-rw-r--r--include/linux/timeriomem-rng.h5
-rw-r--r--net/xfrm/xfrm_algo.c13
88 files changed, 15378 insertions, 744 deletions
diff --git a/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt b/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt
new file mode 100644
index 00000000000..5c65eccd0e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt
@@ -0,0 +1,15 @@
+Freescale SAHARA Cryptographic Accelerator included in some i.MX chips.
+Currently only i.MX27 is supported.
+
+Required properties:
+- compatible : Should be "fsl,<soc>-sahara"
+- reg : Should contain SAHARA registers location and length
+- interrupts : Should contain SAHARA interrupt number
+
+Example:
+
+sah@10025000 {
+ compatible = "fsl,imx27-sahara";
+ reg = < 0x10025000 0x800>;
+ interrupts = <75>;
+};
diff --git a/Documentation/devicetree/bindings/hwrng/timeriomem_rng.txt b/Documentation/devicetree/bindings/hwrng/timeriomem_rng.txt
new file mode 100644
index 00000000000..6616d15866a
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwrng/timeriomem_rng.txt
@@ -0,0 +1,18 @@
+HWRNG support for the timeriomem_rng driver
+
+Required properties:
+- compatible : "timeriomem_rng"
+- reg : base address to sample from
+- period : wait time in microseconds to use between samples
+
+N.B. currently 'reg' must be four bytes wide and aligned
+
+Example:
+
+hwrng@44 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "timeriomem_rng";
+ reg = <0x44 0x04>;
+ period = <1000000>;
+};
diff --git a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt
new file mode 100644
index 00000000000..07ccdaa6832
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt
@@ -0,0 +1,13 @@
+BCM2835 Random number generator
+
+Required properties:
+
+- compatible : should be "brcm,bcm2835-rng"
+- reg : Specifies base physical address and size of the registers.
+
+Example:
+
+rng {
+ compatible = "brcm,bcm2835-rng";
+ reg = <0x7e104000 0x10>;
+};
diff --git a/Documentation/hw_random.txt b/Documentation/hw_random.txt
index 690f52550c8..026e237bbc8 100644
--- a/Documentation/hw_random.txt
+++ b/Documentation/hw_random.txt
@@ -63,7 +63,7 @@ Intel RNG Driver notes:
* FIXME: support poll(2)
- NOTE: request_mem_region was removed, for two reasons:
+ NOTE: request_mem_region was removed, for three reasons:
1) Only one RNG is supported by this driver, 2) The location
used by the RNG is a fixed location in MMIO-addressable memory,
3) users with properly working BIOS e820 handling will always
diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c
index 827c9f2a70f..f0bf68268ca 100644
--- a/arch/arm/mach-at91/at91sam9g45_devices.c
+++ b/arch/arm/mach-at91/at91sam9g45_devices.c
@@ -18,7 +18,7 @@
#include <linux/platform_device.h>
#include <linux/i2c-gpio.h>
#include <linux/atmel-mci.h>
-#include <linux/platform_data/atmel-aes.h>
+#include <linux/platform_data/crypto-atmel.h>
#include <linux/platform_data/at91_adc.h>
@@ -1900,7 +1900,8 @@ static void __init at91_add_device_tdes(void) {}
* -------------------------------------------------------------------- */
#if defined(CONFIG_CRYPTO_DEV_ATMEL_AES) || defined(CONFIG_CRYPTO_DEV_ATMEL_AES_MODULE)
-static struct aes_platform_data aes_data;
+static struct crypto_platform_data aes_data;
+static struct crypto_dma_data alt_atslave;
static u64 aes_dmamask = DMA_BIT_MASK(32);
static struct resource aes_resources[] = {
@@ -1931,23 +1932,20 @@ static struct platform_device at91sam9g45_aes_device = {
static void __init at91_add_device_aes(void)
{
struct at_dma_slave *atslave;
- struct aes_dma_data *alt_atslave;
-
- alt_atslave = kzalloc(sizeof(struct aes_dma_data), GFP_KERNEL);
/* DMA TX slave channel configuration */
- atslave = &alt_atslave->txdata;
+ atslave = &alt_atslave.txdata;
atslave->dma_dev = &at_hdmac_device.dev;
atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE | ATC_SRC_H2SEL_HW |
ATC_SRC_PER(AT_DMA_ID_AES_RX);
/* DMA RX slave channel configuration */
- atslave = &alt_atslave->rxdata;
+ atslave = &alt_atslave.rxdata;
atslave->dma_dev = &at_hdmac_device.dev;
atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE | ATC_DST_H2SEL_HW |
ATC_DST_PER(AT_DMA_ID_AES_TX);
- aes_data.dma_slave = alt_atslave;
+ aes_data.dma_slave = &alt_atslave;
platform_device_register(&at91sam9g45_aes_device);
}
#else
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 63947a8f9f0..a3a0ed80f17 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,6 +2,10 @@
# Arch-specific CryptoAPI modules.
#
+avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
+avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
+ $(comma)4)$(comma)%ymm2,yes,no)
+
obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
@@ -12,22 +16,37 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
-obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += camellia-aesni-avx-x86_64.o
-obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
-obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
-obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
-obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
+obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
+obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
+
+# These modules require assembler to support AVX.
+ifeq ($(avx_supported),yes)
+ obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += \
+ camellia-aesni-avx-x86_64.o
+ obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
+ obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
+ obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
+ obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
+endif
+
+# These modules require assembler to support AVX2.
+ifeq ($(avx2_supported),yes)
+ obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o
+ obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
+ obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
+ obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o
+endif
aes-i586-y := aes-i586-asm_32.o aes_glue.o
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
@@ -36,21 +55,35 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
-camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
- camellia_aesni_avx_glue.o
-cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
-cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
-twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o
salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
-serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o
+
+ifeq ($(avx_supported),yes)
+ camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
+ camellia_aesni_avx_glue.o
+ cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
+ cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
+ twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o \
+ twofish_avx_glue.o
+ serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o \
+ serpent_avx_glue.o
+endif
+
+ifeq ($(avx2_supported),yes)
+ blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o
+ camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
+ serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
+ twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o
+endif
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
crc32c-intel-y := crc32c-intel_glue.o
-crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o
+crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
+sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
+sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 04b797767b9..62fe22cd4cb 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -34,6 +34,10 @@
#ifdef __x86_64__
.data
+.align 16
+.Lgf128mul_x_ble_mask:
+ .octa 0x00000000000000010000000000000087
+
POLY: .octa 0xC2000000000000000000000000000001
TWOONE: .octa 0x00000001000000000000000000000001
@@ -105,6 +109,8 @@ enc: .octa 0x2
#define CTR %xmm11
#define INC %xmm12
+#define GF128MUL_MASK %xmm10
+
#ifdef __x86_64__
#define AREG %rax
#define KEYP %rdi
@@ -2636,4 +2642,115 @@ ENTRY(aesni_ctr_enc)
.Lctr_enc_just_ret:
ret
ENDPROC(aesni_ctr_enc)
+
+/*
+ * _aesni_gf128mul_x_ble: internal ABI
+ * Multiply in GF(2^128) for XTS IVs
+ * input:
+ * IV: current IV
+ * GF128MUL_MASK == mask with 0x87 and 0x01
+ * output:
+ * IV: next IV
+ * changed:
+ * CTR: == temporary value
+ */
+#define _aesni_gf128mul_x_ble() \
+ pshufd $0x13, IV, CTR; \
+ paddq IV, IV; \
+ psrad $31, CTR; \
+ pand GF128MUL_MASK, CTR; \
+ pxor CTR, IV;
+
+/*
+ * void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ * bool enc, u8 *iv)
+ */
+ENTRY(aesni_xts_crypt8)
+ cmpb $0, %cl
+ movl $0, %ecx
+ movl $240, %r10d
+ leaq _aesni_enc4, %r11
+ leaq _aesni_dec4, %rax
+ cmovel %r10d, %ecx
+ cmoveq %rax, %r11
+
+ movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
+ movups (IVP), IV
+
+ mov 480(KEYP), KLEN
+ addq %rcx, KEYP
+
+ movdqa IV, STATE1
+ pxor 0x00(INP), STATE1
+ movdqu IV, 0x00(OUTP)
+
+ _aesni_gf128mul_x_ble()
+ movdqa IV, STATE2
+ pxor 0x10(INP), STATE2
+ movdqu IV, 0x10(OUTP)
+
+ _aesni_gf128mul_x_ble()
+ movdqa IV, STATE3
+ pxor 0x20(INP), STATE3
+ movdqu IV, 0x20(OUTP)
+
+ _aesni_gf128mul_x_ble()
+ movdqa IV, STATE4
+ pxor 0x30(INP), STATE4
+ movdqu IV, 0x30(OUTP)
+
+ call *%r11
+
+ pxor 0x00(OUTP), STATE1
+ movdqu STATE1, 0x00(OUTP)
+
+ _aesni_gf128mul_x_ble()
+ movdqa IV, STATE1
+ pxor 0x40(INP), S