Diffstat (limited to 'arch/sparc/crypto')
-rw-r--r--   arch/sparc/crypto/Makefile         |   25
-rw-r--r--   arch/sparc/crypto/aes_asm.S        | 1543
-rw-r--r--   arch/sparc/crypto/aes_glue.c       |  504
-rw-r--r--   arch/sparc/crypto/camellia_asm.S   |  563
-rw-r--r--   arch/sparc/crypto/camellia_glue.c  |  327
-rw-r--r--   arch/sparc/crypto/crc32c_asm.S     |   20
-rw-r--r--   arch/sparc/crypto/crc32c_glue.c    |  181
-rw-r--r--   arch/sparc/crypto/crop_devid.c     |   14
-rw-r--r--   arch/sparc/crypto/des_asm.S        |  419
-rw-r--r--   arch/sparc/crypto/des_glue.c       |  537
-rw-r--r--   arch/sparc/crypto/md5_asm.S        |   70
-rw-r--r--   arch/sparc/crypto/md5_glue.c       |  190
-rw-r--r--   arch/sparc/crypto/opcodes.h        |   99
-rw-r--r--   arch/sparc/crypto/sha1_asm.S       |   72
-rw-r--r--   arch/sparc/crypto/sha1_glue.c      |  185
-rw-r--r--   arch/sparc/crypto/sha256_asm.S     |   78
-rw-r--r--   arch/sparc/crypto/sha256_glue.c    |  243
-rw-r--r--   arch/sparc/crypto/sha512_asm.S     |  102
-rw-r--r--   arch/sparc/crypto/sha512_glue.c    |  228
19 files changed, 5400 insertions, 0 deletions
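For orientation before the patch body: a minimal caller sketch (not part of this patch) showing how an in-kernel user ends up on these drivers once they are registered. The crypto_cipher calls are the standard Linux CryptoAPI; the function name example_encrypt_one_block and its parameters are hypothetical.

/* Hypothetical usage sketch: encrypt one 16-byte block through whatever
 * "aes" implementation wins by priority; with the sparc64 crypto opcodes
 * present, that is the "aes-sparc64" cipher registered by aes_glue.c below.
 */
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/aes.h>

static int example_encrypt_one_block(const u8 *key, unsigned int keylen,
				     const u8 *in, u8 *out)
{
	struct crypto_cipher *tfm;
	int err;

	tfm = crypto_alloc_cipher("aes", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_cipher_setkey(tfm, key, keylen);
	if (!err)
		crypto_cipher_encrypt_one(tfm, out, in); /* in/out: AES_BLOCK_SIZE bytes */

	crypto_free_cipher(tfm);
	return err;
}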
diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile
new file mode 100644
index 00000000000..5d469d81761
--- /dev/null
+++ b/arch/sparc/crypto/Makefile
@@ -0,0 +1,25 @@
+#
+# Arch-specific CryptoAPI modules.
+#
+
+obj-$(CONFIG_CRYPTO_SHA1_SPARC64) += sha1-sparc64.o
+obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o
+obj-$(CONFIG_CRYPTO_SHA512_SPARC64) += sha512-sparc64.o
+obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o
+
+obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o
+obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o
+obj-$(CONFIG_CRYPTO_CAMELLIA_SPARC64) += camellia-sparc64.o
+
+obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o
+
+sha1-sparc64-y := sha1_asm.o sha1_glue.o
+sha256-sparc64-y := sha256_asm.o sha256_glue.o
+sha512-sparc64-y := sha512_asm.o sha512_glue.o
+md5-sparc64-y := md5_asm.o md5_glue.o
+
+aes-sparc64-y := aes_asm.o aes_glue.o
+des-sparc64-y := des_asm.o des_glue.o
+camellia-sparc64-y := camellia_asm.o camellia_glue.o
+
+crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o
diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S
new file mode 100644
index 00000000000..1cda8aa7cb8
--- /dev/null
+++ b/arch/sparc/crypto/aes_asm.S
@@ -0,0 +1,1543 @@
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+#include "opcodes.h"
+
+#define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
+	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
+	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
+	AES_EROUND01(KEY_BASE +  4, T0, T1, I0) \
+	AES_EROUND23(KEY_BASE +  6, T0, T1, I1)
+
+#define ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
+	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
+	AES_EROUND01(KEY_BASE +  0, I2, I3, T2) \
+	AES_EROUND23(KEY_BASE +  2, I2, I3, T3) \
+	AES_EROUND01(KEY_BASE +  4, T0, T1, I0) \
+	AES_EROUND23(KEY_BASE +  6, T0, T1, I1) \
+	AES_EROUND01(KEY_BASE +  4, T2, T3, I2) \
+	AES_EROUND23(KEY_BASE +  6, T2, T3, I3)
+
+#define ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
+	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
+	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
+	AES_EROUND01_L(KEY_BASE +  4, T0, T1, I0) \
+	AES_EROUND23_L(KEY_BASE +  6, T0, T1, I1)
+
+#define ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
+	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
+	AES_EROUND01(KEY_BASE +  0, I2, I3, T2) \
+	AES_EROUND23(KEY_BASE +  2, I2, I3, T3) \
+	AES_EROUND01_L(KEY_BASE +  4, T0, T1, I0) \
+	AES_EROUND23_L(KEY_BASE +  6, T0, T1, I1) \
+	AES_EROUND01_L(KEY_BASE +  4, T2, T3, I2) \
+	AES_EROUND23_L(KEY_BASE +  6, T2, T3, I3)
+
+	/* 10 rounds */
+#define ENCRYPT_128(KEY_BASE, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
+
+#define ENCRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
+	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
+	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
+	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
+	ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3)
+
+	/* 12 rounds */
+#define ENCRYPT_192(KEY_BASE, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
ENCRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \ +	ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ +	ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ +	ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ +	ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) + +#define ENCRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ +	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \ +	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \ +	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ +	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ +	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \ +	ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3) + +	/* 14 rounds */ +#define ENCRYPT_256(KEY_BASE, I0, I1, T0, T1) \ +	ENCRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \ +	ENCRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \ +	ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ +	ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ +	ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ +	ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ +	ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) + +#define ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \ +	ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \ +			     TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6) + +#define ENCRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \ +	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, KEY_BASE + 48) \ +	ldd	[%o0 + 0xd0], %f56; \ +	ldd	[%o0 + 0xd8], %f58; \ +	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, KEY_BASE +  0) \ +	ldd	[%o0 + 0xe0], %f60; \ +	ldd	[%o0 + 0xe8], %f62; \ +	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE +  0) \ +	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE +  0) \ +	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE +  0) \ +	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE +  0) \ +	AES_EROUND01(KEY_BASE +  48, I0, I1, KEY_BASE + 0) \ +	AES_EROUND23(KEY_BASE +  50, I0, I1, KEY_BASE + 2) \ +	AES_EROUND01(KEY_BASE +  48, I2, I3, KEY_BASE + 4) \ +	AES_EROUND23(KEY_BASE +  50, I2, I3, KEY_BASE + 6) \ +	AES_EROUND01_L(KEY_BASE +  52, KEY_BASE + 0, KEY_BASE + 2, I0) \ +	AES_EROUND23_L(KEY_BASE +  54, KEY_BASE + 0, KEY_BASE + 2, I1) \ +	ldd	[%o0 + 0x10], %f8; \ +	ldd	[%o0 + 0x18], %f10; \ +	AES_EROUND01_L(KEY_BASE +  52, KEY_BASE + 4, KEY_BASE + 6, I2) \ +	AES_EROUND23_L(KEY_BASE +  54, KEY_BASE + 4, KEY_BASE + 6, I3) \ +	ldd	[%o0 + 0x20], %f12; \ +	ldd	[%o0 + 0x28], %f14; + +#define DECRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \ +	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \ +	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \ +	AES_DROUND23(KEY_BASE +  4, T0, T1, I1) \ +	AES_DROUND01(KEY_BASE +  6, T0, T1, I0) + +#define DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ +	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \ +	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \ +	AES_DROUND23(KEY_BASE +  0, I2, I3, T3) \ +	AES_DROUND01(KEY_BASE +  2, I2, I3, T2) \ +	AES_DROUND23(KEY_BASE +  4, T0, T1, I1) \ +	AES_DROUND01(KEY_BASE +  6, T0, T1, I0) \ +	AES_DROUND23(KEY_BASE +  4, T2, T3, I3) \ +	AES_DROUND01(KEY_BASE +  6, T2, T3, I2) + +#define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \ +	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \ +	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \ +	AES_DROUND23_L(KEY_BASE +  4, T0, T1, I1) \ +	AES_DROUND01_L(KEY_BASE +  6, T0, T1, I0) + +#define 
DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ +	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \ +	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \ +	AES_DROUND23(KEY_BASE +  0, I2, I3, T3) \ +	AES_DROUND01(KEY_BASE +  2, I2, I3, T2) \ +	AES_DROUND23_L(KEY_BASE +  4, T0, T1, I1) \ +	AES_DROUND01_L(KEY_BASE +  6, T0, T1, I0) \ +	AES_DROUND23_L(KEY_BASE +  4, T2, T3, I3) \ +	AES_DROUND01_L(KEY_BASE +  6, T2, T3, I2) + +	/* 10 rounds */ +#define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1) + +#define DECRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ +	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \ +	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \ +	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ +	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ +	DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) + +	/* 12 rounds */ +#define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) + +#define DECRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ +	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \ +	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \ +	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ +	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ +	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \ +	DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3) + +	/* 14 rounds */ +#define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ +	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) + +#define DECRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \ +	DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \ +			     TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6) + +#define DECRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \ +	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, KEY_BASE + 48) \ +	ldd	[%o0 + 0x18], %f56; \ +	ldd	[%o0 + 0x10], %f58; \ +	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, KEY_BASE +  0) \ +	ldd	[%o0 + 0x08], %f60; \ +	ldd	[%o0 + 0x00], %f62; \ +	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE +  0) \ +	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE +  0) \ +	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE +  0) \ +	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE +  0) \ +	AES_DROUND23(KEY_BASE +  48, I0, I1, KEY_BASE + 2) \ +	AES_DROUND01(KEY_BASE +  50, I0, I1, KEY_BASE + 0) \ +	AES_DROUND23(KEY_BASE +  48, I2, I3, KEY_BASE + 6) \ +	AES_DROUND01(KEY_BASE +  50, 
I2, I3, KEY_BASE + 4) \ +	AES_DROUND23_L(KEY_BASE +  52, KEY_BASE + 0, KEY_BASE + 2, I1) \ +	AES_DROUND01_L(KEY_BASE +  54, KEY_BASE + 0, KEY_BASE + 2, I0) \ +	ldd	[%o0 + 0xd8], %f8; \ +	ldd	[%o0 + 0xd0], %f10; \ +	AES_DROUND23_L(KEY_BASE +  52, KEY_BASE + 4, KEY_BASE + 6, I3) \ +	AES_DROUND01_L(KEY_BASE +  54, KEY_BASE + 4, KEY_BASE + 6, I2) \ +	ldd	[%o0 + 0xc8], %f12; \ +	ldd	[%o0 + 0xc0], %f14; + +	.align	32 +ENTRY(aes_sparc64_key_expand) +	/* %o0=input_key, %o1=output_key, %o2=key_len */ +	VISEntry +	ld	[%o0 + 0x00], %f0 +	ld	[%o0 + 0x04], %f1 +	ld	[%o0 + 0x08], %f2 +	ld	[%o0 + 0x0c], %f3 + +	std	%f0, [%o1 + 0x00] +	std	%f2, [%o1 + 0x08] +	add	%o1, 0x10, %o1 + +	cmp	%o2, 24 +	bl	2f +	 nop + +	be	1f +	 nop + +	/* 256-bit key expansion */ +	ld	[%o0 + 0x10], %f4 +	ld	[%o0 + 0x14], %f5 +	ld	[%o0 + 0x18], %f6 +	ld	[%o0 + 0x1c], %f7 + +	std	%f4, [%o1 + 0x00] +	std	%f6, [%o1 + 0x08] +	add	%o1, 0x10, %o1 + +	AES_KEXPAND1(0, 6, 0x0, 8) +	AES_KEXPAND2(2, 8, 10) +	AES_KEXPAND0(4, 10, 12) +	AES_KEXPAND2(6, 12, 14) +	AES_KEXPAND1(8, 14, 0x1, 16) +	AES_KEXPAND2(10, 16, 18) +	AES_KEXPAND0(12, 18, 20) +	AES_KEXPAND2(14, 20, 22) +	AES_KEXPAND1(16, 22, 0x2, 24) +	AES_KEXPAND2(18, 24, 26) +	AES_KEXPAND0(20, 26, 28) +	AES_KEXPAND2(22, 28, 30) +	AES_KEXPAND1(24, 30, 0x3, 32) +	AES_KEXPAND2(26, 32, 34) +	AES_KEXPAND0(28, 34, 36) +	AES_KEXPAND2(30, 36, 38) +	AES_KEXPAND1(32, 38, 0x4, 40) +	AES_KEXPAND2(34, 40, 42) +	AES_KEXPAND0(36, 42, 44) +	AES_KEXPAND2(38, 44, 46) +	AES_KEXPAND1(40, 46, 0x5, 48) +	AES_KEXPAND2(42, 48, 50) +	AES_KEXPAND0(44, 50, 52) +	AES_KEXPAND2(46, 52, 54) +	AES_KEXPAND1(48, 54, 0x6, 56) +	AES_KEXPAND2(50, 56, 58) + +	std	%f8, [%o1 + 0x00] +	std	%f10, [%o1 + 0x08] +	std	%f12, [%o1 + 0x10] +	std	%f14, [%o1 + 0x18] +	std	%f16, [%o1 + 0x20] +	std	%f18, [%o1 + 0x28] +	std	%f20, [%o1 + 0x30] +	std	%f22, [%o1 + 0x38] +	std	%f24, [%o1 + 0x40] +	std	%f26, [%o1 + 0x48] +	std	%f28, [%o1 + 0x50] +	std	%f30, [%o1 + 0x58] +	std	%f32, [%o1 + 0x60] +	std	%f34, [%o1 + 0x68] +	std	%f36, [%o1 + 0x70] +	std	%f38, [%o1 + 0x78] +	std	%f40, [%o1 + 0x80] +	std	%f42, [%o1 + 0x88] +	std	%f44, [%o1 + 0x90] +	std	%f46, [%o1 + 0x98] +	std	%f48, [%o1 + 0xa0] +	std	%f50, [%o1 + 0xa8] +	std	%f52, [%o1 + 0xb0] +	std	%f54, [%o1 + 0xb8] +	std	%f56, [%o1 + 0xc0] +	ba,pt	%xcc, 80f +	 std	%f58, [%o1 + 0xc8] + +1:	 +	/* 192-bit key expansion */ +	ld	[%o0 + 0x10], %f4 +	ld	[%o0 + 0x14], %f5 + +	std	%f4, [%o1 + 0x00] +	add	%o1, 0x08, %o1 + +	AES_KEXPAND1(0, 4, 0x0, 6) +	AES_KEXPAND2(2, 6, 8) +	AES_KEXPAND2(4, 8, 10) +	AES_KEXPAND1(6, 10, 0x1, 12) +	AES_KEXPAND2(8, 12, 14) +	AES_KEXPAND2(10, 14, 16) +	AES_KEXPAND1(12, 16, 0x2, 18) +	AES_KEXPAND2(14, 18, 20) +	AES_KEXPAND2(16, 20, 22) +	AES_KEXPAND1(18, 22, 0x3, 24) +	AES_KEXPAND2(20, 24, 26) +	AES_KEXPAND2(22, 26, 28) +	AES_KEXPAND1(24, 28, 0x4, 30) +	AES_KEXPAND2(26, 30, 32) +	AES_KEXPAND2(28, 32, 34) +	AES_KEXPAND1(30, 34, 0x5, 36) +	AES_KEXPAND2(32, 36, 38) +	AES_KEXPAND2(34, 38, 40) +	AES_KEXPAND1(36, 40, 0x6, 42) +	AES_KEXPAND2(38, 42, 44) +	AES_KEXPAND2(40, 44, 46) +	AES_KEXPAND1(42, 46, 0x7, 48) +	AES_KEXPAND2(44, 48, 50) + +	std	%f6, [%o1 + 0x00] +	std	%f8, [%o1 + 0x08] +	std	%f10, [%o1 + 0x10] +	std	%f12, [%o1 + 0x18] +	std	%f14, [%o1 + 0x20] +	std	%f16, [%o1 + 0x28] +	std	%f18, [%o1 + 0x30] +	std	%f20, [%o1 + 0x38] +	std	%f22, [%o1 + 0x40] +	std	%f24, [%o1 + 0x48] +	std	%f26, [%o1 + 0x50] +	std	%f28, [%o1 + 0x58] +	std	%f30, [%o1 + 0x60] +	std	%f32, [%o1 + 0x68] +	std	%f34, [%o1 + 0x70] +	std	%f36, [%o1 + 0x78] +	std	%f38, [%o1 + 0x80] +	std	%f40, [%o1 + 0x88] +	std	
%f42, [%o1 + 0x90] +	std	%f44, [%o1 + 0x98] +	std	%f46, [%o1 + 0xa0] +	std	%f48, [%o1 + 0xa8] +	ba,pt	%xcc, 80f +	 std	%f50, [%o1 + 0xb0] + +2: +	/* 128-bit key expansion */ +	AES_KEXPAND1(0, 2, 0x0, 4) +	AES_KEXPAND2(2, 4, 6) +	AES_KEXPAND1(4, 6, 0x1, 8) +	AES_KEXPAND2(6, 8, 10) +	AES_KEXPAND1(8, 10, 0x2, 12) +	AES_KEXPAND2(10, 12, 14) +	AES_KEXPAND1(12, 14, 0x3, 16) +	AES_KEXPAND2(14, 16, 18) +	AES_KEXPAND1(16, 18, 0x4, 20) +	AES_KEXPAND2(18, 20, 22) +	AES_KEXPAND1(20, 22, 0x5, 24) +	AES_KEXPAND2(22, 24, 26) +	AES_KEXPAND1(24, 26, 0x6, 28) +	AES_KEXPAND2(26, 28, 30) +	AES_KEXPAND1(28, 30, 0x7, 32) +	AES_KEXPAND2(30, 32, 34) +	AES_KEXPAND1(32, 34, 0x8, 36) +	AES_KEXPAND2(34, 36, 38) +	AES_KEXPAND1(36, 38, 0x9, 40) +	AES_KEXPAND2(38, 40, 42) + +	std	%f4, [%o1 + 0x00] +	std	%f6, [%o1 + 0x08] +	std	%f8, [%o1 + 0x10] +	std	%f10, [%o1 + 0x18] +	std	%f12, [%o1 + 0x20] +	std	%f14, [%o1 + 0x28] +	std	%f16, [%o1 + 0x30] +	std	%f18, [%o1 + 0x38] +	std	%f20, [%o1 + 0x40] +	std	%f22, [%o1 + 0x48] +	std	%f24, [%o1 + 0x50] +	std	%f26, [%o1 + 0x58] +	std	%f28, [%o1 + 0x60] +	std	%f30, [%o1 + 0x68] +	std	%f32, [%o1 + 0x70] +	std	%f34, [%o1 + 0x78] +	std	%f36, [%o1 + 0x80] +	std	%f38, [%o1 + 0x88] +	std	%f40, [%o1 + 0x90] +	std	%f42, [%o1 + 0x98] +80: +	retl +	 VISExit +ENDPROC(aes_sparc64_key_expand) + +	.align		32 +ENTRY(aes_sparc64_encrypt_128) +	/* %o0=key, %o1=input, %o2=output */ +	VISEntry +	ld		[%o1 + 0x00], %f4 +	ld		[%o1 + 0x04], %f5 +	ld		[%o1 + 0x08], %f6 +	ld		[%o1 + 0x0c], %f7 +	ldd		[%o0 + 0x00], %f8 +	ldd		[%o0 + 0x08], %f10 +	ldd		[%o0 + 0x10], %f12 +	ldd		[%o0 + 0x18], %f14 +	ldd		[%o0 + 0x20], %f16 +	ldd		[%o0 + 0x28], %f18 +	ldd		[%o0 + 0x30], %f20 +	ldd		[%o0 + 0x38], %f22 +	ldd		[%o0 + 0x40], %f24 +	ldd		[%o0 + 0x48], %f26 +	ldd		[%o0 + 0x50], %f28 +	ldd		[%o0 + 0x58], %f30 +	ldd		[%o0 + 0x60], %f32 +	ldd		[%o0 + 0x68], %f34 +	ldd		[%o0 + 0x70], %f36 +	ldd		[%o0 + 0x78], %f38 +	ldd		[%o0 + 0x80], %f40 +	ldd		[%o0 + 0x88], %f42 +	ldd		[%o0 + 0x90], %f44 +	ldd		[%o0 + 0x98], %f46 +	ldd		[%o0 + 0xa0], %f48 +	ldd		[%o0 + 0xa8], %f50 +	fxor		%f8, %f4, %f4 +	fxor		%f10, %f6, %f6 +	ENCRYPT_128(12, 4, 6, 0, 2) +	st		%f4, [%o2 + 0x00] +	st		%f5, [%o2 + 0x04] +	st		%f6, [%o2 + 0x08] +	st		%f7, [%o2 + 0x0c] +	retl +	 VISExit +ENDPROC(aes_sparc64_encrypt_128) + +	.align		32 +ENTRY(aes_sparc64_encrypt_192) +	/* %o0=key, %o1=input, %o2=output */ +	VISEntry +	ld		[%o1 + 0x00], %f4 +	ld		[%o1 + 0x04], %f5 +	ld		[%o1 + 0x08], %f6 +	ld		[%o1 + 0x0c], %f7 + +	ldd		[%o0 + 0x00], %f8 +	ldd		[%o0 + 0x08], %f10 + +	fxor		%f8, %f4, %f4 +	fxor		%f10, %f6, %f6 + +	ldd		[%o0 + 0x10], %f8 +	ldd		[%o0 + 0x18], %f10 +	ldd		[%o0 + 0x20], %f12 +	ldd		[%o0 + 0x28], %f14 +	add		%o0, 0x20, %o0 + +	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) + +	ldd		[%o0 + 0x10], %f12 +	ldd		[%o0 + 0x18], %f14 +	ldd		[%o0 + 0x20], %f16 +	ldd		[%o0 + 0x28], %f18 +	ldd		[%o0 + 0x30], %f20 +	ldd		[%o0 + 0x38], %f22 +	ldd		[%o0 + 0x40], %f24 +	ldd		[%o0 + 0x48], %f26 +	ldd		[%o0 + 0x50], %f28 +	ldd		[%o0 + 0x58], %f30 +	ldd		[%o0 + 0x60], %f32 +	ldd		[%o0 + 0x68], %f34 +	ldd		[%o0 + 0x70], %f36 +	ldd		[%o0 + 0x78], %f38 +	ldd		[%o0 + 0x80], %f40 +	ldd		[%o0 + 0x88], %f42 +	ldd		[%o0 + 0x90], %f44 +	ldd		[%o0 + 0x98], %f46 +	ldd		[%o0 + 0xa0], %f48 +	ldd		[%o0 + 0xa8], %f50 + + +	ENCRYPT_128(12, 4, 6, 0, 2) + +	st		%f4, [%o2 + 0x00] +	st		%f5, [%o2 + 0x04] +	st		%f6, [%o2 + 0x08] +	st		%f7, [%o2 + 0x0c] + +	retl +	 VISExit +ENDPROC(aes_sparc64_encrypt_192) + +	.align		32 +ENTRY(aes_sparc64_encrypt_256) +	/* %o0=key, %o1=input, %o2=output */ +	
VISEntry +	ld		[%o1 + 0x00], %f4 +	ld		[%o1 + 0x04], %f5 +	ld		[%o1 + 0x08], %f6 +	ld		[%o1 + 0x0c], %f7 + +	ldd		[%o0 + 0x00], %f8 +	ldd		[%o0 + 0x08], %f10 + +	fxor		%f8, %f4, %f4 +	fxor		%f10, %f6, %f6 + +	ldd		[%o0 + 0x10], %f8 + +	ldd		[%o0 + 0x18], %f10 +	ldd		[%o0 + 0x20], %f12 +	ldd		[%o0 + 0x28], %f14 +	add		%o0, 0x20, %o0 + +	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) + +	ldd		[%o0 + 0x10], %f8 + +	ldd		[%o0 + 0x18], %f10 +	ldd		[%o0 + 0x20], %f12 +	ldd		[%o0 + 0x28], %f14 +	add		%o0, 0x20, %o0 + +	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) + +	ldd		[%o0 + 0x10], %f12 +	ldd		[%o0 + 0x18], %f14 +	ldd		[%o0 + 0x20], %f16 +	ldd		[%o0 + 0x28], %f18 +	ldd		[%o0 + 0x30], %f20 +	ldd		[%o0 + 0x38], %f22 +	ldd		[%o0 + 0x40], %f24 +	ldd		[%o0 + 0x48], %f26 +	ldd		[%o0 + 0x50], %f28 +	ldd		[%o0 + 0x58], %f30 +	ldd		[%o0 + 0x60], %f32 +	ldd		[%o0 + 0x68], %f34 +	ldd		[%o0 + 0x70], %f36 +	ldd		[%o0 + 0x78], %f38 +	ldd		[%o0 + 0x80], %f40 +	ldd		[%o0 + 0x88], %f42 +	ldd		[%o0 + 0x90], %f44 +	ldd		[%o0 + 0x98], %f46 +	ldd		[%o0 + 0xa0], %f48 +	ldd		[%o0 + 0xa8], %f50 + +	ENCRYPT_128(12, 4, 6, 0, 2) + +	st		%f4, [%o2 + 0x00] +	st		%f5, [%o2 + 0x04] +	st		%f6, [%o2 + 0x08] +	st		%f7, [%o2 + 0x0c] + +	retl +	 VISExit +ENDPROC(aes_sparc64_encrypt_256) + +	.align		32 +ENTRY(aes_sparc64_decrypt_128) +	/* %o0=key, %o1=input, %o2=output */ +	VISEntry +	ld		[%o1 + 0x00], %f4 +	ld		[%o1 + 0x04], %f5 +	ld		[%o1 + 0x08], %f6 +	ld		[%o1 + 0x0c], %f7 +	ldd		[%o0 + 0xa0], %f8 +	ldd		[%o0 + 0xa8], %f10 +	ldd		[%o0 + 0x98], %f12 +	ldd		[%o0 + 0x90], %f14 +	ldd		[%o0 + 0x88], %f16 +	ldd		[%o0 + 0x80], %f18 +	ldd		[%o0 + 0x78], %f20 +	ldd		[%o0 + 0x70], %f22 +	ldd		[%o0 + 0x68], %f24 +	ldd		[%o0 + 0x60], %f26 +	ldd		[%o0 + 0x58], %f28 +	ldd		[%o0 + 0x50], %f30 +	ldd		[%o0 + 0x48], %f32 +	ldd		[%o0 + 0x40], %f34 +	ldd		[%o0 + 0x38], %f36 +	ldd		[%o0 + 0x30], %f38 +	ldd		[%o0 + 0x28], %f40 +	ldd		[%o0 + 0x20], %f42 +	ldd		[%o0 + 0x18], %f44 +	ldd		[%o0 + 0x10], %f46 +	ldd		[%o0 + 0x08], %f48 +	ldd		[%o0 + 0x00], %f50 +	fxor		%f8, %f4, %f4 +	fxor		%f10, %f6, %f6 +	DECRYPT_128(12, 4, 6, 0, 2) +	st		%f4, [%o2 + 0x00] +	st		%f5, [%o2 + 0x04] +	st		%f6, [%o2 + 0x08] +	st		%f7, [%o2 + 0x0c] +	retl +	 VISExit +ENDPROC(aes_sparc64_decrypt_128) + +	.align		32 +ENTRY(aes_sparc64_decrypt_192) +	/* %o0=key, %o1=input, %o2=output */ +	VISEntry +	ld		[%o1 + 0x00], %f4 +	ld		[%o1 + 0x04], %f5 +	ld		[%o1 + 0x08], %f6 +	ld		[%o1 + 0x0c], %f7 +	ldd		[%o0 + 0xc0], %f8 +	ldd		[%o0 + 0xc8], %f10 +	ldd		[%o0 + 0xb8], %f12 +	ldd		[%o0 + 0xb0], %f14 +	ldd		[%o0 + 0xa8], %f16 +	ldd		[%o0 + 0xa0], %f18 +	fxor		%f8, %f4, %f4 +	fxor		%f10, %f6, %f6 +	ldd		[%o0 + 0x98], %f20 +	ldd		[%o0 + 0x90], %f22 +	ldd		[%o0 + 0x88], %f24 +	ldd		[%o0 + 0x80], %f26 +	DECRYPT_TWO_ROUNDS(12, 4, 6, 0, 2) +	ldd		[%o0 + 0x78], %f28 +	ldd		[%o0 + 0x70], %f30 +	ldd		[%o0 + 0x68], %f32 +	ldd		[%o0 + 0x60], %f34 +	ldd		[%o0 + 0x58], %f36 +	ldd		[%o0 + 0x50], %f38 +	ldd		[%o0 + 0x48], %f40 +	ldd		[%o0 + 0x40], %f42 +	ldd		[%o0 + 0x38], %f44 +	ldd		[%o0 + 0x30], %f46 +	ldd		[%o0 + 0x28], %f48 +	ldd		[%o0 + 0x20], %f50 +	ldd		[%o0 + 0x18], %f52 +	ldd		[%o0 + 0x10], %f54 +	ldd		[%o0 + 0x08], %f56 +	ldd		[%o0 + 0x00], %f58 +	DECRYPT_128(20, 4, 6, 0, 2) +	st		%f4, [%o2 + 0x00] +	st		%f5, [%o2 + 0x04] +	st		%f6, [%o2 + 0x08] +	st		%f7, [%o2 + 0x0c] +	retl +	 VISExit +ENDPROC(aes_sparc64_decrypt_192) + +	.align		32 +ENTRY(aes_sparc64_decrypt_256) +	/* %o0=key, %o1=input, %o2=output */ +	VISEntry +	ld		[%o1 + 0x00], %f4 +	ld		[%o1 + 0x04], %f5 +	ld		[%o1 + 0x08], %f6 +	ld		[%o1 + 0x0c], %f7 +	
ldd		[%o0 + 0xe0], %f8 +	ldd		[%o0 + 0xe8], %f10 +	ldd		[%o0 + 0xd8], %f12 +	ldd		[%o0 + 0xd0], %f14 +	ldd		[%o0 + 0xc8], %f16 +	fxor		%f8, %f4, %f4 +	ldd		[%o0 + 0xc0], %f18 +	fxor		%f10, %f6, %f6 +	ldd		[%o0 + 0xb8], %f20 +	AES_DROUND23(12, 4, 6, 2) +	ldd		[%o0 + 0xb0], %f22 +	AES_DROUND01(14, 4, 6, 0) +	ldd		[%o0 + 0xa8], %f24 +	AES_DROUND23(16, 0, 2, 6) +	ldd		[%o0 + 0xa0], %f26 +	AES_DROUND01(18, 0, 2, 4) +	ldd		[%o0 + 0x98], %f12 +	AES_DROUND23(20, 4, 6, 2) +	ldd		[%o0 + 0x90], %f14 +	AES_DROUND01(22, 4, 6, 0) +	ldd		[%o0 + 0x88], %f16 +	AES_DROUND23(24, 0, 2, 6) +	ldd		[%o0 + 0x80], %f18 +	AES_DROUND01(26, 0, 2, 4) +	ldd		[%o0 + 0x78], %f20 +	AES_DROUND23(12, 4, 6, 2) +	ldd		[%o0 + 0x70], %f22 +	AES_DROUND01(14, 4, 6, 0) +	ldd		[%o0 + 0x68], %f24 +	AES_DROUND23(16, 0, 2, 6) +	ldd		[%o0 + 0x60], %f26 +	AES_DROUND01(18, 0, 2, 4) +	ldd		[%o0 + 0x58], %f28 +	AES_DROUND23(20, 4, 6, 2) +	ldd		[%o0 + 0x50], %f30 +	AES_DROUND01(22, 4, 6, 0) +	ldd		[%o0 + 0x48], %f32 +	AES_DROUND23(24, 0, 2, 6) +	ldd		[%o0 + 0x40], %f34 +	AES_DROUND01(26, 0, 2, 4) +	ldd		[%o0 + 0x38], %f36 +	AES_DROUND23(28, 4, 6, 2) +	ldd		[%o0 + 0x30], %f38 +	AES_DROUND01(30, 4, 6, 0) +	ldd		[%o0 + 0x28], %f40 +	AES_DROUND23(32, 0, 2, 6) +	ldd		[%o0 + 0x20], %f42 +	AES_DROUND01(34, 0, 2, 4) +	ldd		[%o0 + 0x18], %f44 +	AES_DROUND23(36, 4, 6, 2) +	ldd		[%o0 + 0x10], %f46 +	AES_DROUND01(38, 4, 6, 0) +	ldd		[%o0 + 0x08], %f48 +	AES_DROUND23(40, 0, 2, 6) +	ldd		[%o0 + 0x00], %f50 +	AES_DROUND01(42, 0, 2, 4) +	AES_DROUND23(44, 4, 6, 2) +	AES_DROUND01(46, 4, 6, 0) +	AES_DROUND23_L(48, 0, 2, 6) +	AES_DROUND01_L(50, 0, 2, 4) +	st		%f4, [%o2 + 0x00] +	st		%f5, [%o2 + 0x04] +	st		%f6, [%o2 + 0x08] +	st		%f7, [%o2 + 0x0c] +	retl +	 VISExit +ENDPROC(aes_sparc64_decrypt_256) + +	.align		32 +ENTRY(aes_sparc64_load_encrypt_keys_128) +	/* %o0=key */ +	VISEntry +	ldd		[%o0 + 0x10], %f8 +	ldd		[%o0 + 0x18], %f10 +	ldd		[%o0 + 0x20], %f12 +	ldd		[%o0 + 0x28], %f14 +	ldd		[%o0 + 0x30], %f16 +	ldd		[%o0 + 0x38], %f18 +	ldd		[%o0 + 0x40], %f20 +	ldd		[%o0 + 0x48], %f22 +	ldd		[%o0 + 0x50], %f24 +	ldd		[%o0 + 0x58], %f26 +	ldd		[%o0 + 0x60], %f28 +	ldd		[%o0 + 0x68], %f30 +	ldd		[%o0 + 0x70], %f32 +	ldd		[%o0 + 0x78], %f34 +	ldd		[%o0 + 0x80], %f36 +	ldd		[%o0 + 0x88], %f38 +	ldd		[%o0 + 0x90], %f40 +	ldd		[%o0 + 0x98], %f42 +	ldd		[%o0 + 0xa0], %f44 +	retl +	 ldd		[%o0 + 0xa8], %f46 +ENDPROC(aes_sparc64_load_encrypt_keys_128) + +	.align		32 +ENTRY(aes_sparc64_load_encrypt_keys_192) +	/* %o0=key */ +	VISEntry +	ldd		[%o0 + 0x10], %f8 +	ldd		[%o0 + 0x18], %f10 +	ldd		[%o0 + 0x20], %f12 +	ldd		[%o0 + 0x28], %f14 +	ldd		[%o0 + 0x30], %f16 +	ldd		[%o0 + 0x38], %f18 +	ldd		[%o0 + 0x40], %f20 +	ldd		[%o0 + 0x48], %f22 +	ldd		[%o0 + 0x50], %f24 +	ldd		[%o0 + 0x58], %f26 +	ldd		[%o0 + 0x60], %f28 +	ldd		[%o0 + 0x68], %f30 +	ldd		[%o0 + 0x70], %f32 +	ldd		[%o0 + 0x78], %f34 +	ldd		[%o0 + 0x80], %f36 +	ldd		[%o0 + 0x88], %f38 +	ldd		[%o0 + 0x90], %f40 +	ldd		[%o0 + 0x98], %f42 +	ldd		[%o0 + 0xa0], %f44 +	ldd		[%o0 + 0xa8], %f46 +	ldd		[%o0 + 0xb0], %f48 +	ldd		[%o0 + 0xb8], %f50 +	ldd		[%o0 + 0xc0], %f52 +	retl +	 ldd		[%o0 + 0xc8], %f54 +ENDPROC(aes_sparc64_load_encrypt_keys_192) + +	.align		32 +ENTRY(aes_sparc64_load_encrypt_keys_256) +	/* %o0=key */ +	VISEntry +	ldd		[%o0 + 0x10], %f8 +	ldd		[%o0 + 0x18], %f10 +	ldd		[%o0 + 0x20], %f12 +	ldd		[%o0 + 0x28], %f14 +	ldd		[%o0 + 0x30], %f16 +	ldd		[%o0 + 0x38], %f18 +	ldd		[%o0 + 0x40], %f20 +	ldd		[%o0 + 0x48], %f22 +	ldd		[%o0 + 0x50], %f24 +	ldd		[%o0 + 0x58], %f26 +	ldd		[%o0 + 0x60], %f28 +	ldd		[%o0 + 
0x68], %f30 +	ldd		[%o0 + 0x70], %f32 +	ldd		[%o0 + 0x78], %f34 +	ldd		[%o0 + 0x80], %f36 +	ldd		[%o0 + 0x88], %f38 +	ldd		[%o0 + 0x90], %f40 +	ldd		[%o0 + 0x98], %f42 +	ldd		[%o0 + 0xa0], %f44 +	ldd		[%o0 + 0xa8], %f46 +	ldd		[%o0 + 0xb0], %f48 +	ldd		[%o0 + 0xb8], %f50 +	ldd		[%o0 + 0xc0], %f52 +	ldd		[%o0 + 0xc8], %f54 +	ldd		[%o0 + 0xd0], %f56 +	ldd		[%o0 + 0xd8], %f58 +	ldd		[%o0 + 0xe0], %f60 +	retl +	 ldd		[%o0 + 0xe8], %f62 +ENDPROC(aes_sparc64_load_encrypt_keys_256) + +	.align		32 +ENTRY(aes_sparc64_load_decrypt_keys_128) +	/* %o0=key */ +	VISEntry +	ldd		[%o0 + 0x98], %f8 +	ldd		[%o0 + 0x90], %f10 +	ldd		[%o0 + 0x88], %f12 +	ldd		[%o0 + 0x80], %f14 +	ldd		[%o0 + 0x78], %f16 +	ldd		[%o0 + 0x70], %f18 +	ldd		[%o0 + 0x68], %f20 +	ldd		[%o0 + 0x60], %f22 +	ldd		[%o0 + 0x58], %f24 +	ldd		[%o0 + 0x50], %f26 +	ldd		[%o0 + 0x48], %f28 +	ldd		[%o0 + 0x40], %f30 +	ldd		[%o0 + 0x38], %f32 +	ldd		[%o0 + 0x30], %f34 +	ldd		[%o0 + 0x28], %f36 +	ldd		[%o0 + 0x20], %f38 +	ldd		[%o0 + 0x18], %f40 +	ldd		[%o0 + 0x10], %f42 +	ldd		[%o0 + 0x08], %f44 +	retl +	 ldd		[%o0 + 0x00], %f46 +ENDPROC(aes_sparc64_load_decrypt_keys_128) + +	.align		32 +ENTRY(aes_sparc64_load_decrypt_keys_192) +	/* %o0=key */ +	VISEntry +	ldd		[%o0 + 0xb8], %f8 +	ldd		[%o0 + 0xb0], %f10 +	ldd		[%o0 + 0xa8], %f12 +	ldd		[%o0 + 0xa0], %f14 +	ldd		[%o0 + 0x98], %f16 +	ldd		[%o0 + 0x90], %f18 +	ldd		[%o0 + 0x88], %f20 +	ldd		[%o0 + 0x80], %f22 +	ldd		[%o0 + 0x78], %f24 +	ldd		[%o0 + 0x70], %f26 +	ldd		[%o0 + 0x68], %f28 +	ldd		[%o0 + 0x60], %f30 +	ldd		[%o0 + 0x58], %f32 +	ldd		[%o0 + 0x50], %f34 +	ldd		[%o0 + 0x48], %f36 +	ldd		[%o0 + 0x40], %f38 +	ldd		[%o0 + 0x38], %f40 +	ldd		[%o0 + 0x30], %f42 +	ldd		[%o0 + 0x28], %f44 +	ldd		[%o0 + 0x20], %f46 +	ldd		[%o0 + 0x18], %f48 +	ldd		[%o0 + 0x10], %f50 +	ldd		[%o0 + 0x08], %f52 +	retl +	 ldd		[%o0 + 0x00], %f54 +ENDPROC(aes_sparc64_load_decrypt_keys_192) + +	.align		32 +ENTRY(aes_sparc64_load_decrypt_keys_256) +	/* %o0=key */ +	VISEntry +	ldd		[%o0 + 0xd8], %f8 +	ldd		[%o0 + 0xd0], %f10 +	ldd		[%o0 + 0xc8], %f12 +	ldd		[%o0 + 0xc0], %f14 +	ldd		[%o0 + 0xb8], %f16 +	ldd		[%o0 + 0xb0], %f18 +	ldd		[%o0 + 0xa8], %f20 +	ldd		[%o0 + 0xa0], %f22 +	ldd		[%o0 + 0x98], %f24 +	ldd		[%o0 + 0x90], %f26 +	ldd		[%o0 + 0x88], %f28 +	ldd		[%o0 + 0x80], %f30 +	ldd		[%o0 + 0x78], %f32 +	ldd		[%o0 + 0x70], %f34 +	ldd		[%o0 + 0x68], %f36 +	ldd		[%o0 + 0x60], %f38 +	ldd		[%o0 + 0x58], %f40 +	ldd		[%o0 + 0x50], %f42 +	ldd		[%o0 + 0x48], %f44 +	ldd		[%o0 + 0x40], %f46 +	ldd		[%o0 + 0x38], %f48 +	ldd		[%o0 + 0x30], %f50 +	ldd		[%o0 + 0x28], %f52 +	ldd		[%o0 + 0x20], %f54 +	ldd		[%o0 + 0x18], %f56 +	ldd		[%o0 + 0x10], %f58 +	ldd		[%o0 + 0x08], %f60 +	retl +	 ldd		[%o0 + 0x00], %f62 +ENDPROC(aes_sparc64_load_decrypt_keys_256) + +	.align		32 +ENTRY(aes_sparc64_ecb_encrypt_128) +	/* %o0=key, %o1=input, %o2=output, %o3=len */ +	ldx		[%o0 + 0x00], %g1 +	subcc		%o3, 0x10, %o3 +	be		10f +	 ldx		[%o0 + 0x08], %g2 +1:	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	ldx		[%o1 + 0x10], %o4 +	ldx		[%o1 + 0x18], %o5 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F4 +	MOVXTOD_G7_F6 +	xor		%g1, %o4, %g3 +	xor		%g2, %o5, %g7 +	MOVXTOD_G3_F60 +	MOVXTOD_G7_F62 +	ENCRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58) +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +	std		%f60, [%o2 + 0x10] +	std		%f62, [%o2 + 0x18] +	sub		%o3, 0x20, %o3 +	add		%o1, 0x20, %o1 +	brgz		%o3, 1b +	 add		%o2, 0x20, %o2 +	brlz,pt		%o3, 11f +	 nop +10:	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	
MOVXTOD_G3_F4 +	MOVXTOD_G7_F6 +	ENCRYPT_128(8, 4, 6, 0, 2) +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +11:	retl +	 nop +ENDPROC(aes_sparc64_ecb_encrypt_128) + +	.align		32 +ENTRY(aes_sparc64_ecb_encrypt_192) +	/* %o0=key, %o1=input, %o2=output, %o3=len */ +	ldx		[%o0 + 0x00], %g1 +	subcc		%o3, 0x10, %o3 +	be		10f +	 ldx		[%o0 + 0x08], %g2 +1:	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	ldx		[%o1 + 0x10], %o4 +	ldx		[%o1 + 0x18], %o5 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F4 +	MOVXTOD_G7_F6 +	xor		%g1, %o4, %g3 +	xor		%g2, %o5, %g7 +	MOVXTOD_G3_F60 +	MOVXTOD_G7_F62 +	ENCRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58) +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +	std		%f60, [%o2 + 0x10] +	std		%f62, [%o2 + 0x18] +	sub		%o3, 0x20, %o3 +	add		%o1, 0x20, %o1 +	brgz		%o3, 1b +	 add		%o2, 0x20, %o2 +	brlz,pt		%o3, 11f +	 nop +10:	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F4 +	MOVXTOD_G7_F6 +	ENCRYPT_192(8, 4, 6, 0, 2) +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +11:	retl +	 nop +ENDPROC(aes_sparc64_ecb_encrypt_192) + +	.align		32 +ENTRY(aes_sparc64_ecb_encrypt_256) +	/* %o0=key, %o1=input, %o2=output, %o3=len */ +	ldx		[%o0 + 0x00], %g1 +	subcc		%o3, 0x10, %o3 +	be		10f +	 ldx		[%o0 + 0x08], %g2 +1:	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	ldx		[%o1 + 0x10], %o4 +	ldx		[%o1 + 0x18], %o5 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F4 +	MOVXTOD_G7_F6 +	xor		%g1, %o4, %g3 +	xor		%g2, %o5, %g7 +	MOVXTOD_G3_F0 +	MOVXTOD_G7_F2 +	ENCRYPT_256_2(8, 4, 6, 0, 2) +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +	std		%f0, [%o2 + 0x10] +	std		%f2, [%o2 + 0x18] +	sub		%o3, 0x20, %o3 +	add		%o1, 0x20, %o1 +	brgz		%o3, 1b +	 add		%o2, 0x20, %o2 +	brlz,pt		%o3, 11f +	 nop +10:	ldd		[%o0 + 0xd0], %f56 +	ldd		[%o0 + 0xd8], %f58 +	ldd		[%o0 + 0xe0], %f60 +	ldd		[%o0 + 0xe8], %f62 +	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F4 +	MOVXTOD_G7_F6 +	ENCRYPT_256(8, 4, 6, 0, 2) +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +11:	retl +	 nop +ENDPROC(aes_sparc64_ecb_encrypt_256) + +	.align		32 +ENTRY(aes_sparc64_ecb_decrypt_128) +	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ +	ldx		[%o0 - 0x10], %g1 +	subcc		%o3, 0x10, %o3 +	be		10f +	 ldx		[%o0 - 0x08], %g2 +1:	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	ldx		[%o1 + 0x10], %o4 +	ldx		[%o1 + 0x18], %o5 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F4 +	MOVXTOD_G7_F6 +	xor		%g1, %o4, %g3 +	xor		%g2, %o5, %g7 +	MOVXTOD_G3_F60 +	MOVXTOD_G7_F62 +	DECRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58) +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +	std		%f60, [%o2 + 0x10] +	std		%f62, [%o2 + 0x18] +	sub		%o3, 0x20, %o3 +	add		%o1, 0x20, %o1 +	brgz,pt		%o3, 1b +	 add		%o2, 0x20, %o2 +	brlz,pt		%o3, 11f +	 nop +10:	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F4 +	MOVXTOD_G7_F6 +	DECRYPT_128(8, 4, 6, 0, 2) +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +11:	retl +	 nop +ENDPROC(aes_sparc64_ecb_decrypt_128) + +	.align		32 +ENTRY(aes_sparc64_ecb_decrypt_192) +	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ +	ldx		[%o0 - 0x10], %g1 +	subcc		%o3, 0x10, %o3 +	be		10f +	 ldx		[%o0 - 0x08], %g2 +1:	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	ldx		[%o1 + 0x10], %o4 +	ldx		[%o1 + 0x18], %o5 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F4 +	MOVXTOD_G7_F6 +	xor		%g1, %o4, %g3 +	xor		%g2, %o5, %g7 +	
MOVXTOD_G3_F60 +	MOVXTOD_G7_F62 +	DECRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58) +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +	std		%f60, [%o2 + 0x10] +	std		%f62, [%o2 + 0x18] +	sub		%o3, 0x20, %o3 +	add		%o1, 0x20, %o1 +	brgz,pt		%o3, 1b +	 add		%o2, 0x20, %o2 +	brlz,pt		%o3, 11f +	 nop +10:	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F4 +	MOVXTOD_G7_F6 +	DECRYPT_192(8, 4, 6, 0, 2) +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +11:	retl +	 nop +ENDPROC(aes_sparc64_ecb_decrypt_192) + +	.align		32 +ENTRY(aes_sparc64_ecb_decrypt_256) +	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ +	ldx		[%o0 - 0x10], %g1 +	subcc		%o3, 0x10, %o3 +	ldx		[%o0 - 0x08], %g2 +	be		10f +	 sub		%o0, 0xf0, %o0 +1:	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	ldx		[%o1 + 0x10], %o4 +	ldx		[%o1 + 0x18], %o5 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F4 +	MOVXTOD_G7_F6 +	xor		%g1, %o4, %g3 +	xor		%g2, %o5, %g7 +	MOVXTOD_G3_F0 +	MOVXTOD_G7_F2 +	DECRYPT_256_2(8, 4, 6, 0, 2) +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +	std		%f0, [%o2 + 0x10] +	std		%f2, [%o2 + 0x18] +	sub		%o3, 0x20, %o3 +	add		%o1, 0x20, %o1 +	brgz,pt		%o3, 1b +	 add		%o2, 0x20, %o2 +	brlz,pt		%o3, 11f +	 nop +10:	ldd		[%o0 + 0x18], %f56 +	ldd		[%o0 + 0x10], %f58 +	ldd		[%o0 + 0x08], %f60 +	ldd		[%o0 + 0x00], %f62 +	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F4 +	MOVXTOD_G7_F6 +	DECRYPT_256(8, 4, 6, 0, 2) +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +11:	retl +	 nop +ENDPROC(aes_sparc64_ecb_decrypt_256) + +	.align		32 +ENTRY(aes_sparc64_cbc_encrypt_128) +	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ +	ldd		[%o4 + 0x00], %f4 +	ldd		[%o4 + 0x08], %f6 +	ldx		[%o0 + 0x00], %g1 +	ldx		[%o0 + 0x08], %g2 +1:	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	add		%o1, 0x10, %o1 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F0 +	MOVXTOD_G7_F2 +	fxor		%f4, %f0, %f4 +	fxor		%f6, %f2, %f6 +	ENCRYPT_128(8, 4, 6, 0, 2) +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +	subcc		%o3, 0x10, %o3 +	bne,pt		%xcc, 1b +	 add		%o2, 0x10, %o2 +	std		%f4, [%o4 + 0x00] +	std		%f6, [%o4 + 0x08] +	retl +	 nop +ENDPROC(aes_sparc64_cbc_encrypt_128) + +	.align		32 +ENTRY(aes_sparc64_cbc_encrypt_192) +	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ +	ldd		[%o4 + 0x00], %f4 +	ldd		[%o4 + 0x08], %f6 +	ldx		[%o0 + 0x00], %g1 +	ldx		[%o0 + 0x08], %g2 +1:	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	add		%o1, 0x10, %o1 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F0 +	MOVXTOD_G7_F2 +	fxor		%f4, %f0, %f4 +	fxor		%f6, %f2, %f6 +	ENCRYPT_192(8, 4, 6, 0, 2) +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +	subcc		%o3, 0x10, %o3 +	bne,pt		%xcc, 1b +	 add		%o2, 0x10, %o2 +	std		%f4, [%o4 + 0x00] +	std		%f6, [%o4 + 0x08] +	retl +	 nop +ENDPROC(aes_sparc64_cbc_encrypt_192) + +	.align		32 +ENTRY(aes_sparc64_cbc_encrypt_256) +	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ +	ldd		[%o4 + 0x00], %f4 +	ldd		[%o4 + 0x08], %f6 +	ldx		[%o0 + 0x00], %g1 +	ldx		[%o0 + 0x08], %g2 +1:	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	add		%o1, 0x10, %o1 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F0 +	MOVXTOD_G7_F2 +	fxor		%f4, %f0, %f4 +	fxor		%f6, %f2, %f6 +	ENCRYPT_256(8, 4, 6, 0, 2) +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +	subcc		%o3, 0x10, %o3 +	bne,pt		%xcc, 1b +	 add		%o2, 0x10, %o2 +	std		%f4, [%o4 + 0x00] +	std		%f6, [%o4 + 0x08] +	retl +	 nop 
+ENDPROC(aes_sparc64_cbc_encrypt_256) + +	.align		32 +ENTRY(aes_sparc64_cbc_decrypt_128) +	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ +	ldx		[%o0 - 0x10], %g1 +	ldx		[%o0 - 0x08], %g2 +	ldx		[%o4 + 0x00], %o0 +	ldx		[%o4 + 0x08], %o5 +1:	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	add		%o1, 0x10, %o1 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F4 +	MOVXTOD_G7_F6 +	DECRYPT_128(8, 4, 6, 0, 2) +	MOVXTOD_O0_F0 +	MOVXTOD_O5_F2 +	xor		%g1, %g3, %o0 +	xor		%g2, %g7, %o5 +	fxor		%f4, %f0, %f4 +	fxor		%f6, %f2, %f6 +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +	subcc		%o3, 0x10, %o3 +	bne,pt		%xcc, 1b +	 add		%o2, 0x10, %o2 +	stx		%o0, [%o4 + 0x00] +	stx		%o5, [%o4 + 0x08] +	retl +	 nop +ENDPROC(aes_sparc64_cbc_decrypt_128) + +	.align		32 +ENTRY(aes_sparc64_cbc_decrypt_192) +	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ +	ldx		[%o0 - 0x10], %g1 +	ldx		[%o0 - 0x08], %g2 +	ldx		[%o4 + 0x00], %o0 +	ldx		[%o4 + 0x08], %o5 +1:	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	add		%o1, 0x10, %o1 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F4 +	MOVXTOD_G7_F6 +	DECRYPT_192(8, 4, 6, 0, 2) +	MOVXTOD_O0_F0 +	MOVXTOD_O5_F2 +	xor		%g1, %g3, %o0 +	xor		%g2, %g7, %o5 +	fxor		%f4, %f0, %f4 +	fxor		%f6, %f2, %f6 +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +	subcc		%o3, 0x10, %o3 +	bne,pt		%xcc, 1b +	 add		%o2, 0x10, %o2 +	stx		%o0, [%o4 + 0x00] +	stx		%o5, [%o4 + 0x08] +	retl +	 nop +ENDPROC(aes_sparc64_cbc_decrypt_192) + +	.align		32 +ENTRY(aes_sparc64_cbc_decrypt_256) +	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ +	ldx		[%o0 - 0x10], %g1 +	ldx		[%o0 - 0x08], %g2 +	ldx		[%o4 + 0x00], %o0 +	ldx		[%o4 + 0x08], %o5 +1:	ldx		[%o1 + 0x00], %g3 +	ldx		[%o1 + 0x08], %g7 +	add		%o1, 0x10, %o1 +	xor		%g1, %g3, %g3 +	xor		%g2, %g7, %g7 +	MOVXTOD_G3_F4 +	MOVXTOD_G7_F6 +	DECRYPT_256(8, 4, 6, 0, 2) +	MOVXTOD_O0_F0 +	MOVXTOD_O5_F2 +	xor		%g1, %g3, %o0 +	xor		%g2, %g7, %o5 +	fxor		%f4, %f0, %f4 +	fxor		%f6, %f2, %f6 +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +	subcc		%o3, 0x10, %o3 +	bne,pt		%xcc, 1b +	 add		%o2, 0x10, %o2 +	stx		%o0, [%o4 + 0x00] +	stx		%o5, [%o4 + 0x08] +	retl +	 nop +ENDPROC(aes_sparc64_cbc_decrypt_256) + +	.align		32 +ENTRY(aes_sparc64_ctr_crypt_128) +	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ +	ldx		[%o4 + 0x00], %g3 +	ldx		[%o4 + 0x08], %g7 +	subcc		%o3, 0x10, %o3 +	ldx		[%o0 + 0x00], %g1 +	be		10f +	 ldx		[%o0 + 0x08], %g2 +1:	xor		%g1, %g3, %o5 +	MOVXTOD_O5_F0 +	xor		%g2, %g7, %o5 +	MOVXTOD_O5_F2 +	add		%g7, 1, %g7 +	add		%g3, 1, %o5 +	movrz		%g7, %o5, %g3 +	xor		%g1, %g3, %o5 +	MOVXTOD_O5_F4 +	xor		%g2, %g7, %o5 +	MOVXTOD_O5_F6 +	add		%g7, 1, %g7 +	add		%g3, 1, %o5 +	movrz		%g7, %o5, %g3 +	ENCRYPT_128_2(8, 0, 2, 4, 6, 56, 58, 60, 62) +	ldd		[%o1 + 0x00], %f56 +	ldd		[%o1 + 0x08], %f58 +	ldd		[%o1 + 0x10], %f60 +	ldd		[%o1 + 0x18], %f62 +	fxor		%f56, %f0, %f56 +	fxor		%f58, %f2, %f58 +	fxor		%f60, %f4, %f60 +	fxor		%f62, %f6, %f62 +	std		%f56, [%o2 + 0x00] +	std		%f58, [%o2 + 0x08] +	std		%f60, [%o2 + 0x10] +	std		%f62, [%o2 + 0x18] +	subcc		%o3, 0x20, %o3 +	add		%o1, 0x20, %o1 +	brgz		%o3, 1b +	 add		%o2, 0x20, %o2 +	brlz,pt		%o3, 11f +	 nop +10:	xor		%g1, %g3, %o5 +	MOVXTOD_O5_F0 +	xor		%g2, %g7, %o5 +	MOVXTOD_O5_F2 +	add		%g7, 1, %g7 +	add		%g3, 1, %o5 +	movrz		%g7, %o5, %g3 +	ENCRYPT_128(8, 0, 2, 4, 6) +	ldd		[%o1 + 0x00], %f4 +	ldd		[%o1 + 0x08], %f6 +	fxor		%f4, %f0, %f4 +	fxor		%f6, %f2, %f6 +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +11:	stx		%g3, [%o4 + 0x00] +	retl +	 
stx		%g7, [%o4 + 0x08] +ENDPROC(aes_sparc64_ctr_crypt_128) + +	.align		32 +ENTRY(aes_sparc64_ctr_crypt_192) +	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ +	ldx		[%o4 + 0x00], %g3 +	ldx		[%o4 + 0x08], %g7 +	subcc		%o3, 0x10, %o3 +	ldx		[%o0 + 0x00], %g1 +	be		10f +	 ldx		[%o0 + 0x08], %g2 +1:	xor		%g1, %g3, %o5 +	MOVXTOD_O5_F0 +	xor		%g2, %g7, %o5 +	MOVXTOD_O5_F2 +	add		%g7, 1, %g7 +	add		%g3, 1, %o5 +	movrz		%g7, %o5, %g3 +	xor		%g1, %g3, %o5 +	MOVXTOD_O5_F4 +	xor		%g2, %g7, %o5 +	MOVXTOD_O5_F6 +	add		%g7, 1, %g7 +	add		%g3, 1, %o5 +	movrz		%g7, %o5, %g3 +	ENCRYPT_192_2(8, 0, 2, 4, 6, 56, 58, 60, 62) +	ldd		[%o1 + 0x00], %f56 +	ldd		[%o1 + 0x08], %f58 +	ldd		[%o1 + 0x10], %f60 +	ldd		[%o1 + 0x18], %f62 +	fxor		%f56, %f0, %f56 +	fxor		%f58, %f2, %f58 +	fxor		%f60, %f4, %f60 +	fxor		%f62, %f6, %f62 +	std		%f56, [%o2 + 0x00] +	std		%f58, [%o2 + 0x08] +	std		%f60, [%o2 + 0x10] +	std		%f62, [%o2 + 0x18] +	subcc		%o3, 0x20, %o3 +	add		%o1, 0x20, %o1 +	brgz		%o3, 1b +	 add		%o2, 0x20, %o2 +	brlz,pt		%o3, 11f +	 nop +10:	xor		%g1, %g3, %o5 +	MOVXTOD_O5_F0 +	xor		%g2, %g7, %o5 +	MOVXTOD_O5_F2 +	add		%g7, 1, %g7 +	add		%g3, 1, %o5 +	movrz		%g7, %o5, %g3 +	ENCRYPT_192(8, 0, 2, 4, 6) +	ldd		[%o1 + 0x00], %f4 +	ldd		[%o1 + 0x08], %f6 +	fxor		%f4, %f0, %f4 +	fxor		%f6, %f2, %f6 +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +11:	stx		%g3, [%o4 + 0x00] +	retl +	 stx		%g7, [%o4 + 0x08] +ENDPROC(aes_sparc64_ctr_crypt_192) + +	.align		32 +ENTRY(aes_sparc64_ctr_crypt_256) +	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ +	ldx		[%o4 + 0x00], %g3 +	ldx		[%o4 + 0x08], %g7 +	subcc		%o3, 0x10, %o3 +	ldx		[%o0 + 0x00], %g1 +	be		10f +	 ldx		[%o0 + 0x08], %g2 +1:	xor		%g1, %g3, %o5 +	MOVXTOD_O5_F0 +	xor		%g2, %g7, %o5 +	MOVXTOD_O5_F2 +	add		%g7, 1, %g7 +	add		%g3, 1, %o5 +	movrz		%g7, %o5, %g3 +	xor		%g1, %g3, %o5 +	MOVXTOD_O5_F4 +	xor		%g2, %g7, %o5 +	MOVXTOD_O5_F6 +	add		%g7, 1, %g7 +	add		%g3, 1, %o5 +	movrz		%g7, %o5, %g3 +	ENCRYPT_256_2(8, 0, 2, 4, 6) +	ldd		[%o1 + 0x00], %f56 +	ldd		[%o1 + 0x08], %f58 +	ldd		[%o1 + 0x10], %f60 +	ldd		[%o1 + 0x18], %f62 +	fxor		%f56, %f0, %f56 +	fxor		%f58, %f2, %f58 +	fxor		%f60, %f4, %f60 +	fxor		%f62, %f6, %f62 +	std		%f56, [%o2 + 0x00] +	std		%f58, [%o2 + 0x08] +	std		%f60, [%o2 + 0x10] +	std		%f62, [%o2 + 0x18] +	subcc		%o3, 0x20, %o3 +	add		%o1, 0x20, %o1 +	brgz		%o3, 1b +	 add		%o2, 0x20, %o2 +	brlz,pt		%o3, 11f +	 nop +10:	ldd		[%o0 + 0xd0], %f56 +	ldd		[%o0 + 0xd8], %f58 +	ldd		[%o0 + 0xe0], %f60 +	ldd		[%o0 + 0xe8], %f62 +	xor		%g1, %g3, %o5 +	MOVXTOD_O5_F0 +	xor		%g2, %g7, %o5 +	MOVXTOD_O5_F2 +	add		%g7, 1, %g7 +	add		%g3, 1, %o5 +	movrz		%g7, %o5, %g3 +	ENCRYPT_256(8, 0, 2, 4, 6) +	ldd		[%o1 + 0x00], %f4 +	ldd		[%o1 + 0x08], %f6 +	fxor		%f4, %f0, %f4 +	fxor		%f6, %f2, %f6 +	std		%f4, [%o2 + 0x00] +	std		%f6, [%o2 + 0x08] +11:	stx		%g3, [%o4 + 0x00] +	retl +	 stx		%g7, [%o4 + 0x08] +ENDPROC(aes_sparc64_ctr_crypt_256) diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c new file mode 100644 index 00000000000..df922f52d76 --- /dev/null +++ b/arch/sparc/crypto/aes_glue.c @@ -0,0 +1,504 @@ +/* Glue code for AES encryption optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/aesni-intel_glue.c + * + * Copyright (C) 2008, Intel Corp. + *    Author: Huang Ying <ying.huang@intel.com> + * + * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD + * interface for 64-bit kernels. 
+ *    Authors: Adrian Hoban <adrian.hoban@intel.com> + *             Gabriele Paoloni <gabriele.paoloni@intel.com> + *             Tadeusz Struk (tadeusz.struk@intel.com) + *             Aidan O'Mahony (aidan.o.mahony@intel.com) + *    Copyright (c) 2010, Intel Corporation. + */ + +#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt + +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/types.h> +#include <crypto/algapi.h> +#include <crypto/aes.h> + +#include <asm/fpumacro.h> +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +struct aes_ops { +	void (*encrypt)(const u64 *key, const u32 *input, u32 *output); +	void (*decrypt)(const u64 *key, const u32 *input, u32 *output); +	void (*load_encrypt_keys)(const u64 *key); +	void (*load_decrypt_keys)(const u64 *key); +	void (*ecb_encrypt)(const u64 *key, const u64 *input, u64 *output, +			    unsigned int len); +	void (*ecb_decrypt)(const u64 *key, const u64 *input, u64 *output, +			    unsigned int len); +	void (*cbc_encrypt)(const u64 *key, const u64 *input, u64 *output, +			    unsigned int len, u64 *iv); +	void (*cbc_decrypt)(const u64 *key, const u64 *input, u64 *output, +			    unsigned int len, u64 *iv); +	void (*ctr_crypt)(const u64 *key, const u64 *input, u64 *output, +			  unsigned int len, u64 *iv); +}; + +struct crypto_sparc64_aes_ctx { +	struct aes_ops *ops; +	u64 key[AES_MAX_KEYLENGTH / sizeof(u64)]; +	u32 key_length; +	u32 expanded_key_length; +}; + +extern void aes_sparc64_encrypt_128(const u64 *key, const u32 *input, +				    u32 *output); +extern void aes_sparc64_encrypt_192(const u64 *key, const u32 *input, +				    u32 *output); +extern void aes_sparc64_encrypt_256(const u64 *key, const u32 *input, +				    u32 *output); + +extern void aes_sparc64_decrypt_128(const u64 *key, const u32 *input, +				    u32 *output); +extern void aes_sparc64_decrypt_192(const u64 *key, const u32 *input, +				    u32 *output); +extern void aes_sparc64_decrypt_256(const u64 *key, const u32 *input, +				    u32 *output); + +extern void aes_sparc64_load_encrypt_keys_128(const u64 *key); +extern void aes_sparc64_load_encrypt_keys_192(const u64 *key); +extern void aes_sparc64_load_encrypt_keys_256(const u64 *key); + +extern void aes_sparc64_load_decrypt_keys_128(const u64 *key); +extern void aes_sparc64_load_decrypt_keys_192(const u64 *key); +extern void aes_sparc64_load_decrypt_keys_256(const u64 *key); + +extern void aes_sparc64_ecb_encrypt_128(const u64 *key, const u64 *input, +					u64 *output, unsigned int len); +extern void aes_sparc64_ecb_encrypt_192(const u64 *key, const u64 *input, +					u64 *output, unsigned int len); +extern void aes_sparc64_ecb_encrypt_256(const u64 *key, const u64 *input, +					u64 *output, unsigned int len); + +extern void aes_sparc64_ecb_decrypt_128(const u64 *key, const u64 *input, +					u64 *output, unsigned int len); +extern void aes_sparc64_ecb_decrypt_192(const u64 *key, const u64 *input, +					u64 *output, unsigned int len); +extern void aes_sparc64_ecb_decrypt_256(const u64 *key, const u64 *input, +					u64 *output, unsigned int len); + +extern void aes_sparc64_cbc_encrypt_128(const u64 *key, const u64 *input, +					u64 *output, unsigned int len, +					u64 *iv); + +extern void aes_sparc64_cbc_encrypt_192(const u64 *key, const u64 *input, +					u64 *output, unsigned int len, +					u64 *iv); + +extern void aes_sparc64_cbc_encrypt_256(const u64 *key, const u64 *input, +					u64 *output, unsigned int len, +					u64 *iv); + +extern void 
aes_sparc64_cbc_decrypt_128(const u64 *key, const u64 *input, +					u64 *output, unsigned int len, +					u64 *iv); + +extern void aes_sparc64_cbc_decrypt_192(const u64 *key, const u64 *input, +					u64 *output, unsigned int len, +					u64 *iv); + +extern void aes_sparc64_cbc_decrypt_256(const u64 *key, const u64 *input, +					u64 *output, unsigned int len, +					u64 *iv); + +extern void aes_sparc64_ctr_crypt_128(const u64 *key, const u64 *input, +				      u64 *output, unsigned int len, +				      u64 *iv); +extern void aes_sparc64_ctr_crypt_192(const u64 *key, const u64 *input, +				      u64 *output, unsigned int len, +				      u64 *iv); +extern void aes_sparc64_ctr_crypt_256(const u64 *key, const u64 *input, +				      u64 *output, unsigned int len, +				      u64 *iv); + +static struct aes_ops aes128_ops = { +	.encrypt		= aes_sparc64_encrypt_128, +	.decrypt		= aes_sparc64_decrypt_128, +	.load_encrypt_keys	= aes_sparc64_load_encrypt_keys_128, +	.load_decrypt_keys	= aes_sparc64_load_decrypt_keys_128, +	.ecb_encrypt		= aes_sparc64_ecb_encrypt_128, +	.ecb_decrypt		= aes_sparc64_ecb_decrypt_128, +	.cbc_encrypt		= aes_sparc64_cbc_encrypt_128, +	.cbc_decrypt		= aes_sparc64_cbc_decrypt_128, +	.ctr_crypt		= aes_sparc64_ctr_crypt_128, +}; + +static struct aes_ops aes192_ops = { +	.encrypt		= aes_sparc64_encrypt_192, +	.decrypt		= aes_sparc64_decrypt_192, +	.load_encrypt_keys	= aes_sparc64_load_encrypt_keys_192, +	.load_decrypt_keys	= aes_sparc64_load_decrypt_keys_192, +	.ecb_encrypt		= aes_sparc64_ecb_encrypt_192, +	.ecb_decrypt		= aes_sparc64_ecb_decrypt_192, +	.cbc_encrypt		= aes_sparc64_cbc_encrypt_192, +	.cbc_decrypt		= aes_sparc64_cbc_decrypt_192, +	.ctr_crypt		= aes_sparc64_ctr_crypt_192, +}; + +static struct aes_ops aes256_ops = { +	.encrypt		= aes_sparc64_encrypt_256, +	.decrypt		= aes_sparc64_decrypt_256, +	.load_encrypt_keys	= aes_sparc64_load_encrypt_keys_256, +	.load_decrypt_keys	= aes_sparc64_load_decrypt_keys_256, +	.ecb_encrypt		= aes_sparc64_ecb_encrypt_256, +	.ecb_decrypt		= aes_sparc64_ecb_decrypt_256, +	.cbc_encrypt		= aes_sparc64_cbc_encrypt_256, +	.cbc_decrypt		= aes_sparc64_cbc_decrypt_256, +	.ctr_crypt		= aes_sparc64_ctr_crypt_256, +}; + +extern void aes_sparc64_key_expand(const u32 *in_key, u64 *output_key, +				   unsigned int key_len); + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +		       unsigned int key_len) +{ +	struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); +	u32 *flags = &tfm->crt_flags; + +	switch (key_len) { +	case AES_KEYSIZE_128: +		ctx->expanded_key_length = 0xb0; +		ctx->ops = &aes128_ops; +		break; + +	case AES_KEYSIZE_192: +		ctx->expanded_key_length = 0xd0; +		ctx->ops = &aes192_ops; +		break; + +	case AES_KEYSIZE_256: +		ctx->expanded_key_length = 0xf0; +		ctx->ops = &aes256_ops; +		break; + +	default: +		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; +		return -EINVAL; +	} + +	aes_sparc64_key_expand((const u32 *)in_key, &ctx->key[0], key_len); +	ctx->key_length = key_len; + +	return 0; +} + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ +	struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); + +	ctx->ops->encrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst); +} + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ +	struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); + +	ctx->ops->decrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst); +} + +#define AES_BLOCK_MASK	(~(AES_BLOCK_SIZE-1)) + +static int ecb_encrypt(struct blkcipher_desc *desc, +		       struct 
scatterlist *dst, struct scatterlist *src, +		       unsigned int nbytes) +{ +	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); +	struct blkcipher_walk walk; +	int err; + +	blkcipher_walk_init(&walk, dst, src, nbytes); +	err = blkcipher_walk_virt(desc, &walk); +	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + +	ctx->ops->load_encrypt_keys(&ctx->key[0]); +	while ((nbytes = walk.nbytes)) { +		unsigned int block_len = nbytes & AES_BLOCK_MASK; + +		if (likely(block_len)) { +			ctx->ops->ecb_encrypt(&ctx->key[0], +					      (const u64 *)walk.src.virt.addr, +					      (u64 *) walk.dst.virt.addr, +					      block_len); +		} +		nbytes &= AES_BLOCK_SIZE - 1; +		err = blkcipher_walk_done(desc, &walk, nbytes); +	} +	fprs_write(0); +	return err; +} + +static int ecb_decrypt(struct blkcipher_desc *desc, +		       struct scatterlist *dst, struct scatterlist *src, +		       unsigned int nbytes) +{ +	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); +	struct blkcipher_walk walk; +	u64 *key_end; +	int err; + +	blkcipher_walk_init(&walk, dst, src, nbytes); +	err = blkcipher_walk_virt(desc, &walk); +	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + +	ctx->ops->load_decrypt_keys(&ctx->key[0]); +	key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; +	while ((nbytes = walk.nbytes)) { +		unsigned int block_len = nbytes & AES_BLOCK_MASK; + +		if (likely(block_len)) { +			ctx->ops->ecb_decrypt(key_end, +					      (const u64 *) walk.src.virt.addr, +					      (u64 *) walk.dst.virt.addr, block_len); +		} +		nbytes &= AES_BLOCK_SIZE - 1; +		err = blkcipher_walk_done(desc, &walk, nbytes); +	} +	fprs_write(0); + +	return err; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, +		       struct scatterlist *dst, struct scatterlist *src, +		       unsigned int nbytes) +{ +	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); +	struct blkcipher_walk walk; +	int err; + +	blkcipher_walk_init(&walk, dst, src, nbytes); +	err = blkcipher_walk_virt(desc, &walk); +	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + +	ctx->ops->load_encrypt_keys(&ctx->key[0]); +	while ((nbytes = walk.nbytes)) { +		unsigned int block_len = nbytes & AES_BLOCK_MASK; + +		if (likely(block_len)) { +			ctx->ops->cbc_encrypt(&ctx->key[0], +					      (const u64 *)walk.src.virt.addr, +					      (u64 *) walk.dst.virt.addr, +					      block_len, (u64 *) walk.iv); +		} +		nbytes &= AES_BLOCK_SIZE - 1; +		err = blkcipher_walk_done(desc, &walk, nbytes); +	} +	fprs_write(0); +	return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, +		       struct scatterlist *dst, struct scatterlist *src, +		       unsigned int nbytes) +{ +	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); +	struct blkcipher_walk walk; +	u64 *key_end; +	int err; + +	blkcipher_walk_init(&walk, dst, src, nbytes); +	err = blkcipher_walk_virt(desc, &walk); +	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + +	ctx->ops->load_decrypt_keys(&ctx->key[0]); +	key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; +	while ((nbytes = walk.nbytes)) { +		unsigned int block_len = nbytes & AES_BLOCK_MASK; + +		if (likely(block_len)) { +			ctx->ops->cbc_decrypt(key_end, +					      (const u64 *) walk.src.virt.addr, +					      (u64 *) walk.dst.virt.addr, +					      block_len, (u64 *) walk.iv); +		} +		nbytes &= AES_BLOCK_SIZE - 1; +		err = blkcipher_walk_done(desc, &walk, nbytes); +	} +	fprs_write(0); + +	return err; +} + +static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx, +			    struct 
blkcipher_walk *walk) +{ +	u8 *ctrblk = walk->iv; +	u64 keystream[AES_BLOCK_SIZE / sizeof(u64)]; +	u8 *src = walk->src.virt.addr; +	u8 *dst = walk->dst.virt.addr; +	unsigned int nbytes = walk->nbytes; + +	ctx->ops->ecb_encrypt(&ctx->key[0], (const u64 *)ctrblk, +			      keystream, AES_BLOCK_SIZE); +	crypto_xor((u8 *) keystream, src, nbytes); +	memcpy(dst, keystream, nbytes); +	crypto_inc(ctrblk, AES_BLOCK_SIZE); +} + +static int ctr_crypt(struct blkcipher_desc *desc, +		     struct scatterlist *dst, struct scatterlist *src, +		     unsigned int nbytes) +{ +	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); +	struct blkcipher_walk walk; +	int err; + +	blkcipher_walk_init(&walk, dst, src, nbytes); +	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); +	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + +	ctx->ops->load_encrypt_keys(&ctx->key[0]); +	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { +		unsigned int block_len = nbytes & AES_BLOCK_MASK; + +		if (likely(block_len)) { +			ctx->ops->ctr_crypt(&ctx->key[0], +					    (const u64 *)walk.src.virt.addr, +					    (u64 *) walk.dst.virt.addr, +					    block_len, (u64 *) walk.iv); +		} +		nbytes &= AES_BLOCK_SIZE - 1; +		err = blkcipher_walk_done(desc, &walk, nbytes); +	} +	if (walk.nbytes) { +		ctr_crypt_final(ctx, &walk); +		err = blkcipher_walk_done(desc, &walk, 0); +	} +	fprs_write(0); +	return err; +} + +static struct crypto_alg algs[] = { { +	.cra_name		= "aes", +	.cra_driver_name	= "aes-sparc64", +	.cra_priority		= SPARC_CR_OPCODE_PRIORITY, +	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER, +	.cra_blocksize		= AES_BLOCK_SIZE, +	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx), +	.cra_alignmask		= 3, +	.cra_module		= THIS_MODULE, +	.cra_u	= { +		.cipher	= { +			.cia_min_keysize	= AES_MIN_KEY_SIZE, +			.cia_max_keysize	= AES_MAX_KEY_SIZE, +			.cia_setkey		= aes_set_key, +			.cia_encrypt		= aes_encrypt, +			.cia_decrypt		= aes_decrypt +		} +	} +}, { +	.cra_name		= "ecb(aes)", +	.cra_driver_name	= "ecb-aes-sparc64", +	.cra_priority		= SPARC_CR_OPCODE_PRIORITY, +	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER, +	.cra_blocksize		= AES_BLOCK_SIZE, +	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx), +	.cra_alignmask		= 7, +	.cra_type		= &crypto_blkcipher_type, +	.cra_module		= THIS_MODULE, +	.cra_u = { +		.blkcipher = { +			.min_keysize	= AES_MIN_KEY_SIZE, +			.max_keysize	= AES_MAX_KEY_SIZE, +			.setkey		= aes_set_key, +			.encrypt	= ecb_encrypt, +			.decrypt	= ecb_decrypt, +		}, +	}, +}, { +	.cra_name		= "cbc(aes)", +	.cra_driver_name	= "cbc-aes-sparc64", +	.cra_priority		= SPARC_CR_OPCODE_PRIORITY, +	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER, +	.cra_blocksize		= AES_BLOCK_SIZE, +	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx), +	.cra_alignmask		= 7, +	.cra_type		= &crypto_blkcipher_type, +	.cra_module		= THIS_MODULE, +	.cra_u = { +		.blkcipher = { +			.min_keysize	= AES_MIN_KEY_SIZE, +			.max_keysize	= AES_MAX_KEY_SIZE, +			.setkey		= aes_set_key, +			.encrypt	= cbc_encrypt, +			.decrypt	= cbc_decrypt, +		}, +	}, +}, { +	.cra_name		= "ctr(aes)", +	.cra_driver_name	= "ctr-aes-sparc64", +	.cra_priority		= SPARC_CR_OPCODE_PRIORITY, +	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER, +	.cra_blocksize		= 1, +	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx), +	.cra_alignmask		= 7, +	.cra_type		= &crypto_blkcipher_type, +	.cra_module		= THIS_MODULE, +	.cra_u = { +		.blkcipher = { +			.min_keysize	= AES_MIN_KEY_SIZE, +			.max_keysize	= AES_MAX_KEY_SIZE, +			.setkey		= aes_set_key, +			.encrypt	= ctr_crypt, +			.decrypt	= 
ctr_crypt, +		}, +	}, +} }; + +static bool __init sparc64_has_aes_opcode(void) +{ +	unsigned long cfr; + +	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) +		return false; + +	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); +	if (!(cfr & CFR_AES)) +		return false; + +	return true; +} + +static int __init aes_sparc64_mod_init(void) +{ +	int i; + +	for (i = 0; i < ARRAY_SIZE(algs); i++) +		INIT_LIST_HEAD(&algs[i].cra_list); + +	if (sparc64_has_aes_opcode()) { +		pr_info("Using sparc64 aes opcodes optimized AES implementation\n"); +		return crypto_register_algs(algs, ARRAY_SIZE(algs)); +	} +	pr_info("sparc64 aes opcodes not available.\n"); +	return -ENODEV; +} + +static void __exit aes_sparc64_mod_fini(void) +{ +	crypto_unregister_algs(algs, ARRAY_SIZE(algs)); +} + +module_init(aes_sparc64_mod_init); +module_exit(aes_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("AES Secure Hash Algorithm, sparc64 aes opcode accelerated"); + +MODULE_ALIAS("aes"); + +#include "crop_devid.c" diff --git a/arch/sparc/crypto/camellia_asm.S b/arch/sparc/crypto/camellia_asm.S new file mode 100644 index 00000000000..cc39553a4e4 --- /dev/null +++ b/arch/sparc/crypto/camellia_asm.S @@ -0,0 +1,563 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \ +	CAMELLIA_F(KEY_BASE +  0, I1, I0, I1) \ +	CAMELLIA_F(KEY_BASE +  2, I0, I1, I0) \ +	CAMELLIA_F(KEY_BASE +  4, I1, I0, I1) \ +	CAMELLIA_F(KEY_BASE +  6, I0, I1, I0) \ +	CAMELLIA_F(KEY_BASE +  8, I1, I0, I1) \ +	CAMELLIA_F(KEY_BASE + 10, I0, I1, I0) + +#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \ +	CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \ +	CAMELLIA_FL(KEY_BASE + 12, I0, I0) \ +	CAMELLIA_FLI(KEY_BASE + 14, I1, I1) + +	.data + +	.align	8 +SIGMA:	.xword	0xA09E667F3BCC908B +	.xword	0xB67AE8584CAA73B2 +	.xword	0xC6EF372FE94F82BE +	.xword	0x54FF53A5F1D36F1C +	.xword	0x10E527FADE682D1D +	.xword	0xB05688C2B3E6C1FD + +	.text + +	.align	32 +ENTRY(camellia_sparc64_key_expand) +	/* %o0=in_key, %o1=encrypt_key, %o2=key_len, %o3=decrypt_key */ +	VISEntry +	ld	[%o0 + 0x00], %f0	! i0, k[0] +	ld	[%o0 + 0x04], %f1	! i1, k[1] +	ld	[%o0 + 0x08], %f2	! i2, k[2] +	ld	[%o0 + 0x0c], %f3	! i3, k[3] +	std	%f0, [%o1 + 0x00]	! k[0, 1] +	fsrc2	%f0, %f28 +	std	%f2, [%o1 + 0x08]	! k[2, 3] +	cmp	%o2, 16 +	be	10f +	 fsrc2	%f2, %f30 + +	ld	[%o0 + 0x10], %f0 +	ld	[%o0 + 0x14], %f1 +	std	%f0, [%o1 + 0x20]	! k[8, 9] +	cmp	%o2, 24 +	fone	%f10 +	be,a	1f +	 fxor	%f10, %f0, %f2 +	ld	[%o0 + 0x18], %f2 +	ld	[%o0 + 0x1c], %f3 +1: +	std	%f2, [%o1 + 0x28]	! k[10, 11] +	fxor	%f28, %f0, %f0 +	fxor	%f30, %f2, %f2 + +10: +	sethi	%hi(SIGMA), %g3 +	or	%g3, %lo(SIGMA), %g3 +	ldd	[%g3 + 0x00], %f16 +	ldd	[%g3 + 0x08], %f18 +	ldd	[%g3 + 0x10], %f20 +	ldd	[%g3 + 0x18], %f22 +	ldd	[%g3 + 0x20], %f24 +	ldd	[%g3 + 0x28], %f26 +	CAMELLIA_F(16, 2, 0, 2) +	CAMELLIA_F(18, 0, 2, 0) +	fxor	%f28, %f0, %f0 +	fxor	%f30, %f2, %f2 +	CAMELLIA_F(20, 2, 0, 2) +	CAMELLIA_F(22, 0, 2, 0) + +#define ROTL128(S01, S23, TMP1, TMP2, N)	\ +	srlx	S01, (64 - N), TMP1;		\ +	sllx	S01, N, S01;			\ +	srlx	S23, (64 - N), TMP2;		\ +	sllx	S23, N, S23;			\ +	or	S01, TMP2, S01;			\ +	or	S23, TMP1, S23 + +	cmp	%o2, 16 +	bne	1f +	 nop +	/* 128-bit key */ +	std	%f0, [%o1 + 0x10]	! k[ 4,  5] +	std	%f2, [%o1 + 0x18]	! k[ 6,  7] +	MOVDTOX_F0_O4 +	MOVDTOX_F2_O5 +	ROTL128(%o4, %o5, %g2, %g3, 15) +	stx	%o4, [%o1 + 0x30]	! k[12, 13] +	stx	%o5, [%o1 + 0x38]	! k[14, 15] +	ROTL128(%o4, %o5, %g2, %g3, 15) +	stx	%o4, [%o1 + 0x40]	! k[16, 17] +	stx	%o5, [%o1 + 0x48]	! 
k[18, 19] +	ROTL128(%o4, %o5, %g2, %g3, 15) +	stx	%o4, [%o1 + 0x60]	! k[24, 25] +	ROTL128(%o4, %o5, %g2, %g3, 15) +	stx	%o4, [%o1 + 0x70]	! k[28, 29] +	stx	%o5, [%o1 + 0x78]	! k[30, 31] +	ROTL128(%o4, %o5, %g2, %g3, 34) +	stx	%o4, [%o1 + 0xa0]	! k[40, 41] +	stx	%o5, [%o1 + 0xa8]	! k[42, 43] +	ROTL128(%o4, %o5, %g2, %g3, 17) +	stx	%o4, [%o1 + 0xc0]	! k[48, 49] +	stx	%o5, [%o1 + 0xc8]	! k[50, 51] + +	ldx	[%o1 + 0x00], %o4	! k[ 0,  1] +	ldx	[%o1 + 0x08], %o5	! k[ 2,  3] +	ROTL128(%o4, %o5, %g2, %g3, 15) +	stx	%o4, [%o1 + 0x20]	! k[ 8,  9] +	stx	%o5, [%o1 + 0x28]	! k[10, 11] +	ROTL128(%o4, %o5, %g2, %g3, 30) +	stx	%o4, [%o1 + 0x50]	! k[20, 21] +	stx	%o5, [%o1 + 0x58]	! k[22, 23] +	ROTL128(%o4, %o5, %g2, %g3, 15) +	stx	%o5, [%o1 + 0x68]	! k[26, 27] +	ROTL128(%o4, %o5, %g2, %g3, 17) +	stx	%o4, [%o1 + 0x80]	! k[32, 33] +	stx	%o5, [%o1 + 0x88]	! k[34, 35] +	ROTL128(%o4, %o5, %g2, %g3, 17) +	stx	%o4, [%o1 + 0x90]	! k[36, 37] +	stx	%o5, [%o1 + 0x98]	! k[38, 39] +	ROTL128(%o4, %o5, %g2, %g3, 17) +	stx	%o4, [%o1 + 0xb0]	! k[44, 45] +	stx	%o5, [%o1 + 0xb8]	! k[46, 47] + +	ba,pt	%xcc, 2f +	 mov	(3 * 16 * 4), %o0 + +1: +	/* 192-bit or 256-bit key */ +	std	%f0, [%o1 + 0x30]	! k[12, 13] +	std	%f2, [%o1 + 0x38]	! k[14, 15] +	ldd	[%o1 + 0x20], %f4	! k[ 8,  9] +	ldd	[%o1 + 0x28], %f6	! k[10, 11] +	fxor	%f0, %f4, %f0 +	fxor	%f2, %f6, %f2 +	CAMELLIA_F(24, 2, 0, 2) +	CAMELLIA_F(26, 0, 2, 0) +	std	%f0, [%o1 + 0x10]	! k[ 4,  5] +	std	%f2, [%o1 + 0x18]	! k[ 6,  7] +	MOVDTOX_F0_O4 +	MOVDTOX_F2_O5 +	ROTL128(%o4, %o5, %g2, %g3, 30) +	stx	%o4, [%o1 + 0x50]	! k[20, 21] +	stx	%o5, [%o1 + 0x58]	! k[22, 23] +	ROTL128(%o4, %o5, %g2, %g3, 30) +	stx	%o4, [%o1 + 0xa0]	! k[40, 41] +	stx	%o5, [%o1 + 0xa8]	! k[42, 43] +	ROTL128(%o4, %o5, %g2, %g3, 51) +	stx	%o4, [%o1 + 0x100]	! k[64, 65] +	stx	%o5, [%o1 + 0x108]	! k[66, 67] +	ldx	[%o1 + 0x20], %o4	! k[ 8,  9] +	ldx	[%o1 + 0x28], %o5	! k[10, 11] +	ROTL128(%o4, %o5, %g2, %g3, 15) +	stx	%o4, [%o1 + 0x20]	! k[ 8,  9] +	stx	%o5, [%o1 + 0x28]	! k[10, 11] +	ROTL128(%o4, %o5, %g2, %g3, 15) +	stx	%o4, [%o1 + 0x40]	! k[16, 17] +	stx	%o5, [%o1 + 0x48]	! k[18, 19] +	ROTL128(%o4, %o5, %g2, %g3, 30) +	stx	%o4, [%o1 + 0x90]	! k[36, 37] +	stx	%o5, [%o1 + 0x98]	! k[38, 39] +	ROTL128(%o4, %o5, %g2, %g3, 34) +	stx	%o4, [%o1 + 0xd0]	! k[52, 53] +	stx	%o5, [%o1 + 0xd8]	! k[54, 55] +	ldx	[%o1 + 0x30], %o4	! k[12, 13] +	ldx	[%o1 + 0x38], %o5	! k[14, 15] +	ROTL128(%o4, %o5, %g2, %g3, 15) +	stx	%o4, [%o1 + 0x30]	! k[12, 13] +	stx	%o5, [%o1 + 0x38]	! k[14, 15] +	ROTL128(%o4, %o5, %g2, %g3, 30) +	stx	%o4, [%o1 + 0x70]	! k[28, 29] +	stx	%o5, [%o1 + 0x78]	! k[30, 31] +	srlx	%o4, 32, %g2 +	srlx	%o5, 32, %g3 +	stw	%o4, [%o1 + 0xc0]	! k[48] +	stw	%g3, [%o1 + 0xc4]	! k[49] +	stw	%o5, [%o1 + 0xc8]	! k[50] +	stw	%g2, [%o1 + 0xcc]	! k[51] +	ROTL128(%o4, %o5, %g2, %g3, 49) +	stx	%o4, [%o1 + 0xe0]	! k[56, 57] +	stx	%o5, [%o1 + 0xe8]	! k[58, 59] +	ldx	[%o1 + 0x00], %o4	! k[ 0,  1] +	ldx	[%o1 + 0x08], %o5	! k[ 2,  3] +	ROTL128(%o4, %o5, %g2, %g3, 45) +	stx	%o4, [%o1 + 0x60]	! k[24, 25] +	stx	%o5, [%o1 + 0x68]	! k[26, 27] +	ROTL128(%o4, %o5, %g2, %g3, 15) +	stx	%o4, [%o1 + 0x80]	! k[32, 33] +	stx	%o5, [%o1 + 0x88]	! k[34, 35] +	ROTL128(%o4, %o5, %g2, %g3, 17) +	stx	%o4, [%o1 + 0xb0]	! k[44, 45] +	stx	%o5, [%o1 + 0xb8]	! k[46, 47] +	ROTL128(%o4, %o5, %g2, %g3, 34) +	stx	%o4, [%o1 + 0xf0]	! k[60, 61] +	stx	%o5, [%o1 + 0xf8]	! 
k[62, 63] +	mov	(4 * 16 * 4), %o0 +2: +	add	%o1, %o0, %o1 +	ldd	[%o1 + 0x00], %f0 +	ldd	[%o1 + 0x08], %f2 +	std	%f0, [%o3 + 0x00] +	std	%f2, [%o3 + 0x08] +	add	%o3, 0x10, %o3 +1: +	sub	%o1, (16 * 4), %o1 +	ldd	[%o1 + 0x38], %f0 +	ldd	[%o1 + 0x30], %f2 +	ldd	[%o1 + 0x28], %f4 +	ldd	[%o1 + 0x20], %f6 +	ldd	[%o1 + 0x18], %f8 +	ldd	[%o1 + 0x10], %f10 +	std	%f0, [%o3 + 0x00] +	std	%f2, [%o3 + 0x08] +	std	%f4, [%o3 + 0x10] +	std	%f6, [%o3 + 0x18] +	std	%f8, [%o3 + 0x20] +	std	%f10, [%o3 + 0x28] + +	ldd	[%o1 + 0x08], %f0 +	ldd	[%o1 + 0x00], %f2 +	std	%f0, [%o3 + 0x30] +	std	%f2, [%o3 + 0x38] +	subcc	%o0, (16 * 4), %o0 +	bne,pt	%icc, 1b +	 add	%o3, (16 * 4), %o3 + +	std	%f2, [%o3 - 0x10] +	std	%f0, [%o3 - 0x08] + +	retl +	 VISExit +ENDPROC(camellia_sparc64_key_expand) + +	.align	32 +ENTRY(camellia_sparc64_crypt) +	/* %o0=key, %o1=input, %o2=output, %o3=key_len */ +	VISEntry + +	ld	[%o1 + 0x00], %f0 +	ld	[%o1 + 0x04], %f1 +	ld	[%o1 + 0x08], %f2 +	ld	[%o1 + 0x0c], %f3 + +	ldd	[%o0 + 0x00], %f4 +	ldd	[%o0 + 0x08], %f6 + +	cmp	%o3, 16 +	fxor	%f4, %f0, %f0 +	be	1f +	 fxor	%f6, %f2, %f2 + +	ldd	[%o0 + 0x10], %f8 +	ldd	[%o0 + 0x18], %f10 +	ldd	[%o0 + 0x20], %f12 +	ldd	[%o0 + 0x28], %f14 +	ldd	[%o0 + 0x30], %f16 +	ldd	[%o0 + 0x38], %f18 +	ldd	[%o0 + 0x40], %f20 +	ldd	[%o0 + 0x48], %f22 +	add	%o0, 0x40, %o0 + +	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + +1: +	ldd	[%o0 + 0x10], %f8 +	ldd	[%o0 + 0x18], %f10 +	ldd	[%o0 + 0x20], %f12 +	ldd	[%o0 + 0x28], %f14 +	ldd	[%o0 + 0x30], %f16 +	ldd	[%o0 + 0x38], %f18 +	ldd	[%o0 + 0x40], %f20 +	ldd	[%o0 + 0x48], %f22 +	ldd	[%o0 + 0x50], %f24 +	ldd	[%o0 + 0x58], %f26 +	ldd	[%o0 + 0x60], %f28 +	ldd	[%o0 + 0x68], %f30 +	ldd	[%o0 + 0x70], %f32 +	ldd	[%o0 + 0x78], %f34 +	ldd	[%o0 + 0x80], %f36 +	ldd	[%o0 + 0x88], %f38 +	ldd	[%o0 + 0x90], %f40 +	ldd	[%o0 + 0x98], %f42 +	ldd	[%o0 + 0xa0], %f44 +	ldd	[%o0 + 0xa8], %f46 +	ldd	[%o0 + 0xb0], %f48 +	ldd	[%o0 + 0xb8], %f50 +	ldd	[%o0 + 0xc0], %f52 +	ldd	[%o0 + 0xc8], %f54 + +	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) +	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) +	CAMELLIA_6ROUNDS(40, 0, 2) +	fxor	%f52, %f2, %f2 +	fxor	%f54, %f0, %f0 + +	st	%f2, [%o2 + 0x00] +	st	%f3, [%o2 + 0x04] +	st	%f0, [%o2 + 0x08] +	st	%f1, [%o2 + 0x0c] + +	retl +	 VISExit +ENDPROC(camellia_sparc64_crypt) + +	.align	32 +ENTRY(camellia_sparc64_load_keys) +	/* %o0=key, %o1=key_len */ +	VISEntry +	ldd	[%o0 + 0x00], %f4 +	ldd	[%o0 + 0x08], %f6 +	ldd	[%o0 + 0x10], %f8 +	ldd	[%o0 + 0x18], %f10 +	ldd	[%o0 + 0x20], %f12 +	ldd	[%o0 + 0x28], %f14 +	ldd	[%o0 + 0x30], %f16 +	ldd	[%o0 + 0x38], %f18 +	ldd	[%o0 + 0x40], %f20 +	ldd	[%o0 + 0x48], %f22 +	ldd	[%o0 + 0x50], %f24 +	ldd	[%o0 + 0x58], %f26 +	ldd	[%o0 + 0x60], %f28 +	ldd	[%o0 + 0x68], %f30 +	ldd	[%o0 + 0x70], %f32 +	ldd	[%o0 + 0x78], %f34 +	ldd	[%o0 + 0x80], %f36 +	ldd	[%o0 + 0x88], %f38 +	ldd	[%o0 + 0x90], %f40 +	ldd	[%o0 + 0x98], %f42 +	ldd	[%o0 + 0xa0], %f44 +	ldd	[%o0 + 0xa8], %f46 +	ldd	[%o0 + 0xb0], %f48 +	ldd	[%o0 + 0xb8], %f50 +	ldd	[%o0 + 0xc0], %f52 +	retl +	 ldd	[%o0 + 0xc8], %f54 +ENDPROC(camellia_sparc64_load_keys) + +	.align	32 +ENTRY(camellia_sparc64_ecb_crypt_3_grand_rounds) +	/* %o0=input, %o1=output, %o2=len, %o3=key */ +1:	ldd	[%o0 + 0x00], %f0 +	ldd	[%o0 + 0x08], %f2 +	add	%o0, 0x10, %o0 +	fxor	%f4, %f0, %f0 +	fxor	%f6, %f2, %f2 +	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) +	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) +	CAMELLIA_6ROUNDS(40, 0, 2) +	fxor	%f52, %f2, %f2 +	fxor	%f54, %f0, %f0 +	std	%f2, [%o1 + 0x00] +	std	%f0, [%o1 + 0x08] +	subcc	%o2, 0x10, %o2 +	bne,pt	%icc, 1b +	 add	%o1, 0x10, %o1 +	retl +	 nop 
+ENDPROC(camellia_sparc64_ecb_crypt_3_grand_rounds) + +	.align	32 +ENTRY(camellia_sparc64_ecb_crypt_4_grand_rounds) +	/* %o0=input, %o1=output, %o2=len, %o3=key */ +1:	ldd	[%o0 + 0x00], %f0 +	ldd	[%o0 + 0x08], %f2 +	add	%o0, 0x10, %o0 +	fxor	%f4, %f0, %f0 +	fxor	%f6, %f2, %f2 +	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) +	ldd	[%o3 + 0xd0], %f8 +	ldd	[%o3 + 0xd8], %f10 +	ldd	[%o3 + 0xe0], %f12 +	ldd	[%o3 + 0xe8], %f14 +	ldd	[%o3 + 0xf0], %f16 +	ldd	[%o3 + 0xf8], %f18 +	ldd	[%o3 + 0x100], %f20 +	ldd	[%o3 + 0x108], %f22 +	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) +	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2) +	CAMELLIA_F(8, 2, 0, 2) +	CAMELLIA_F(10, 0, 2, 0) +	ldd	[%o3 + 0x10], %f8 +	ldd	[%o3 + 0x18], %f10 +	CAMELLIA_F(12, 2, 0, 2) +	CAMELLIA_F(14, 0, 2, 0) +	ldd	[%o3 + 0x20], %f12 +	ldd	[%o3 + 0x28], %f14 +	CAMELLIA_F(16, 2, 0, 2) +	CAMELLIA_F(18, 0, 2, 0) +	ldd	[%o3 + 0x30], %f16 +	ldd	[%o3 + 0x38], %f18 +	fxor	%f20, %f2, %f2 +	fxor	%f22, %f0, %f0 +	ldd	[%o3 + 0x40], %f20 +	ldd	[%o3 + 0x48], %f22 +	std	%f2, [%o1 + 0x00] +	std	%f0, [%o1 + 0x08] +	subcc	%o2, 0x10, %o2 +	bne,pt	%icc, 1b +	 add	%o1, 0x10, %o1 +	retl +	 nop +ENDPROC(camellia_sparc64_ecb_crypt_4_grand_rounds) + +	.align	32 +ENTRY(camellia_sparc64_cbc_encrypt_3_grand_rounds) +	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ +	ldd	[%o4 + 0x00], %f60 +	ldd	[%o4 + 0x08], %f62 +1:	ldd	[%o0 + 0x00], %f0 +	ldd	[%o0 + 0x08], %f2 +	add	%o0, 0x10, %o0 +	fxor	%f60, %f0, %f0 +	fxor	%f62, %f2, %f2 +	fxor	%f4, %f0, %f0 +	fxor	%f6, %f2, %f2 +	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) +	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) +	CAMELLIA_6ROUNDS(40, 0, 2) +	fxor	%f52, %f2, %f60 +	fxor	%f54, %f0, %f62 +	std	%f60, [%o1 + 0x00] +	std	%f62, [%o1 + 0x08] +	subcc	%o2, 0x10, %o2 +	bne,pt	%icc, 1b +	 add	%o1, 0x10, %o1 +	std	%f60, [%o4 + 0x00] +	retl +	 std	%f62, [%o4 + 0x08] +ENDPROC(camellia_sparc64_cbc_encrypt_3_grand_rounds) + +	.align	32 +ENTRY(camellia_sparc64_cbc_encrypt_4_grand_rounds) +	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ +	ldd	[%o4 + 0x00], %f60 +	ldd	[%o4 + 0x08], %f62 +1:	ldd	[%o0 + 0x00], %f0 +	ldd	[%o0 + 0x08], %f2 +	add	%o0, 0x10, %o0 +	fxor	%f60, %f0, %f0 +	fxor	%f62, %f2, %f2 +	fxor	%f4, %f0, %f0 +	fxor	%f6, %f2, %f2 +	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) +	ldd	[%o3 + 0xd0], %f8 +	ldd	[%o3 + 0xd8], %f10 +	ldd	[%o3 + 0xe0], %f12 +	ldd	[%o3 + 0xe8], %f14 +	ldd	[%o3 + 0xf0], %f16 +	ldd	[%o3 + 0xf8], %f18 +	ldd	[%o3 + 0x100], %f20 +	ldd	[%o3 + 0x108], %f22 +	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) +	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2) +	CAMELLIA_F(8, 2, 0, 2) +	CAMELLIA_F(10, 0, 2, 0) +	ldd	[%o3 + 0x10], %f8 +	ldd	[%o3 + 0x18], %f10 +	CAMELLIA_F(12, 2, 0, 2) +	CAMELLIA_F(14, 0, 2, 0) +	ldd	[%o3 + 0x20], %f12 +	ldd	[%o3 + 0x28], %f14 +	CAMELLIA_F(16, 2, 0, 2) +	CAMELLIA_F(18, 0, 2, 0) +	ldd	[%o3 + 0x30], %f16 +	ldd	[%o3 + 0x38], %f18 +	fxor	%f20, %f2, %f60 +	fxor	%f22, %f0, %f62 +	ldd	[%o3 + 0x40], %f20 +	ldd	[%o3 + 0x48], %f22 +	std	%f60, [%o1 + 0x00] +	std	%f62, [%o1 + 0x08] +	subcc	%o2, 0x10, %o2 +	bne,pt	%icc, 1b +	 add	%o1, 0x10, %o1 +	std	%f60, [%o4 + 0x00] +	retl +	 std	%f62, [%o4 + 0x08] +ENDPROC(camellia_sparc64_cbc_encrypt_4_grand_rounds) + +	.align	32 +ENTRY(camellia_sparc64_cbc_decrypt_3_grand_rounds) +	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ +	ldd	[%o4 + 0x00], %f60 +	ldd	[%o4 + 0x08], %f62 +1:	ldd	[%o0 + 0x00], %f56 +	ldd	[%o0 + 0x08], %f58 +	add	%o0, 0x10, %o0 +	fxor	%f4, %f56, %f0 +	fxor	%f6, %f58, %f2 +	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) +	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) +	CAMELLIA_6ROUNDS(40, 0, 2) +	fxor	%f52, %f2, %f2 +	fxor	%f54, %f0, 
%f0 +	fxor	%f60, %f2, %f2 +	fxor	%f62, %f0, %f0 +	fsrc2	%f56, %f60 +	fsrc2	%f58, %f62 +	std	%f2, [%o1 + 0x00] +	std	%f0, [%o1 + 0x08] +	subcc	%o2, 0x10, %o2 +	bne,pt	%icc, 1b +	 add	%o1, 0x10, %o1 +	std	%f60, [%o4 + 0x00] +	retl +	 std	%f62, [%o4 + 0x08] +ENDPROC(camellia_sparc64_cbc_decrypt_3_grand_rounds) + +	.align	32 +ENTRY(camellia_sparc64_cbc_decrypt_4_grand_rounds) +	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ +	ldd	[%o4 + 0x00], %f60 +	ldd	[%o4 + 0x08], %f62 +1:	ldd	[%o0 + 0x00], %f56 +	ldd	[%o0 + 0x08], %f58 +	add	%o0, 0x10, %o0 +	fxor	%f4, %f56, %f0 +	fxor	%f6, %f58, %f2 +	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) +	ldd	[%o3 + 0xd0], %f8 +	ldd	[%o3 + 0xd8], %f10 +	ldd	[%o3 + 0xe0], %f12 +	ldd	[%o3 + 0xe8], %f14 +	ldd	[%o3 + 0xf0], %f16 +	ldd	[%o3 + 0xf8], %f18 +	ldd	[%o3 + 0x100], %f20 +	ldd	[%o3 + 0x108], %f22 +	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) +	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2) +	CAMELLIA_F(8, 2, 0, 2) +	CAMELLIA_F(10, 0, 2, 0) +	ldd	[%o3 + 0x10], %f8 +	ldd	[%o3 + 0x18], %f10 +	CAMELLIA_F(12, 2, 0, 2) +	CAMELLIA_F(14, 0, 2, 0) +	ldd	[%o3 + 0x20], %f12 +	ldd	[%o3 + 0x28], %f14 +	CAMELLIA_F(16, 2, 0, 2) +	CAMELLIA_F(18, 0, 2, 0) +	ldd	[%o3 + 0x30], %f16 +	ldd	[%o3 + 0x38], %f18 +	fxor	%f20, %f2, %f2 +	fxor	%f22, %f0, %f0 +	ldd	[%o3 + 0x40], %f20 +	ldd	[%o3 + 0x48], %f22 +	fxor	%f60, %f2, %f2 +	fxor	%f62, %f0, %f0 +	fsrc2	%f56, %f60 +	fsrc2	%f58, %f62 +	std	%f2, [%o1 + 0x00] +	std	%f0, [%o1 + 0x08] +	subcc	%o2, 0x10, %o2 +	bne,pt	%icc, 1b +	 add	%o1, 0x10, %o1 +	std	%f60, [%o4 + 0x00] +	retl +	 std	%f62, [%o4 + 0x08] +ENDPROC(camellia_sparc64_cbc_decrypt_4_grand_rounds) diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c new file mode 100644 index 00000000000..888f6260b4e --- /dev/null +++ b/arch/sparc/crypto/camellia_glue.c @@ -0,0 +1,327 @@ +/* Glue code for CAMELLIA encryption optimized for sparc64 crypto opcodes. + * + * Copyright (C) 2012 David S. 
Miller <davem@davemloft.net> + */ + +#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt + +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/types.h> +#include <crypto/algapi.h> + +#include <asm/fpumacro.h> +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +#define CAMELLIA_MIN_KEY_SIZE        16 +#define CAMELLIA_MAX_KEY_SIZE        32 +#define CAMELLIA_BLOCK_SIZE          16 +#define CAMELLIA_TABLE_BYTE_LEN     272 + +struct camellia_sparc64_ctx { +	u64 encrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; +	u64 decrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; +	int key_len; +}; + +extern void camellia_sparc64_key_expand(const u32 *in_key, u64 *encrypt_key, +					unsigned int key_len, u64 *decrypt_key); + +static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key, +			    unsigned int key_len) +{ +	struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); +	const u32 *in_key = (const u32 *) _in_key; +	u32 *flags = &tfm->crt_flags; + +	if (key_len != 16 && key_len != 24 && key_len != 32) { +		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; +		return -EINVAL; +	} + +	ctx->key_len = key_len; + +	camellia_sparc64_key_expand(in_key, &ctx->encrypt_key[0], +				    key_len, &ctx->decrypt_key[0]); +	return 0; +} + +extern void camellia_sparc64_crypt(const u64 *key, const u32 *input, +				   u32 *output, unsigned int key_len); + +static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ +	struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + +	camellia_sparc64_crypt(&ctx->encrypt_key[0], +			       (const u32 *) src, +			       (u32 *) dst, ctx->key_len); +} + +static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ +	struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + +	camellia_sparc64_crypt(&ctx->decrypt_key[0], +			       (const u32 *) src, +			       (u32 *) dst, ctx->key_len); +} + +extern void camellia_sparc64_load_keys(const u64 *key, unsigned int key_len); + +typedef void ecb_crypt_op(const u64 *input, u64 *output, unsigned int len, +			  const u64 *key); + +extern ecb_crypt_op camellia_sparc64_ecb_crypt_3_grand_rounds; +extern ecb_crypt_op camellia_sparc64_ecb_crypt_4_grand_rounds; + +#define CAMELLIA_BLOCK_MASK	(~(CAMELLIA_BLOCK_SIZE - 1)) + +static int __ecb_crypt(struct blkcipher_desc *desc, +		       struct scatterlist *dst, struct scatterlist *src, +		       unsigned int nbytes, bool encrypt) +{ +	struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); +	struct blkcipher_walk walk; +	ecb_crypt_op *op; +	const u64 *key; +	int err; + +	op = camellia_sparc64_ecb_crypt_3_grand_rounds; +	if (ctx->key_len != 16) +		op = camellia_sparc64_ecb_crypt_4_grand_rounds; + +	blkcipher_walk_init(&walk, dst, src, nbytes); +	err = blkcipher_walk_virt(desc, &walk); +	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + +	if (encrypt) +		key = &ctx->encrypt_key[0]; +	else +		key = &ctx->decrypt_key[0]; +	camellia_sparc64_load_keys(key, ctx->key_len); +	while ((nbytes = walk.nbytes)) { +		unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; + +		if (likely(block_len)) { +			const u64 *src64; +			u64 *dst64; + +			src64 = (const u64 *)walk.src.virt.addr; +			dst64 = (u64 *) walk.dst.virt.addr; +			op(src64, dst64, block_len, key); +		} +		nbytes &= CAMELLIA_BLOCK_SIZE - 1; +		err = blkcipher_walk_done(desc, &walk, nbytes); +	} +	fprs_write(0); +	return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, +		       struct scatterlist *dst, struct 
scatterlist *src, +		       unsigned int nbytes) +{ +	return __ecb_crypt(desc, dst, src, nbytes, true); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, +		       struct scatterlist *dst, struct scatterlist *src, +		       unsigned int nbytes) +{ +	return __ecb_crypt(desc, dst, src, nbytes, false); +} + +typedef void cbc_crypt_op(const u64 *input, u64 *output, unsigned int len, +			  const u64 *key, u64 *iv); + +extern cbc_crypt_op camellia_sparc64_cbc_encrypt_3_grand_rounds; +extern cbc_crypt_op camellia_sparc64_cbc_encrypt_4_grand_rounds; +extern cbc_crypt_op camellia_sparc64_cbc_decrypt_3_grand_rounds; +extern cbc_crypt_op camellia_sparc64_cbc_decrypt_4_grand_rounds; + +static int cbc_encrypt(struct blkcipher_desc *desc, +		       struct scatterlist *dst, struct scatterlist *src, +		       unsigned int nbytes) +{ +	struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); +	struct blkcipher_walk walk; +	cbc_crypt_op *op; +	const u64 *key; +	int err; + +	op = camellia_sparc64_cbc_encrypt_3_grand_rounds; +	if (ctx->key_len != 16) +		op = camellia_sparc64_cbc_encrypt_4_grand_rounds; + +	blkcipher_walk_init(&walk, dst, src, nbytes); +	err = blkcipher_walk_virt(desc, &walk); +	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + +	key = &ctx->encrypt_key[0]; +	camellia_sparc64_load_keys(key, ctx->key_len); +	while ((nbytes = walk.nbytes)) { +		unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; + +		if (likely(block_len)) { +			const u64 *src64; +			u64 *dst64; + +			src64 = (const u64 *)walk.src.virt.addr; +			dst64 = (u64 *) walk.dst.virt.addr; +			op(src64, dst64, block_len, key, +			   (u64 *) walk.iv); +		} +		nbytes &= CAMELLIA_BLOCK_SIZE - 1; +		err = blkcipher_walk_done(desc, &walk, nbytes); +	} +	fprs_write(0); +	return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, +		       struct scatterlist *dst, struct scatterlist *src, +		       unsigned int nbytes) +{ +	struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); +	struct blkcipher_walk walk; +	cbc_crypt_op *op; +	const u64 *key; +	int err; + +	op = camellia_sparc64_cbc_decrypt_3_grand_rounds; +	if (ctx->key_len != 16) +		op = camellia_sparc64_cbc_decrypt_4_grand_rounds; + +	blkcipher_walk_init(&walk, dst, src, nbytes); +	err = blkcipher_walk_virt(desc, &walk); +	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + +	key = &ctx->decrypt_key[0]; +	camellia_sparc64_load_keys(key, ctx->key_len); +	while ((nbytes = walk.nbytes)) { +		unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; + +		if (likely(block_len)) { +			const u64 *src64; +			u64 *dst64; + +			src64 = (const u64 *)walk.src.virt.addr; +			dst64 = (u64 *) walk.dst.virt.addr; +			op(src64, dst64, block_len, key, +			   (u64 *) walk.iv); +		} +		nbytes &= CAMELLIA_BLOCK_SIZE - 1; +		err = blkcipher_walk_done(desc, &walk, nbytes); +	} +	fprs_write(0); +	return err; +} + +static struct crypto_alg algs[] = { { +	.cra_name		= "camellia", +	.cra_driver_name	= "camellia-sparc64", +	.cra_priority		= SPARC_CR_OPCODE_PRIORITY, +	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER, +	.cra_blocksize		= CAMELLIA_BLOCK_SIZE, +	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx), +	.cra_alignmask		= 3, +	.cra_module		= THIS_MODULE, +	.cra_u	= { +		.cipher	= { +			.cia_min_keysize	= CAMELLIA_MIN_KEY_SIZE, +			.cia_max_keysize	= CAMELLIA_MAX_KEY_SIZE, +			.cia_setkey		= camellia_set_key, +			.cia_encrypt		= camellia_encrypt, +			.cia_decrypt		= camellia_decrypt +		} +	} +}, { +	.cra_name		= "ecb(camellia)", +	.cra_driver_name	= "ecb-camellia-sparc64", +	.cra_priority		= 
SPARC_CR_OPCODE_PRIORITY, +	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER, +	.cra_blocksize		= CAMELLIA_BLOCK_SIZE, +	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx), +	.cra_alignmask		= 7, +	.cra_type		= &crypto_blkcipher_type, +	.cra_module		= THIS_MODULE, +	.cra_u = { +		.blkcipher = { +			.min_keysize	= CAMELLIA_MIN_KEY_SIZE, +			.max_keysize	= CAMELLIA_MAX_KEY_SIZE, +			.setkey		= camellia_set_key, +			.encrypt	= ecb_encrypt, +			.decrypt	= ecb_decrypt, +		}, +	}, +}, { +	.cra_name		= "cbc(camellia)", +	.cra_driver_name	= "cbc-camellia-sparc64", +	.cra_priority		= SPARC_CR_OPCODE_PRIORITY, +	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER, +	.cra_blocksize		= CAMELLIA_BLOCK_SIZE, +	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx), +	.cra_alignmask		= 7, +	.cra_type		= &crypto_blkcipher_type, +	.cra_module		= THIS_MODULE, +	.cra_u = { +		.blkcipher = { +			.min_keysize	= CAMELLIA_MIN_KEY_SIZE, +			.max_keysize	= CAMELLIA_MAX_KEY_SIZE, +			.setkey		= camellia_set_key, +			.encrypt	= cbc_encrypt, +			.decrypt	= cbc_decrypt, +		}, +	}, +} +}; + +static bool __init sparc64_has_camellia_opcode(void) +{ +	unsigned long cfr; + +	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) +		return false; + +	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); +	if (!(cfr & CFR_CAMELLIA)) +		return false; + +	return true; +} + +static int __init camellia_sparc64_mod_init(void) +{ +	int i; + +	for (i = 0; i < ARRAY_SIZE(algs); i++) +		INIT_LIST_HEAD(&algs[i].cra_list); + +	if (sparc64_has_camellia_opcode()) { +		pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n"); +		return crypto_register_algs(algs, ARRAY_SIZE(algs)); +	} +	pr_info("sparc64 camellia opcodes not available.\n"); +	return -ENODEV; +} + +static void __exit camellia_sparc64_mod_fini(void) +{ +	crypto_unregister_algs(algs, ARRAY_SIZE(algs)); +} + +module_init(camellia_sparc64_mod_init); +module_exit(camellia_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated"); + +MODULE_ALIAS("camellia"); + +#include "crop_devid.c" diff --git a/arch/sparc/crypto/crc32c_asm.S b/arch/sparc/crypto/crc32c_asm.S new file mode 100644 index 00000000000..2b1976e765b --- /dev/null +++ b/arch/sparc/crypto/crc32c_asm.S @@ -0,0 +1,20 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> +#include <asm/asi.h> + +#include "opcodes.h" + +ENTRY(crc32c_sparc64) +	/* %o0=crc32p, %o1=data_ptr, %o2=len */ +	VISEntryHalf +	lda	[%o0] ASI_PL, %f1 +1:	ldd	[%o1], %f2 +	CRC32C(0,2,0) +	subcc	%o2, 8, %o2 +	bne,pt	%icc, 1b +	 add	%o1, 0x8, %o1 +	sta	%f1, [%o0] ASI_PL +	VISExitHalf +2:	retl +	 nop +ENDPROC(crc32c_sparc64) diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c new file mode 100644 index 00000000000..5162fad912c --- /dev/null +++ b/arch/sparc/crypto/crc32c_glue.c @@ -0,0 +1,181 @@ +/* Glue code for CRC32C optimized for sparc64 crypto opcodes.
+ * + * This is based largely upon arch/x86/crypto/crc32c-intel.c + * + * Copyright (C) 2008 Intel Corporation + * Authors: Austin Zhang <austin_zhang@linux.intel.com> + *          Kent Liu <kent.liu@intel.com> + */ + +#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/crc32.h> + +#include <crypto/internal/hash.h> + +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +/* + * Setting the seed allows arbitrary accumulators and flexible XOR policy + * If your algorithm starts with ~0, then XOR with ~0 before you set + * the seed. + */ +static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key, +				 unsigned int keylen) +{ +	u32 *mctx = crypto_shash_ctx(hash); + +	if (keylen != sizeof(u32)) { +		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); +		return -EINVAL; +	} +	*(__le32 *)mctx = le32_to_cpup((__le32 *)key); +	return 0; +} + +static int crc32c_sparc64_init(struct shash_desc *desc) +{ +	u32 *mctx = crypto_shash_ctx(desc->tfm); +	u32 *crcp = shash_desc_ctx(desc); + +	*crcp = *mctx; + +	return 0; +} + +extern void crc32c_sparc64(u32 *crcp, const u64 *data, unsigned int len); + +static void crc32c_compute(u32 *crcp, const u64 *data, unsigned int len) +{ +	unsigned int asm_len; + +	asm_len = len & ~7U; +	if (asm_len) { +		crc32c_sparc64(crcp, data, asm_len); +		data += asm_len / 8; +		len -= asm_len; +	} +	if (len) +		*crcp = __crc32c_le(*crcp, (const unsigned char *) data, len); +} + +static int crc32c_sparc64_update(struct shash_desc *desc, const u8 *data, +				 unsigned int len) +{ +	u32 *crcp = shash_desc_ctx(desc); + +	crc32c_compute(crcp, (const u64 *) data, len); + +	return 0; +} + +static int __crc32c_sparc64_finup(u32 *crcp, const u8 *data, unsigned int len, +				  u8 *out) +{ +	u32 tmp = *crcp; + +	crc32c_compute(&tmp, (const u64 *) data, len); + +	*(__le32 *) out = ~cpu_to_le32(tmp); +	return 0; +} + +static int crc32c_sparc64_finup(struct shash_desc *desc, const u8 *data, +				unsigned int len, u8 *out) +{ +	return __crc32c_sparc64_finup(shash_desc_ctx(desc), data, len, out); +} + +static int crc32c_sparc64_final(struct shash_desc *desc, u8 *out) +{ +	u32 *crcp = shash_desc_ctx(desc); + +	*(__le32 *) out = ~cpu_to_le32p(crcp); +	return 0; +} + +static int crc32c_sparc64_digest(struct shash_desc *desc, const u8 *data, +				 unsigned int len, u8 *out) +{ +	return __crc32c_sparc64_finup(crypto_shash_ctx(desc->tfm), data, len, +				      out); +} + +static int crc32c_sparc64_cra_init(struct crypto_tfm *tfm) +{ +	u32 *key = crypto_tfm_ctx(tfm); + +	*key = ~0; + +	return 0; +} + +#define CHKSUM_BLOCK_SIZE	1 +#define CHKSUM_DIGEST_SIZE	4 + +static struct shash_alg alg = { +	.setkey			=	crc32c_sparc64_setkey, +	.init			=	crc32c_sparc64_init, +	.update			=	crc32c_sparc64_update, +	.final			=	crc32c_sparc64_final, +	.finup			=	crc32c_sparc64_finup, +	.digest			=	crc32c_sparc64_digest, +	.descsize		=	sizeof(u32), +	.digestsize		=	CHKSUM_DIGEST_SIZE, +	.base			=	{ +		.cra_name		=	"crc32c", +		.cra_driver_name	=	"crc32c-sparc64", +		.cra_priority		=	SPARC_CR_OPCODE_PRIORITY, +		.cra_blocksize		=	CHKSUM_BLOCK_SIZE, +		.cra_ctxsize		=	sizeof(u32), +		.cra_alignmask		=	7, +		.cra_module		=	THIS_MODULE, +		.cra_init		=	crc32c_sparc64_cra_init, +	} +}; + +static bool __init sparc64_has_crc32c_opcode(void) +{ +	unsigned long cfr; + +	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) +		return false; + +	__asm__ __volatile__("rd %%asr26, %0" 
: "=r" (cfr)); +	if (!(cfr & CFR_CRC32C)) +		return false; + +	return true; +} + +static int __init crc32c_sparc64_mod_init(void) +{ +	if (sparc64_has_crc32c_opcode()) { +		pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n"); +		return crypto_register_shash(&alg); +	} +	pr_info("sparc64 crc32c opcode not available.\n"); +	return -ENODEV; +} + +static void __exit crc32c_sparc64_mod_fini(void) +{ +	crypto_unregister_shash(&alg); +} + +module_init(crc32c_sparc64_mod_init); +module_exit(crc32c_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated"); + +MODULE_ALIAS("crc32c"); + +#include "crop_devid.c" diff --git a/arch/sparc/crypto/crop_devid.c b/arch/sparc/crypto/crop_devid.c new file mode 100644 index 00000000000..5f5724a0ae2 --- /dev/null +++ b/arch/sparc/crypto/crop_devid.c @@ -0,0 +1,14 @@ +#include <linux/module.h> +#include <linux/of_device.h> + +/* This is a dummy device table linked into all of the crypto + * opcode drivers.  It serves to trigger the module autoloading + * mechanisms in userspace which scan the OF device tree and + * load any modules which have device table entries that + * match OF device nodes. + */ +static const struct of_device_id crypto_opcode_match[] = { +	{ .name = "cpu", .compatible = "sun4v", }, +	{}, +}; +MODULE_DEVICE_TABLE(of, crypto_opcode_match); diff --git a/arch/sparc/crypto/des_asm.S b/arch/sparc/crypto/des_asm.S new file mode 100644 index 00000000000..b5c8fc269b5 --- /dev/null +++ b/arch/sparc/crypto/des_asm.S @@ -0,0 +1,419 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +	.align	32 +ENTRY(des_sparc64_key_expand) +	/* %o0=input_key, %o1=output_key */ +	VISEntryHalf +	ld	[%o0 + 0x00], %f0 +	ld	[%o0 + 0x04], %f1 +	DES_KEXPAND(0, 0, 0) +	DES_KEXPAND(0, 1, 2) +	DES_KEXPAND(2, 3, 6) +	DES_KEXPAND(2, 2, 4) +	DES_KEXPAND(6, 3, 10) +	DES_KEXPAND(6, 2, 8) +	DES_KEXPAND(10, 3, 14) +	DES_KEXPAND(10, 2, 12) +	DES_KEXPAND(14, 1, 16) +	DES_KEXPAND(16, 3, 20) +	DES_KEXPAND(16, 2, 18) +	DES_KEXPAND(20, 3, 24) +	DES_KEXPAND(20, 2, 22) +	DES_KEXPAND(24, 3, 28) +	DES_KEXPAND(24, 2, 26) +	DES_KEXPAND(28, 1, 30) +	std	%f0, [%o1 + 0x00] +	std	%f2, [%o1 + 0x08] +	std	%f4, [%o1 + 0x10] +	std	%f6, [%o1 + 0x18] +	std	%f8, [%o1 + 0x20] +	std	%f10, [%o1 + 0x28] +	std	%f12, [%o1 + 0x30] +	std	%f14, [%o1 + 0x38] +	std	%f16, [%o1 + 0x40] +	std	%f18, [%o1 + 0x48] +	std	%f20, [%o1 + 0x50] +	std	%f22, [%o1 + 0x58] +	std	%f24, [%o1 + 0x60] +	std	%f26, [%o1 + 0x68] +	std	%f28, [%o1 + 0x70] +	std	%f30, [%o1 + 0x78] +	retl +	 VISExitHalf +ENDPROC(des_sparc64_key_expand) + +	.align	32 +ENTRY(des_sparc64_crypt) +	/* %o0=key, %o1=input, %o2=output */ +	VISEntry +	ldd	[%o1 + 0x00], %f32 +	ldd	[%o0 + 0x00], %f0 +	ldd	[%o0 + 0x08], %f2 +	ldd	[%o0 + 0x10], %f4 +	ldd	[%o0 + 0x18], %f6 +	ldd	[%o0 + 0x20], %f8 +	ldd	[%o0 + 0x28], %f10 +	ldd	[%o0 + 0x30], %f12 +	ldd	[%o0 + 0x38], %f14 +	ldd	[%o0 + 0x40], %f16 +	ldd	[%o0 + 0x48], %f18 +	ldd	[%o0 + 0x50], %f20 +	ldd	[%o0 + 0x58], %f22 +	ldd	[%o0 + 0x60], %f24 +	ldd	[%o0 + 0x68], %f26 +	ldd	[%o0 + 0x70], %f28 +	ldd	[%o0 + 0x78], %f30 +	DES_IP(32, 32) +	DES_ROUND(0, 2, 32, 32) +	DES_ROUND(4, 6, 32, 32) +	DES_ROUND(8, 10, 32, 32) +	DES_ROUND(12, 14, 32, 32) +	DES_ROUND(16, 18, 32, 32) +	DES_ROUND(20, 22, 32, 32) +	DES_ROUND(24, 26, 32, 32) +	DES_ROUND(28, 30, 32, 32) +	DES_IIP(32, 32) +	std	%f32, [%o2 + 0x00] +	retl +	 VISExit +ENDPROC(des_sparc64_crypt) + +	.align	32 +ENTRY(des_sparc64_load_keys) +	/* %o0=key */ +	VISEntry +	ldd	
[%o0 + 0x00], %f0 +	ldd	[%o0 + 0x08], %f2 +	ldd	[%o0 + 0x10], %f4 +	ldd	[%o0 + 0x18], %f6 +	ldd	[%o0 + 0x20], %f8 +	ldd	[%o0 + 0x28], %f10 +	ldd	[%o0 + 0x30], %f12 +	ldd	[%o0 + 0x38], %f14 +	ldd	[%o0 + 0x40], %f16 +	ldd	[%o0 + 0x48], %f18 +	ldd	[%o0 + 0x50], %f20 +	ldd	[%o0 + 0x58], %f22 +	ldd	[%o0 + 0x60], %f24 +	ldd	[%o0 + 0x68], %f26 +	ldd	[%o0 + 0x70], %f28 +	retl +	 ldd	[%o0 + 0x78], %f30 +ENDPROC(des_sparc64_load_keys) + +	.align	32 +ENTRY(des_sparc64_ecb_crypt) +	/* %o0=input, %o1=output, %o2=len */ +1:	ldd	[%o0 + 0x00], %f32 +	add	%o0, 0x08, %o0 +	DES_IP(32, 32) +	DES_ROUND(0, 2, 32, 32) +	DES_ROUND(4, 6, 32, 32) +	DES_ROUND(8, 10, 32, 32) +	DES_ROUND(12, 14, 32, 32) +	DES_ROUND(16, 18, 32, 32) +	DES_ROUND(20, 22, 32, 32) +	DES_ROUND(24, 26, 32, 32) +	DES_ROUND(28, 30, 32, 32) +	DES_IIP(32, 32) +	std	%f32, [%o1 + 0x00] +	subcc	%o2, 0x08, %o2 +	bne,pt	%icc, 1b +	 add	%o1, 0x08, %o1 +	retl +	 nop +ENDPROC(des_sparc64_ecb_crypt) + +	.align	32 +ENTRY(des_sparc64_cbc_encrypt) +	/* %o0=input, %o1=output, %o2=len, %o3=IV */ +	ldd	[%o3 + 0x00], %f32 +1:	ldd	[%o0 + 0x00], %f34 +	fxor	%f32, %f34, %f32 +	DES_IP(32, 32) +	DES_ROUND(0, 2, 32, 32) +	DES_ROUND(4, 6, 32, 32) +	DES_ROUND(8, 10, 32, 32) +	DES_ROUND(12, 14, 32, 32) +	DES_ROUND(16, 18, 32, 32) +	DES_ROUND(20, 22, 32, 32) +	DES_ROUND(24, 26, 32, 32) +	DES_ROUND(28, 30, 32, 32) +	DES_IIP(32, 32) +	std	%f32, [%o1 + 0x00] +	add	%o0, 0x08, %o0 +	subcc	%o2, 0x08, %o2 +	bne,pt	%icc, 1b +	 add	%o1, 0x08, %o1 +	retl +	 std	%f32, [%o3 + 0x00] +ENDPROC(des_sparc64_cbc_encrypt) + +	.align	32 +ENTRY(des_sparc64_cbc_decrypt) +	/* %o0=input, %o1=output, %o2=len, %o3=IV */ +	ldd	[%o3 + 0x00], %f34 +1:	ldd	[%o0 + 0x00], %f36 +	DES_IP(36, 32) +	DES_ROUND(0, 2, 32, 32) +	DES_ROUND(4, 6, 32, 32) +	DES_ROUND(8, 10, 32, 32) +	DES_ROUND(12, 14, 32, 32) +	DES_ROUND(16, 18, 32, 32) +	DES_ROUND(20, 22, 32, 32) +	DES_ROUND(24, 26, 32, 32) +	DES_ROUND(28, 30, 32, 32) +	DES_IIP(32, 32) +	fxor	%f32, %f34, %f32 +	fsrc2	%f36, %f34 +	std	%f32, [%o1 + 0x00] +	add	%o0, 0x08, %o0 +	subcc	%o2, 0x08, %o2 +	bne,pt	%icc, 1b +	 add	%o1, 0x08, %o1 +	retl +	 std	%f36, [%o3 + 0x00] +ENDPROC(des_sparc64_cbc_decrypt) + +	.align	32 +ENTRY(des3_ede_sparc64_crypt) +	/* %o0=key, %o1=input, %o2=output */ +	VISEntry +	ldd	[%o1 + 0x00], %f32 +	ldd	[%o0 + 0x00], %f0 +	ldd	[%o0 + 0x08], %f2 +	ldd	[%o0 + 0x10], %f4 +	ldd	[%o0 + 0x18], %f6 +	ldd	[%o0 + 0x20], %f8 +	ldd	[%o0 + 0x28], %f10 +	ldd	[%o0 + 0x30], %f12 +	ldd	[%o0 + 0x38], %f14 +	ldd	[%o0 + 0x40], %f16 +	ldd	[%o0 + 0x48], %f18 +	ldd	[%o0 + 0x50], %f20 +	ldd	[%o0 + 0x58], %f22 +	ldd	[%o0 + 0x60], %f24 +	ldd	[%o0 + 0x68], %f26 +	ldd	[%o0 + 0x70], %f28 +	ldd	[%o0 + 0x78], %f30 +	DES_IP(32, 32) +	DES_ROUND(0, 2, 32, 32) +	ldd	[%o0 + 0x80], %f0 +	ldd	[%o0 + 0x88], %f2 +	DES_ROUND(4, 6, 32, 32) +	ldd	[%o0 + 0x90], %f4 +	ldd	[%o0 + 0x98], %f6 +	DES_ROUND(8, 10, 32, 32) +	ldd	[%o0 + 0xa0], %f8 +	ldd	[%o0 + 0xa8], %f10 +	DES_ROUND(12, 14, 32, 32) +	ldd	[%o0 + 0xb0], %f12 +	ldd	[%o0 + 0xb8], %f14 +	DES_ROUND(16, 18, 32, 32) +	ldd	[%o0 + 0xc0], %f16 +	ldd	[%o0 + 0xc8], %f18 +	DES_ROUND(20, 22, 32, 32) +	ldd	[%o0 + 0xd0], %f20 +	ldd	[%o0 + 0xd8], %f22 +	DES_ROUND(24, 26, 32, 32) +	ldd	[%o0 + 0xe0], %f24 +	ldd	[%o0 + 0xe8], %f26 +	DES_ROUND(28, 30, 32, 32) +	ldd	[%o0 + 0xf0], %f28 +	ldd	[%o0 + 0xf8], %f30 +	DES_IIP(32, 32) +	DES_IP(32, 32) +	DES_ROUND(0, 2, 32, 32) +	ldd	[%o0 + 0x100], %f0 +	ldd	[%o0 + 0x108], %f2 +	DES_ROUND(4, 6, 32, 32) +	ldd	[%o0 + 0x110], %f4 +	ldd	[%o0 + 0x118], %f6 +	DES_ROUND(8, 10, 32, 32) +	ldd	[%o0 + 0x120], %f8 +	
ldd	[%o0 + 0x128], %f10 +	DES_ROUND(12, 14, 32, 32) +	ldd	[%o0 + 0x130], %f12 +	ldd	[%o0 + 0x138], %f14 +	DES_ROUND(16, 18, 32, 32) +	ldd	[%o0 + 0x140], %f16 +	ldd	[%o0 + 0x148], %f18 +	DES_ROUND(20, 22, 32, 32) +	ldd	[%o0 + 0x150], %f20 +	ldd	[%o0 + 0x158], %f22 +	DES_ROUND(24, 26, 32, 32) +	ldd	[%o0 + 0x160], %f24 +	ldd	[%o0 + 0x168], %f26 +	DES_ROUND(28, 30, 32, 32) +	ldd	[%o0 + 0x170], %f28 +	ldd	[%o0 + 0x178], %f30 +	DES_IIP(32, 32) +	DES_IP(32, 32) +	DES_ROUND(0, 2, 32, 32) +	DES_ROUND(4, 6, 32, 32) +	DES_ROUND(8, 10, 32, 32) +	DES_ROUND(12, 14, 32, 32) +	DES_ROUND(16, 18, 32, 32) +	DES_ROUND(20, 22, 32, 32) +	DES_ROUND(24, 26, 32, 32) +	DES_ROUND(28, 30, 32, 32) +	DES_IIP(32, 32) + +	std	%f32, [%o2 + 0x00] +	retl +	 VISExit +ENDPROC(des3_ede_sparc64_crypt) + +	.align	32 +ENTRY(des3_ede_sparc64_load_keys) +	/* %o0=key */ +	VISEntry +	ldd	[%o0 + 0x00], %f0 +	ldd	[%o0 + 0x08], %f2 +	ldd	[%o0 + 0x10], %f4 +	ldd	[%o0 + 0x18], %f6 +	ldd	[%o0 + 0x20], %f8 +	ldd	[%o0 + 0x28], %f10 +	ldd	[%o0 + 0x30], %f12 +	ldd	[%o0 + 0x38], %f14 +	ldd	[%o0 + 0x40], %f16 +	ldd	[%o0 + 0x48], %f18 +	ldd	[%o0 + 0x50], %f20 +	ldd	[%o0 + 0x58], %f22 +	ldd	[%o0 + 0x60], %f24 +	ldd	[%o0 + 0x68], %f26 +	ldd	[%o0 + 0x70], %f28 +	ldd	[%o0 + 0x78], %f30 +	ldd	[%o0 + 0x80], %f32 +	ldd	[%o0 + 0x88], %f34 +	ldd	[%o0 + 0x90], %f36 +	ldd	[%o0 + 0x98], %f38 +	ldd	[%o0 + 0xa0], %f40 +	ldd	[%o0 + 0xa8], %f42 +	ldd	[%o0 + 0xb0], %f44 +	ldd	[%o0 + 0xb8], %f46 +	ldd	[%o0 + 0xc0], %f48 +	ldd	[%o0 + 0xc8], %f50 +	ldd	[%o0 + 0xd0], %f52 +	ldd	[%o0 + 0xd8], %f54 +	ldd	[%o0 + 0xe0], %f56 +	retl +	 ldd	[%o0 + 0xe8], %f58 +ENDPROC(des3_ede_sparc64_load_keys) + +#define DES3_LOOP_BODY(X) \ +	DES_IP(X, X) \ +	DES_ROUND(0, 2, X, X) \ +	DES_ROUND(4, 6, X, X) \ +	DES_ROUND(8, 10, X, X) \ +	DES_ROUND(12, 14, X, X) \ +	DES_ROUND(16, 18, X, X) \ +	ldd	[%o0 + 0xf0], %f16; \ +	ldd	[%o0 + 0xf8], %f18; \ +	DES_ROUND(20, 22, X, X) \ +	ldd	[%o0 + 0x100], %f20; \ +	ldd	[%o0 + 0x108], %f22; \ +	DES_ROUND(24, 26, X, X) \ +	ldd	[%o0 + 0x110], %f24; \ +	ldd	[%o0 + 0x118], %f26; \ +	DES_ROUND(28, 30, X, X) \ +	ldd	[%o0 + 0x120], %f28; \ +	ldd	[%o0 + 0x128], %f30; \ +	DES_IIP(X, X) \ +	DES_IP(X, X) \ +	DES_ROUND(32, 34, X, X) \ +	ldd	[%o0 + 0x130], %f0; \ +	ldd	[%o0 + 0x138], %f2; \ +	DES_ROUND(36, 38, X, X) \ +	ldd	[%o0 + 0x140], %f4; \ +	ldd	[%o0 + 0x148], %f6; \ +	DES_ROUND(40, 42, X, X) \ +	ldd	[%o0 + 0x150], %f8; \ +	ldd	[%o0 + 0x158], %f10; \ +	DES_ROUND(44, 46, X, X) \ +	ldd	[%o0 + 0x160], %f12; \ +	ldd	[%o0 + 0x168], %f14; \ +	DES_ROUND(48, 50, X, X) \ +	DES_ROUND(52, 54, X, X) \ +	DES_ROUND(56, 58, X, X) \ +	DES_ROUND(16, 18, X, X) \ +	ldd	[%o0 + 0x170], %f16; \ +	ldd	[%o0 + 0x178], %f18; \ +	DES_IIP(X, X) \ +	DES_IP(X, X) \ +	DES_ROUND(20, 22, X, X) \ +	ldd	[%o0 + 0x50], %f20; \ +	ldd	[%o0 + 0x58], %f22; \ +	DES_ROUND(24, 26, X, X) \ +	ldd	[%o0 + 0x60], %f24; \ +	ldd	[%o0 + 0x68], %f26; \ +	DES_ROUND(28, 30, X, X) \ +	ldd	[%o0 + 0x70], %f28; \ +	ldd	[%o0 + 0x78], %f30; \ +	DES_ROUND(0, 2, X, X) \ +	ldd	[%o0 + 0x00], %f0; \ +	ldd	[%o0 + 0x08], %f2; \ +	DES_ROUND(4, 6, X, X) \ +	ldd	[%o0 + 0x10], %f4; \ +	ldd	[%o0 + 0x18], %f6; \ +	DES_ROUND(8, 10, X, X) \ +	ldd	[%o0 + 0x20], %f8; \ +	ldd	[%o0 + 0x28], %f10; \ +	DES_ROUND(12, 14, X, X) \ +	ldd	[%o0 + 0x30], %f12; \ +	ldd	[%o0 + 0x38], %f14; \ +	DES_ROUND(16, 18, X, X) \ +	ldd	[%o0 + 0x40], %f16; \ +	ldd	[%o0 + 0x48], %f18; \ +	DES_IIP(X, X) + +	.align	32 +ENTRY(des3_ede_sparc64_ecb_crypt) +	/* %o0=key, %o1=input, %o2=output, %o3=len */ +1:	ldd	[%o1 + 0x00], %f60 +	DES3_LOOP_BODY(60) +	std	%f60, 
[%o2 + 0x00] +	add	%o1, 0x08, %o1 +	subcc	%o3, 0x08, %o3 +	bne,pt	%icc, 1b +	 add	%o2, 0x08, %o2 +	retl +	 nop +ENDPROC(des3_ede_sparc64_ecb_crypt) + +	.align	32 +ENTRY(des3_ede_sparc64_cbc_encrypt) +	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ +	ldd	[%o4 + 0x00], %f60 +1:	ldd	[%o1 + 0x00], %f62 +	fxor	%f60, %f62, %f60 +	DES3_LOOP_BODY(60) +	std	%f60, [%o2 + 0x00] +	add	%o1, 0x08, %o1 +	subcc	%o3, 0x08, %o3 +	bne,pt	%icc, 1b +	 add	%o2, 0x08, %o2 +	retl +	 std	%f60, [%o4 + 0x00] +ENDPROC(des3_ede_sparc64_cbc_encrypt) + +	.align	32 +ENTRY(des3_ede_sparc64_cbc_decrypt) +	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ +	ldd	[%o4 + 0x00], %f62 +1:	ldx	[%o1 + 0x00], %g1 +	MOVXTOD_G1_F60 +	DES3_LOOP_BODY(60) +	fxor	%f62, %f60, %f60 +	MOVXTOD_G1_F62 +	std	%f60, [%o2 + 0x00] +	add	%o1, 0x08, %o1 +	subcc	%o3, 0x08, %o3 +	bne,pt	%icc, 1b +	 add	%o2, 0x08, %o2 +	retl +	 stx	%g1, [%o4 + 0x00] +ENDPROC(des3_ede_sparc64_cbc_decrypt) diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c new file mode 100644 index 00000000000..3065bc61f9d --- /dev/null +++ b/arch/sparc/crypto/des_glue.c @@ -0,0 +1,537 @@ +/* Glue code for DES encryption optimized for sparc64 crypto opcodes. + * + * Copyright (C) 2012 David S. Miller <davem@davemloft.net> + */ + +#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt + +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/types.h> +#include <crypto/algapi.h> +#include <crypto/des.h> + +#include <asm/fpumacro.h> +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +struct des_sparc64_ctx { +	u64 encrypt_expkey[DES_EXPKEY_WORDS / 2]; +	u64 decrypt_expkey[DES_EXPKEY_WORDS / 2]; +}; + +struct des3_ede_sparc64_ctx { +	u64 encrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2]; +	u64 decrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2]; +}; + +static void encrypt_to_decrypt(u64 *d, const u64 *e) +{ +	const u64 *s = e + (DES_EXPKEY_WORDS / 2) - 1; +	int i; + +	for (i = 0; i < DES_EXPKEY_WORDS / 2; i++) +		*d++ = *s--; +} + +extern void des_sparc64_key_expand(const u32 *input_key, u64 *key); + +static int des_set_key(struct crypto_tfm *tfm, const u8 *key, +		       unsigned int keylen) +{ +	struct des_sparc64_ctx *dctx = crypto_tfm_ctx(tfm); +	u32 *flags = &tfm->crt_flags; +	u32 tmp[DES_EXPKEY_WORDS]; +	int ret; + +	/* Even though we have special instructions for key expansion, +	 * we call des_ekey() so that we don't have to write our own +	 * weak key detection code. 
+	 */ +	ret = des_ekey(tmp, key); +	if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { +		*flags |= CRYPTO_TFM_RES_WEAK_KEY; +		return -EINVAL; +	} + +	des_sparc64_key_expand((const u32 *) key, &dctx->encrypt_expkey[0]); +	encrypt_to_decrypt(&dctx->decrypt_expkey[0], &dctx->encrypt_expkey[0]); + +	return 0; +} + +extern void des_sparc64_crypt(const u64 *key, const u64 *input, +			      u64 *output); + +static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ +	struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); +	const u64 *K = ctx->encrypt_expkey; + +	des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); +} + +static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ +	struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); +	const u64 *K = ctx->decrypt_expkey; + +	des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); +} + +extern void des_sparc64_load_keys(const u64 *key); + +extern void des_sparc64_ecb_crypt(const u64 *input, u64 *output, +				  unsigned int len); + +#define DES_BLOCK_MASK	(~(DES_BLOCK_SIZE - 1)) + +static int __ecb_crypt(struct blkcipher_desc *desc, +		       struct scatterlist *dst, struct scatterlist *src, +		       unsigned int nbytes, bool encrypt) +{ +	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); +	struct blkcipher_walk walk; +	int err; + +	blkcipher_walk_init(&walk, dst, src, nbytes); +	err = blkcipher_walk_virt(desc, &walk); +	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + +	if (encrypt) +		des_sparc64_load_keys(&ctx->encrypt_expkey[0]); +	else +		des_sparc64_load_keys(&ctx->decrypt_expkey[0]); +	while ((nbytes = walk.nbytes)) { +		unsigned int block_len = nbytes & DES_BLOCK_MASK; + +		if (likely(block_len)) { +			des_sparc64_ecb_crypt((const u64 *)walk.src.virt.addr, +					      (u64 *) walk.dst.virt.addr, +					      block_len); +		} +		nbytes &= DES_BLOCK_SIZE - 1; +		err = blkcipher_walk_done(desc, &walk, nbytes); +	} +	fprs_write(0); +	return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, +		       struct scatterlist *dst, struct scatterlist *src, +		       unsigned int nbytes) +{ +	return __ecb_crypt(desc, dst, src, nbytes, true); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, +		       struct scatterlist *dst, struct scatterlist *src, +		       unsigned int nbytes) +{ +	return __ecb_crypt(desc, dst, src, nbytes, false); +} + +extern void des_sparc64_cbc_encrypt(const u64 *input, u64 *output, +				    unsigned int len, u64 *iv); + +static int cbc_encrypt(struct blkcipher_desc *desc, +		       struct scatterlist *dst, struct scatterlist *src, +		       unsigned int nbytes) +{ +	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); +	struct blkcipher_walk walk; +	int err; + +	blkcipher_walk_init(&walk, dst, src, nbytes); +	err = blkcipher_walk_virt(desc, &walk); +	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + +	des_sparc64_load_keys(&ctx->encrypt_expkey[0]); +	while ((nbytes = walk.nbytes)) { +		unsigned int block_len = nbytes & DES_BLOCK_MASK; + +		if (likely(block_len)) { +			des_sparc64_cbc_encrypt((const u64 *)walk.src.virt.addr, +						(u64 *) walk.dst.virt.addr, +						block_len, (u64 *) walk.iv); +		} +		nbytes &= DES_BLOCK_SIZE - 1; +		err = blkcipher_walk_done(desc, &walk, nbytes); +	} +	fprs_write(0); +	return err; +} + +extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output, +				    unsigned int len, u64 *iv); + +static int cbc_decrypt(struct blkcipher_desc *desc, +		       struct scatterlist *dst, struct scatterlist *src, +		       unsigned int 
nbytes) +{ +	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); +	struct blkcipher_walk walk; +	int err; + +	blkcipher_walk_init(&walk, dst, src, nbytes); +	err = blkcipher_walk_virt(desc, &walk); +	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + +	des_sparc64_load_keys(&ctx->decrypt_expkey[0]); +	while ((nbytes = walk.nbytes)) { +		unsigned int block_len = nbytes & DES_BLOCK_MASK; + +		if (likely(block_len)) { +			des_sparc64_cbc_decrypt((const u64 *)walk.src.virt.addr, +						(u64 *) walk.dst.virt.addr, +						block_len, (u64 *) walk.iv); +		} +		nbytes &= DES_BLOCK_SIZE - 1; +		err = blkcipher_walk_done(desc, &walk, nbytes); +	} +	fprs_write(0); +	return err; +} + +static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key, +			    unsigned int keylen) +{ +	struct des3_ede_sparc64_ctx *dctx = crypto_tfm_ctx(tfm); +	const u32 *K = (const u32 *)key; +	u32 *flags = &tfm->crt_flags; +	u64 k1[DES_EXPKEY_WORDS / 2]; +	u64 k2[DES_EXPKEY_WORDS / 2]; +	u64 k3[DES_EXPKEY_WORDS / 2]; + +	if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) || +		     !((K[2] ^ K[4]) | (K[3] ^ K[5]))) && +		     (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { +		*flags |= CRYPTO_TFM_RES_WEAK_KEY; +		return -EINVAL; +	} + +	des_sparc64_key_expand((const u32 *)key, k1); +	key += DES_KEY_SIZE; +	des_sparc64_key_expand((const u32 *)key, k2); +	key += DES_KEY_SIZE; +	des_sparc64_key_expand((const u32 *)key, k3); + +	memcpy(&dctx->encrypt_expkey[0], &k1[0], sizeof(k1)); +	encrypt_to_decrypt(&dctx->encrypt_expkey[DES_EXPKEY_WORDS / 2], &k2[0]); +	memcpy(&dctx->encrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2], +	       &k3[0], sizeof(k3)); + +	encrypt_to_decrypt(&dctx->decrypt_expkey[0], &k3[0]); +	memcpy(&dctx->decrypt_expkey[DES_EXPKEY_WORDS / 2], +	       &k2[0], sizeof(k2)); +	encrypt_to_decrypt(&dctx->decrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2], +			   &k1[0]); + +	return 0; +} + +extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input, +				   u64 *output); + +static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ +	struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); +	const u64 *K = ctx->encrypt_expkey; + +	des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); +} + +static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ +	struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); +	const u64 *K = ctx->decrypt_expkey; + +	des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); +} + +extern void des3_ede_sparc64_load_keys(const u64 *key); + +extern void des3_ede_sparc64_ecb_crypt(const u64 *expkey, const u64 *input, +				       u64 *output, unsigned int len); + +static int __ecb3_crypt(struct blkcipher_desc *desc, +			struct scatterlist *dst, struct scatterlist *src, +			unsigned int nbytes, bool encrypt) +{ +	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); +	struct blkcipher_walk walk; +	const u64 *K; +	int err; + +	blkcipher_walk_init(&walk, dst, src, nbytes); +	err = blkcipher_walk_virt(desc, &walk); +	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + +	if (encrypt) +		K = &ctx->encrypt_expkey[0]; +	else +		K = &ctx->decrypt_expkey[0]; +	des3_ede_sparc64_load_keys(K); +	while ((nbytes = walk.nbytes)) { +		unsigned int block_len = nbytes & DES_BLOCK_MASK; + +		if (likely(block_len)) { +			const u64 *src64 = (const u64 *)walk.src.virt.addr; +			des3_ede_sparc64_ecb_crypt(K, src64, +						   (u64 *) walk.dst.virt.addr, +						   block_len); +		} +		nbytes &= DES_BLOCK_SIZE - 1; +		err = blkcipher_walk_done(desc, &walk, nbytes); +	} +	
fprs_write(0); +	return err; +} + +static int ecb3_encrypt(struct blkcipher_desc *desc, +		       struct scatterlist *dst, struct scatterlist *src, +		       unsigned int nbytes) +{ +	return __ecb3_crypt(desc, dst, src, nbytes, true); +} + +static int ecb3_decrypt(struct blkcipher_desc *desc, +		       struct scatterlist *dst, struct scatterlist *src, +		       unsigned int nbytes) +{ +	return __ecb3_crypt(desc, dst, src, nbytes, false); +} + +extern void des3_ede_sparc64_cbc_encrypt(const u64 *expkey, const u64 *input, +					 u64 *output, unsigned int len, +					 u64 *iv); + +static int cbc3_encrypt(struct blkcipher_desc *desc, +			struct scatterlist *dst, struct scatterlist *src, +			unsigned int nbytes) +{ +	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); +	struct blkcipher_walk walk; +	const u64 *K; +	int err; + +	blkcipher_walk_init(&walk, dst, src, nbytes); +	err = blkcipher_walk_virt(desc, &walk); +	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + +	K = &ctx->encrypt_expkey[0]; +	des3_ede_sparc64_load_keys(K); +	while ((nbytes = walk.nbytes)) { +		unsigned int block_len = nbytes & DES_BLOCK_MASK; + +		if (likely(block_len)) { +			const u64 *src64 = (const u64 *)walk.src.virt.addr; +			des3_ede_sparc64_cbc_encrypt(K, src64, +						     (u64 *) walk.dst.virt.addr, +						     block_len, +						     (u64 *) walk.iv); +		} +		nbytes &= DES_BLOCK_SIZE - 1; +		err = blkcipher_walk_done(desc, &walk, nbytes); +	} +	fprs_write(0); +	return err; +} + +extern void des3_ede_sparc64_cbc_decrypt(const u64 *expkey, const u64 *input, +					 u64 *output, unsigned int len, +					 u64 *iv); + +static int cbc3_decrypt(struct blkcipher_desc *desc, +			struct scatterlist *dst, struct scatterlist *src, +			unsigned int nbytes) +{ +	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); +	struct blkcipher_walk walk; +	const u64 *K; +	int err; + +	blkcipher_walk_init(&walk, dst, src, nbytes); +	err = blkcipher_walk_virt(desc, &walk); +	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + +	K = &ctx->decrypt_expkey[0]; +	des3_ede_sparc64_load_keys(K); +	while ((nbytes = walk.nbytes)) { +		unsigned int block_len = nbytes & DES_BLOCK_MASK; + +		if (likely(block_len)) { +			const u64 *src64 = (const u64 *)walk.src.virt.addr; +			des3_ede_sparc64_cbc_decrypt(K, src64, +						     (u64 *) walk.dst.virt.addr, +						     block_len, +						     (u64 *) walk.iv); +		} +		nbytes &= DES_BLOCK_SIZE - 1; +		err = blkcipher_walk_done(desc, &walk, nbytes); +	} +	fprs_write(0); +	return err; +} + +static struct crypto_alg algs[] = { { +	.cra_name		= "des", +	.cra_driver_name	= "des-sparc64", +	.cra_priority		= SPARC_CR_OPCODE_PRIORITY, +	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER, +	.cra_blocksize		= DES_BLOCK_SIZE, +	.cra_ctxsize		= sizeof(struct des_sparc64_ctx), +	.cra_alignmask		= 7, +	.cra_module		= THIS_MODULE, +	.cra_u	= { +		.cipher	= { +			.cia_min_keysize	= DES_KEY_SIZE, +			.cia_max_keysize	= DES_KEY_SIZE, +			.cia_setkey		= des_set_key, +			.cia_encrypt		= des_encrypt, +			.cia_decrypt		= des_decrypt +		} +	} +}, { +	.cra_name		= "ecb(des)", +	.cra_driver_name	= "ecb-des-sparc64", +	.cra_priority		= SPARC_CR_OPCODE_PRIORITY, +	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER, +	.cra_blocksize		= DES_BLOCK_SIZE, +	.cra_ctxsize		= sizeof(struct des_sparc64_ctx), +	.cra_alignmask		= 7, +	.cra_type		= &crypto_blkcipher_type, +	.cra_module		= THIS_MODULE, +	.cra_u = { +		.blkcipher = { +			.min_keysize	= DES_KEY_SIZE, +			.max_keysize	= DES_KEY_SIZE, +			.setkey		= des_set_key, +			.encrypt	= ecb_encrypt, +			
.decrypt	= ecb_decrypt, +		}, +	}, +}, { +	.cra_name		= "cbc(des)", +	.cra_driver_name	= "cbc-des-sparc64", +	.cra_priority		= SPARC_CR_OPCODE_PRIORITY, +	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER, +	.cra_blocksize		= DES_BLOCK_SIZE, +	.cra_ctxsize		= sizeof(struct des_sparc64_ctx), +	.cra_alignmask		= 7, +	.cra_type		= &crypto_blkcipher_type, +	.cra_module		= THIS_MODULE, +	.cra_u = { +		.blkcipher = { +			.min_keysize	= DES_KEY_SIZE, +			.max_keysize	= DES_KEY_SIZE, +			.setkey		= des_set_key, +			.encrypt	= cbc_encrypt, +			.decrypt	= cbc_decrypt, +		}, +	}, +}, { +	.cra_name		= "des3_ede", +	.cra_driver_name	= "des3_ede-sparc64", +	.cra_priority		= SPARC_CR_OPCODE_PRIORITY, +	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER, +	.cra_blocksize		= DES3_EDE_BLOCK_SIZE, +	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx), +	.cra_alignmask		= 7, +	.cra_module		= THIS_MODULE, +	.cra_u	= { +		.cipher	= { +			.cia_min_keysize	= DES3_EDE_KEY_SIZE, +			.cia_max_keysize	= DES3_EDE_KEY_SIZE, +			.cia_setkey		= des3_ede_set_key, +			.cia_encrypt		= des3_ede_encrypt, +			.cia_decrypt		= des3_ede_decrypt +		} +	} +}, { +	.cra_name		= "ecb(des3_ede)", +	.cra_driver_name	= "ecb-des3_ede-sparc64", +	.cra_priority		= SPARC_CR_OPCODE_PRIORITY, +	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER, +	.cra_blocksize		= DES3_EDE_BLOCK_SIZE, +	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx), +	.cra_alignmask		= 7, +	.cra_type		= &crypto_blkcipher_type, +	.cra_module		= THIS_MODULE, +	.cra_u = { +		.blkcipher = { +			.min_keysize	= DES3_EDE_KEY_SIZE, +			.max_keysize	= DES3_EDE_KEY_SIZE, +			.setkey		= des3_ede_set_key, +			.encrypt	= ecb3_encrypt, +			.decrypt	= ecb3_decrypt, +		}, +	}, +}, { +	.cra_name		= "cbc(des3_ede)", +	.cra_driver_name	= "cbc-des3_ede-sparc64", +	.cra_priority		= SPARC_CR_OPCODE_PRIORITY, +	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER, +	.cra_blocksize		= DES3_EDE_BLOCK_SIZE, +	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx), +	.cra_alignmask		= 7, +	.cra_type		= &crypto_blkcipher_type, +	.cra_module		= THIS_MODULE, +	.cra_u = { +		.blkcipher = { +			.min_keysize	= DES3_EDE_KEY_SIZE, +			.max_keysize	= DES3_EDE_KEY_SIZE, +			.setkey		= des3_ede_set_key, +			.encrypt	= cbc3_encrypt, +			.decrypt	= cbc3_decrypt, +		}, +	}, +} }; + +static bool __init sparc64_has_des_opcode(void) +{ +	unsigned long cfr; + +	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) +		return false; + +	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); +	if (!(cfr & CFR_DES)) +		return false; + +	return true; +} + +static int __init des_sparc64_mod_init(void) +{ +	int i; + +	for (i = 0; i < ARRAY_SIZE(algs); i++) +		INIT_LIST_HEAD(&algs[i].cra_list); + +	if (sparc64_has_des_opcode()) { +		pr_info("Using sparc64 des opcodes optimized DES implementation\n"); +		return crypto_register_algs(algs, ARRAY_SIZE(algs)); +	} +	pr_info("sparc64 des opcodes not available.\n"); +	return -ENODEV; +} + +static void __exit des_sparc64_mod_fini(void) +{ +	crypto_unregister_algs(algs, ARRAY_SIZE(algs)); +} + +module_init(des_sparc64_mod_init); +module_exit(des_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated"); + +MODULE_ALIAS("des"); + +#include "crop_devid.c" diff --git a/arch/sparc/crypto/md5_asm.S b/arch/sparc/crypto/md5_asm.S new file mode 100644 index 00000000000..3150404e602 --- /dev/null +++ b/arch/sparc/crypto/md5_asm.S @@ -0,0 +1,70 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +ENTRY(md5_sparc64_transform) +	/* %o0 = 
digest, %o1 = data, %o2 = rounds */ +	VISEntryHalf +	ld	[%o0 + 0x00], %f0 +	ld	[%o0 + 0x04], %f1 +	andcc	%o1, 0x7, %g0 +	ld	[%o0 + 0x08], %f2 +	bne,pn	%xcc, 10f +	 ld	[%o0 + 0x0c], %f3 + +1: +	ldd	[%o1 + 0x00], %f8 +	ldd	[%o1 + 0x08], %f10 +	ldd	[%o1 + 0x10], %f12 +	ldd	[%o1 + 0x18], %f14 +	ldd	[%o1 + 0x20], %f16 +	ldd	[%o1 + 0x28], %f18 +	ldd	[%o1 + 0x30], %f20 +	ldd	[%o1 + 0x38], %f22 + +	MD5 + +	subcc	%o2, 1, %o2 +	bne,pt	%xcc, 1b +	 add	%o1, 0x40, %o1 + +5: +	st	%f0, [%o0 + 0x00] +	st	%f1, [%o0 + 0x04] +	st	%f2, [%o0 + 0x08] +	st	%f3, [%o0 + 0x0c] +	retl +	 VISExitHalf +10: +	alignaddr %o1, %g0, %o1 + +	ldd	[%o1 + 0x00], %f10 +1: +	ldd	[%o1 + 0x08], %f12 +	ldd	[%o1 + 0x10], %f14 +	ldd	[%o1 + 0x18], %f16 +	ldd	[%o1 + 0x20], %f18 +	ldd	[%o1 + 0x28], %f20 +	ldd	[%o1 + 0x30], %f22 +	ldd	[%o1 + 0x38], %f24 +	ldd	[%o1 + 0x40], %f26 + +	faligndata %f10, %f12, %f8 +	faligndata %f12, %f14, %f10 +	faligndata %f14, %f16, %f12 +	faligndata %f16, %f18, %f14 +	faligndata %f18, %f20, %f16 +	faligndata %f20, %f22, %f18 +	faligndata %f22, %f24, %f20 +	faligndata %f24, %f26, %f22 + +	MD5 + +	subcc	%o2, 1, %o2 +	fsrc2	%f26, %f10 +	bne,pt	%xcc, 1b +	 add	%o1, 0x40, %o1 + +	ba,a,pt	%xcc, 5b +ENDPROC(md5_sparc64_transform) diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c new file mode 100644 index 00000000000..09a9ea1dfb6 --- /dev/null +++ b/arch/sparc/crypto/md5_glue.c @@ -0,0 +1,190 @@ +/* Glue code for MD5 hashing optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c + * and crypto/md5.c which are: + * + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> + * Copyright (c) Mathias Krause <minipli@googlemail.com> + * Copyright (c) Cryptoapi developers. 
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> + */ + +#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <crypto/md5.h> + +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +asmlinkage void md5_sparc64_transform(u32 *digest, const char *data, +				      unsigned int rounds); + +static int md5_sparc64_init(struct shash_desc *desc) +{ +	struct md5_state *mctx = shash_desc_ctx(desc); + +	mctx->hash[0] = cpu_to_le32(0x67452301); +	mctx->hash[1] = cpu_to_le32(0xefcdab89); +	mctx->hash[2] = cpu_to_le32(0x98badcfe); +	mctx->hash[3] = cpu_to_le32(0x10325476); +	mctx->byte_count = 0; + +	return 0; +} + +static void __md5_sparc64_update(struct md5_state *sctx, const u8 *data, +				 unsigned int len, unsigned int partial) +{ +	unsigned int done = 0; + +	sctx->byte_count += len; +	if (partial) { +		done = MD5_HMAC_BLOCK_SIZE - partial; +		memcpy((u8 *)sctx->block + partial, data, done); +		md5_sparc64_transform(sctx->hash, (u8 *)sctx->block, 1); +	} +	if (len - done >= MD5_HMAC_BLOCK_SIZE) { +		const unsigned int rounds = (len - done) / MD5_HMAC_BLOCK_SIZE; + +		md5_sparc64_transform(sctx->hash, data + done, rounds); +		done += rounds * MD5_HMAC_BLOCK_SIZE; +	} + +	memcpy(sctx->block, data + done, len - done); +} + +static int md5_sparc64_update(struct shash_desc *desc, const u8 *data, +			      unsigned int len) +{ +	struct md5_state *sctx = shash_desc_ctx(desc); +	unsigned int partial = sctx->byte_count % MD5_HMAC_BLOCK_SIZE; + +	/* Handle the fast case right here */ +	if (partial + len < MD5_HMAC_BLOCK_SIZE) { +		sctx->byte_count += len; +		memcpy((u8 *)sctx->block + partial, data, len); +	} else +		__md5_sparc64_update(sctx, data, len, partial); + +	return 0; +} + +/* Add padding and return the message digest. */ +static int md5_sparc64_final(struct shash_desc *desc, u8 *out) +{ +	struct md5_state *sctx = shash_desc_ctx(desc); +	unsigned int i, index, padlen; +	u32 *dst = (u32 *)out; +	__le64 bits; +	static const u8 padding[MD5_HMAC_BLOCK_SIZE] = { 0x80, }; + +	bits = cpu_to_le64(sctx->byte_count << 3); + +	/* Pad out to 56 mod 64 and append length */ +	index = sctx->byte_count % MD5_HMAC_BLOCK_SIZE; +	padlen = (index < 56) ? 
(56 - index) : ((MD5_HMAC_BLOCK_SIZE+56) - index); + +	/* We need to fill a whole block for __md5_sparc64_update() */ +	if (padlen <= 56) { +		sctx->byte_count += padlen; +		memcpy((u8 *)sctx->block + index, padding, padlen); +	} else { +		__md5_sparc64_update(sctx, padding, padlen, index); +	} +	__md5_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56); + +	/* Store state in digest */ +	for (i = 0; i < MD5_HASH_WORDS; i++) +		dst[i] = sctx->hash[i]; + +	/* Wipe context */ +	memset(sctx, 0, sizeof(*sctx)); + +	return 0; +} + +static int md5_sparc64_export(struct shash_desc *desc, void *out) +{ +	struct md5_state *sctx = shash_desc_ctx(desc); + +	memcpy(out, sctx, sizeof(*sctx)); + +	return 0; +} + +static int md5_sparc64_import(struct shash_desc *desc, const void *in) +{ +	struct md5_state *sctx = shash_desc_ctx(desc); + +	memcpy(sctx, in, sizeof(*sctx)); + +	return 0; +} + +static struct shash_alg alg = { +	.digestsize	=	MD5_DIGEST_SIZE, +	.init		=	md5_sparc64_init, +	.update		=	md5_sparc64_update, +	.final		=	md5_sparc64_final, +	.export		=	md5_sparc64_export, +	.import		=	md5_sparc64_import, +	.descsize	=	sizeof(struct md5_state), +	.statesize	=	sizeof(struct md5_state), +	.base		=	{ +		.cra_name	=	"md5", +		.cra_driver_name=	"md5-sparc64", +		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY, +		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH, +		.cra_blocksize	=	MD5_HMAC_BLOCK_SIZE, +		.cra_module	=	THIS_MODULE, +	} +}; + +static bool __init sparc64_has_md5_opcode(void) +{ +	unsigned long cfr; + +	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) +		return false; + +	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); +	if (!(cfr & CFR_MD5)) +		return false; + +	return true; +} + +static int __init md5_sparc64_mod_init(void) +{ +	if (sparc64_has_md5_opcode()) { +		pr_info("Using sparc64 md5 opcode optimized MD5 implementation\n"); +		return crypto_register_shash(&alg); +	} +	pr_info("sparc64 md5 opcode not available.\n"); +	return -ENODEV; +} + +static void __exit md5_sparc64_mod_fini(void) +{ +	crypto_unregister_shash(&alg); +} + +module_init(md5_sparc64_mod_init); +module_exit(md5_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated"); + +MODULE_ALIAS("md5"); + +#include "crop_devid.c" diff --git a/arch/sparc/crypto/opcodes.h b/arch/sparc/crypto/opcodes.h new file mode 100644 index 00000000000..19cbaea6976 --- /dev/null +++ b/arch/sparc/crypto/opcodes.h @@ -0,0 +1,99 @@ +#ifndef _OPCODES_H +#define _OPCODES_H + +#define SPARC_CR_OPCODE_PRIORITY	300 + +#define F3F(x,y,z)	(((x)<<30)|((y)<<19)|((z)<<5)) + +#define FPD_ENCODE(x)	(((x) >> 5) | ((x) & ~(0x20))) + +#define RS1(x)		(FPD_ENCODE(x) << 14) +#define RS2(x)		(FPD_ENCODE(x) <<  0) +#define RS3(x)		(FPD_ENCODE(x) <<  9) +#define RD(x)		(FPD_ENCODE(x) << 25) +#define IMM5_0(x)	((x)           <<  0) +#define IMM5_9(x)	((x)           <<  9) + +#define CRC32C(a,b,c)	\ +	.word		(F3F(2,0x36,0x147)|RS1(a)|RS2(b)|RD(c)); + +#define MD5		\ +	.word	0x81b02800; +#define SHA1		\ +	.word	0x81b02820; +#define SHA256		\ +	.word	0x81b02840; +#define SHA512		\ +	.word	0x81b02860; + +#define AES_EROUND01(a,b,c,d)	\ +	.word	(F3F(2, 0x19, 0)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_EROUND23(a,b,c,d)	\ +	.word	(F3F(2, 0x19, 1)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND01(a,b,c,d)	\ +	.word	(F3F(2, 0x19, 2)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND23(a,b,c,d)	\ +	.word	(F3F(2, 0x19, 3)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_EROUND01_L(a,b,c,d)	\ +	.word	(F3F(2, 0x19, 
4)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_EROUND23_L(a,b,c,d)	\ +	.word	(F3F(2, 0x19, 5)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND01_L(a,b,c,d)	\ +	.word	(F3F(2, 0x19, 6)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND23_L(a,b,c,d)	\ +	.word	(F3F(2, 0x19, 7)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_KEXPAND1(a,b,c,d)	\ +	.word	(F3F(2, 0x19, 8)|RS1(a)|RS2(b)|IMM5_9(c)|RD(d)); +#define AES_KEXPAND0(a,b,c)	\ +	.word	(F3F(2, 0x36, 0x130)|RS1(a)|RS2(b)|RD(c)); +#define AES_KEXPAND2(a,b,c)	\ +	.word	(F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c)); + +#define DES_IP(a,b)		\ +	.word		(F3F(2, 0x36, 0x134)|RS1(a)|RD(b)); +#define DES_IIP(a,b)		\ +	.word		(F3F(2, 0x36, 0x135)|RS1(a)|RD(b)); +#define DES_KEXPAND(a,b,c)	\ +	.word		(F3F(2, 0x36, 0x136)|RS1(a)|IMM5_0(b)|RD(c)); +#define DES_ROUND(a,b,c,d)	\ +	.word		(F3F(2, 0x19, 0x009)|RS1(a)|RS2(b)|RS3(c)|RD(d)); + +#define CAMELLIA_F(a,b,c,d)		\ +	.word		(F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define CAMELLIA_FL(a,b,c)		\ +	.word		(F3F(2, 0x36, 0x13c)|RS1(a)|RS2(b)|RD(c)); +#define CAMELLIA_FLI(a,b,c)		\ +	.word		(F3F(2, 0x36, 0x13d)|RS1(a)|RS2(b)|RD(c)); + +#define MOVDTOX_F0_O4		\ +	.word	0x99b02200 +#define MOVDTOX_F2_O5		\ +	.word	0x9bb02202 +#define MOVXTOD_G1_F60 		\ +	.word	0xbbb02301 +#define MOVXTOD_G1_F62 		\ +	.word	0xbfb02301 +#define MOVXTOD_G3_F4		\ +	.word	0x89b02303; +#define MOVXTOD_G7_F6		\ +	.word	0x8db02307; +#define MOVXTOD_G3_F0		\ +	.word	0x81b02303; +#define MOVXTOD_G7_F2		\ +	.word	0x85b02307; +#define MOVXTOD_O0_F0		\ +	.word	0x81b02308; +#define MOVXTOD_O5_F0		\ +	.word	0x81b0230d; +#define MOVXTOD_O5_F2		\ +	.word	0x85b0230d; +#define MOVXTOD_O5_F4		\ +	.word	0x89b0230d; +#define MOVXTOD_O5_F6		\ +	.word	0x8db0230d; +#define MOVXTOD_G3_F60		\ +	.word	0xbbb02303; +#define MOVXTOD_G7_F62		\ +	.word	0xbfb02307; + +#endif /* _OPCODES_H */ diff --git a/arch/sparc/crypto/sha1_asm.S b/arch/sparc/crypto/sha1_asm.S new file mode 100644 index 00000000000..219d10c5ae0 --- /dev/null +++ b/arch/sparc/crypto/sha1_asm.S @@ -0,0 +1,72 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +ENTRY(sha1_sparc64_transform) +	/* %o0 = digest, %o1 = data, %o2 = rounds */ +	VISEntryHalf +	ld	[%o0 + 0x00], %f0 +	ld	[%o0 + 0x04], %f1 +	ld	[%o0 + 0x08], %f2 +	andcc	%o1, 0x7, %g0 +	ld	[%o0 + 0x0c], %f3 +	bne,pn	%xcc, 10f +	 ld	[%o0 + 0x10], %f4 + +1: +	ldd	[%o1 + 0x00], %f8 +	ldd	[%o1 + 0x08], %f10 +	ldd	[%o1 + 0x10], %f12 +	ldd	[%o1 + 0x18], %f14 +	ldd	[%o1 + 0x20], %f16 +	ldd	[%o1 + 0x28], %f18 +	ldd	[%o1 + 0x30], %f20 +	ldd	[%o1 + 0x38], %f22 + +	SHA1 + +	subcc	%o2, 1, %o2 +	bne,pt	%xcc, 1b +	 add	%o1, 0x40, %o1 + +5: +	st	%f0, [%o0 + 0x00] +	st	%f1, [%o0 + 0x04] +	st	%f2, [%o0 + 0x08] +	st	%f3, [%o0 + 0x0c] +	st	%f4, [%o0 + 0x10] +	retl +	 VISExitHalf +10: +	alignaddr %o1, %g0, %o1 + +	ldd	[%o1 + 0x00], %f10 +1: +	ldd	[%o1 + 0x08], %f12 +	ldd	[%o1 + 0x10], %f14 +	ldd	[%o1 + 0x18], %f16 +	ldd	[%o1 + 0x20], %f18 +	ldd	[%o1 + 0x28], %f20 +	ldd	[%o1 + 0x30], %f22 +	ldd	[%o1 + 0x38], %f24 +	ldd	[%o1 + 0x40], %f26 + +	faligndata %f10, %f12, %f8 +	faligndata %f12, %f14, %f10 +	faligndata %f14, %f16, %f12 +	faligndata %f16, %f18, %f14 +	faligndata %f18, %f20, %f16 +	faligndata %f20, %f22, %f18 +	faligndata %f22, %f24, %f20 +	faligndata %f24, %f26, %f22 + +	SHA1 + +	subcc	%o2, 1, %o2 +	fsrc2	%f26, %f10 +	bne,pt	%xcc, 1b +	 add	%o1, 0x40, %o1 + +	ba,a,pt	%xcc, 5b +ENDPROC(sha1_sparc64_transform) diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c new file mode 100644 index 
00000000000..6cd5f29e1e0 --- /dev/null +++ b/arch/sparc/crypto/sha1_glue.c @@ -0,0 +1,185 @@ +/* Glue code for SHA1 hashing optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c + * + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> + * Copyright (c) Mathias Krause <minipli@googlemail.com> + */ + +#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <crypto/sha.h> + +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +asmlinkage void sha1_sparc64_transform(u32 *digest, const char *data, +				       unsigned int rounds); + +static int sha1_sparc64_init(struct shash_desc *desc) +{ +	struct sha1_state *sctx = shash_desc_ctx(desc); + +	*sctx = (struct sha1_state){ +		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, +	}; + +	return 0; +} + +static void __sha1_sparc64_update(struct sha1_state *sctx, const u8 *data, +				  unsigned int len, unsigned int partial) +{ +	unsigned int done = 0; + +	sctx->count += len; +	if (partial) { +		done = SHA1_BLOCK_SIZE - partial; +		memcpy(sctx->buffer + partial, data, done); +		sha1_sparc64_transform(sctx->state, sctx->buffer, 1); +	} +	if (len - done >= SHA1_BLOCK_SIZE) { +		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; + +		sha1_sparc64_transform(sctx->state, data + done, rounds); +		done += rounds * SHA1_BLOCK_SIZE; +	} + +	memcpy(sctx->buffer, data + done, len - done); +} + +static int sha1_sparc64_update(struct shash_desc *desc, const u8 *data, +			       unsigned int len) +{ +	struct sha1_state *sctx = shash_desc_ctx(desc); +	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; + +	/* Handle the fast case right here */ +	if (partial + len < SHA1_BLOCK_SIZE) { +		sctx->count += len; +		memcpy(sctx->buffer + partial, data, len); +	} else +		__sha1_sparc64_update(sctx, data, len, partial); + +	return 0; +} + +/* Add padding and return the message digest. */ +static int sha1_sparc64_final(struct shash_desc *desc, u8 *out) +{ +	struct sha1_state *sctx = shash_desc_ctx(desc); +	unsigned int i, index, padlen; +	__be32 *dst = (__be32 *)out; +	__be64 bits; +	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; + +	bits = cpu_to_be64(sctx->count << 3); + +	/* Pad out to 56 mod 64 and append length */ +	index = sctx->count % SHA1_BLOCK_SIZE; +	padlen = (index < 56) ? 
(56 - index) : ((SHA1_BLOCK_SIZE+56) - index); + +	/* We need to fill a whole block for __sha1_sparc64_update() */ +	if (padlen <= 56) { +		sctx->count += padlen; +		memcpy(sctx->buffer + index, padding, padlen); +	} else { +		__sha1_sparc64_update(sctx, padding, padlen, index); +	} +	__sha1_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56); + +	/* Store state in digest */ +	for (i = 0; i < 5; i++) +		dst[i] = cpu_to_be32(sctx->state[i]); + +	/* Wipe context */ +	memset(sctx, 0, sizeof(*sctx)); + +	return 0; +} + +static int sha1_sparc64_export(struct shash_desc *desc, void *out) +{ +	struct sha1_state *sctx = shash_desc_ctx(desc); + +	memcpy(out, sctx, sizeof(*sctx)); + +	return 0; +} + +static int sha1_sparc64_import(struct shash_desc *desc, const void *in) +{ +	struct sha1_state *sctx = shash_desc_ctx(desc); + +	memcpy(sctx, in, sizeof(*sctx)); + +	return 0; +} + +static struct shash_alg alg = { +	.digestsize	=	SHA1_DIGEST_SIZE, +	.init		=	sha1_sparc64_init, +	.update		=	sha1_sparc64_update, +	.final		=	sha1_sparc64_final, +	.export		=	sha1_sparc64_export, +	.import		=	sha1_sparc64_import, +	.descsize	=	sizeof(struct sha1_state), +	.statesize	=	sizeof(struct sha1_state), +	.base		=	{ +		.cra_name	=	"sha1", +		.cra_driver_name=	"sha1-sparc64", +		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY, +		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH, +		.cra_blocksize	=	SHA1_BLOCK_SIZE, +		.cra_module	=	THIS_MODULE, +	} +}; + +static bool __init sparc64_has_sha1_opcode(void) +{ +	unsigned long cfr; + +	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) +		return false; + +	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); +	if (!(cfr & CFR_SHA1)) +		return false; + +	return true; +} + +static int __init sha1_sparc64_mod_init(void) +{ +	if (sparc64_has_sha1_opcode()) { +		pr_info("Using sparc64 sha1 opcode optimized SHA-1 implementation\n"); +		return crypto_register_shash(&alg); +	} +	pr_info("sparc64 sha1 opcode not available.\n"); +	return -ENODEV; +} + +static void __exit sha1_sparc64_mod_fini(void) +{ +	crypto_unregister_shash(&alg); +} + +module_init(sha1_sparc64_mod_init); +module_exit(sha1_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, sparc64 sha1 opcode accelerated"); + +MODULE_ALIAS("sha1"); + +#include "crop_devid.c" diff --git a/arch/sparc/crypto/sha256_asm.S b/arch/sparc/crypto/sha256_asm.S new file mode 100644 index 00000000000..b5f3d5826eb --- /dev/null +++ b/arch/sparc/crypto/sha256_asm.S @@ -0,0 +1,78 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +ENTRY(sha256_sparc64_transform) +	/* %o0 = digest, %o1 = data, %o2 = rounds */ +	VISEntryHalf +	ld	[%o0 + 0x00], %f0 +	ld	[%o0 + 0x04], %f1 +	ld	[%o0 + 0x08], %f2 +	ld	[%o0 + 0x0c], %f3 +	ld	[%o0 + 0x10], %f4 +	ld	[%o0 + 0x14], %f5 +	andcc	%o1, 0x7, %g0 +	ld	[%o0 + 0x18], %f6 +	bne,pn	%xcc, 10f +	 ld	[%o0 + 0x1c], %f7 + +1: +	ldd	[%o1 + 0x00], %f8 +	ldd	[%o1 + 0x08], %f10 +	ldd	[%o1 + 0x10], %f12 +	ldd	[%o1 + 0x18], %f14 +	ldd	[%o1 + 0x20], %f16 +	ldd	[%o1 + 0x28], %f18 +	ldd	[%o1 + 0x30], %f20 +	ldd	[%o1 + 0x38], %f22 + +	SHA256 + +	subcc	%o2, 1, %o2 +	bne,pt	%xcc, 1b +	 add	%o1, 0x40, %o1 + +5: +	st	%f0, [%o0 + 0x00] +	st	%f1, [%o0 + 0x04] +	st	%f2, [%o0 + 0x08] +	st	%f3, [%o0 + 0x0c] +	st	%f4, [%o0 + 0x10] +	st	%f5, [%o0 + 0x14] +	st	%f6, [%o0 + 0x18] +	st	%f7, [%o0 + 0x1c] +	retl +	 VISExitHalf +10: +	alignaddr %o1, %g0, %o1 + +	ldd	[%o1 + 0x00], %f10 +1: +	ldd	[%o1 + 0x08], %f12 +	ldd	[%o1 + 0x10], %f14 +	ldd	[%o1 + 0x18], %f16 +	ldd	[%o1 + 0x20], %f18 +	ldd	
[%o1 + 0x28], %f20 +	ldd	[%o1 + 0x30], %f22 +	ldd	[%o1 + 0x38], %f24 +	ldd	[%o1 + 0x40], %f26 + +	faligndata %f10, %f12, %f8 +	faligndata %f12, %f14, %f10 +	faligndata %f14, %f16, %f12 +	faligndata %f16, %f18, %f14 +	faligndata %f18, %f20, %f16 +	faligndata %f20, %f22, %f18 +	faligndata %f22, %f24, %f20 +	faligndata %f24, %f26, %f22 + +	SHA256 + +	subcc	%o2, 1, %o2 +	fsrc2	%f26, %f10 +	bne,pt	%xcc, 1b +	 add	%o1, 0x40, %o1 + +	ba,a,pt	%xcc, 5b +ENDPROC(sha256_sparc64_transform) diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c new file mode 100644 index 00000000000..04f555ab268 --- /dev/null +++ b/arch/sparc/crypto/sha256_glue.c @@ -0,0 +1,243 @@ +/* Glue code for SHA256 hashing optimized for sparc64 crypto opcodes. + * + * This is based largely upon crypto/sha256_generic.c + * + * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> + * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com> + */ + +#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <crypto/sha.h> + +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +asmlinkage void sha256_sparc64_transform(u32 *digest, const char *data, +					 unsigned int rounds); + +static int sha224_sparc64_init(struct shash_desc *desc) +{ +	struct sha256_state *sctx = shash_desc_ctx(desc); +	sctx->state[0] = SHA224_H0; +	sctx->state[1] = SHA224_H1; +	sctx->state[2] = SHA224_H2; +	sctx->state[3] = SHA224_H3; +	sctx->state[4] = SHA224_H4; +	sctx->state[5] = SHA224_H5; +	sctx->state[6] = SHA224_H6; +	sctx->state[7] = SHA224_H7; +	sctx->count = 0; + +	return 0; +} + +static int sha256_sparc64_init(struct shash_desc *desc) +{ +	struct sha256_state *sctx = shash_desc_ctx(desc); +	sctx->state[0] = SHA256_H0; +	sctx->state[1] = SHA256_H1; +	sctx->state[2] = SHA256_H2; +	sctx->state[3] = SHA256_H3; +	sctx->state[4] = SHA256_H4; +	sctx->state[5] = SHA256_H5; +	sctx->state[6] = SHA256_H6; +	sctx->state[7] = SHA256_H7; +	sctx->count = 0; + +	return 0; +} + +static void __sha256_sparc64_update(struct sha256_state *sctx, const u8 *data, +				    unsigned int len, unsigned int partial) +{ +	unsigned int done = 0; + +	sctx->count += len; +	if (partial) { +		done = SHA256_BLOCK_SIZE - partial; +		memcpy(sctx->buf + partial, data, done); +		sha256_sparc64_transform(sctx->state, sctx->buf, 1); +	} +	if (len - done >= SHA256_BLOCK_SIZE) { +		const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; + +		sha256_sparc64_transform(sctx->state, data + done, rounds); +		done += rounds * SHA256_BLOCK_SIZE; +	} + +	memcpy(sctx->buf, data + done, len - done); +} + +static int sha256_sparc64_update(struct shash_desc *desc, const u8 *data, +				 unsigned int len) +{ +	struct sha256_state *sctx = shash_desc_ctx(desc); +	unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; + +	/* Handle the fast case right here */ +	if (partial + len < SHA256_BLOCK_SIZE) { +		sctx->count += len; +		memcpy(sctx->buf + partial, data, len); +	} else +		__sha256_sparc64_update(sctx, data, len, partial); + +	return 0; +} + +static int sha256_sparc64_final(struct shash_desc *desc, u8 *out) +{ +	struct sha256_state *sctx = shash_desc_ctx(desc); +	unsigned int i, index, padlen; +	__be32 *dst = (__be32 *)out; +	__be64 bits; +	static const u8 
padding[SHA256_BLOCK_SIZE] = { 0x80, }; + +	bits = cpu_to_be64(sctx->count << 3); + +	/* Pad out to 56 mod 64 and append length */ +	index = sctx->count % SHA256_BLOCK_SIZE; +	padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56) - index); + +	/* We need to fill a whole block for __sha256_sparc64_update() */ +	if (padlen <= 56) { +		sctx->count += padlen; +		memcpy(sctx->buf + index, padding, padlen); +	} else { +		__sha256_sparc64_update(sctx, padding, padlen, index); +	} +	__sha256_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56); + +	/* Store state in digest */ +	for (i = 0; i < 8; i++) +		dst[i] = cpu_to_be32(sctx->state[i]); + +	/* Wipe context */ +	memset(sctx, 0, sizeof(*sctx)); + +	return 0; +} + +static int sha224_sparc64_final(struct shash_desc *desc, u8 *hash) +{ +	u8 D[SHA256_DIGEST_SIZE]; + +	sha256_sparc64_final(desc, D); + +	memcpy(hash, D, SHA224_DIGEST_SIZE); +	memset(D, 0, SHA256_DIGEST_SIZE); + +	return 0; +} + +static int sha256_sparc64_export(struct shash_desc *desc, void *out) +{ +	struct sha256_state *sctx = shash_desc_ctx(desc); + +	memcpy(out, sctx, sizeof(*sctx)); +	return 0; +} + +static int sha256_sparc64_import(struct shash_desc *desc, const void *in) +{ +	struct sha256_state *sctx = shash_desc_ctx(desc); + +	memcpy(sctx, in, sizeof(*sctx)); +	return 0; +} + +static struct shash_alg sha256 = { +	.digestsize	=	SHA256_DIGEST_SIZE, +	.init		=	sha256_sparc64_init, +	.update		=	sha256_sparc64_update, +	.final		=	sha256_sparc64_final, +	.export		=	sha256_sparc64_export, +	.import		=	sha256_sparc64_import, +	.descsize	=	sizeof(struct sha256_state), +	.statesize	=	sizeof(struct sha256_state), +	.base		=	{ +		.cra_name	=	"sha256", +		.cra_driver_name=	"sha256-sparc64", +		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY, +		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH, +		.cra_blocksize	=	SHA256_BLOCK_SIZE, +		.cra_module	=	THIS_MODULE, +	} +}; + +static struct shash_alg sha224 = { +	.digestsize	=	SHA224_DIGEST_SIZE, +	.init		=	sha224_sparc64_init, +	.update		=	sha256_sparc64_update, +	.final		=	sha224_sparc64_final, +	.descsize	=	sizeof(struct sha256_state), +	.base		=	{ +		.cra_name	=	"sha224", +		.cra_driver_name=	"sha224-sparc64", +		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY, +		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH, +		.cra_blocksize	=	SHA224_BLOCK_SIZE, +		.cra_module	=	THIS_MODULE, +	} +}; + +static bool __init sparc64_has_sha256_opcode(void) +{ +	unsigned long cfr; + +	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) +		return false; + +	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); +	if (!(cfr & CFR_SHA256)) +		return false; + +	return true; +} + +static int __init sha256_sparc64_mod_init(void) +{ +	if (sparc64_has_sha256_opcode()) { +		int ret = crypto_register_shash(&sha224); +		if (ret < 0) +			return ret; + +		ret = crypto_register_shash(&sha256); +		if (ret < 0) { +			crypto_unregister_shash(&sha224); +			return ret; +		} + +		pr_info("Using sparc64 sha256 opcode optimized SHA-256/SHA-224 implementation\n"); +		return 0; +	} +	pr_info("sparc64 sha256 opcode not available.\n"); +	return -ENODEV; +} + +static void __exit sha256_sparc64_mod_fini(void) +{ +	crypto_unregister_shash(&sha224); +	crypto_unregister_shash(&sha256); +} + +module_init(sha256_sparc64_mod_init); +module_exit(sha256_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, sparc64 sha256 opcode accelerated"); + +MODULE_ALIAS("sha224"); +MODULE_ALIAS("sha256"); + +#include "crop_devid.c" diff --git a/arch/sparc/crypto/sha512_asm.S 
b/arch/sparc/crypto/sha512_asm.S new file mode 100644 index 00000000000..54bfba713c0 --- /dev/null +++ b/arch/sparc/crypto/sha512_asm.S @@ -0,0 +1,102 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +ENTRY(sha512_sparc64_transform) +	/* %o0 = digest, %o1 = data, %o2 = rounds */ +	VISEntry +	ldd	[%o0 + 0x00], %f0 +	ldd	[%o0 + 0x08], %f2 +	ldd	[%o0 + 0x10], %f4 +	ldd	[%o0 + 0x18], %f6 +	ldd	[%o0 + 0x20], %f8 +	ldd	[%o0 + 0x28], %f10 +	andcc	%o1, 0x7, %g0 +	ldd	[%o0 + 0x30], %f12 +	bne,pn	%xcc, 10f +	 ldd	[%o0 + 0x38], %f14 + +1: +	ldd	[%o1 + 0x00], %f16 +	ldd	[%o1 + 0x08], %f18 +	ldd	[%o1 + 0x10], %f20 +	ldd	[%o1 + 0x18], %f22 +	ldd	[%o1 + 0x20], %f24 +	ldd	[%o1 + 0x28], %f26 +	ldd	[%o1 + 0x30], %f28 +	ldd	[%o1 + 0x38], %f30 +	ldd	[%o1 + 0x40], %f32 +	ldd	[%o1 + 0x48], %f34 +	ldd	[%o1 + 0x50], %f36 +	ldd	[%o1 + 0x58], %f38 +	ldd	[%o1 + 0x60], %f40 +	ldd	[%o1 + 0x68], %f42 +	ldd	[%o1 + 0x70], %f44 +	ldd	[%o1 + 0x78], %f46 + +	SHA512 + +	subcc	%o2, 1, %o2 +	bne,pt	%xcc, 1b +	 add	%o1, 0x80, %o1 + +5: +	std	%f0, [%o0 + 0x00] +	std	%f2, [%o0 + 0x08] +	std	%f4, [%o0 + 0x10] +	std	%f6, [%o0 + 0x18] +	std	%f8, [%o0 + 0x20] +	std	%f10, [%o0 + 0x28] +	std	%f12, [%o0 + 0x30] +	std	%f14, [%o0 + 0x38] +	retl +	 VISExit +10: +	alignaddr %o1, %g0, %o1 + +	ldd	[%o1 + 0x00], %f18 +1: +	ldd	[%o1 + 0x08], %f20 +	ldd	[%o1 + 0x10], %f22 +	ldd	[%o1 + 0x18], %f24 +	ldd	[%o1 + 0x20], %f26 +	ldd	[%o1 + 0x28], %f28 +	ldd	[%o1 + 0x30], %f30 +	ldd	[%o1 + 0x38], %f32 +	ldd	[%o1 + 0x40], %f34 +	ldd	[%o1 + 0x48], %f36 +	ldd	[%o1 + 0x50], %f38 +	ldd	[%o1 + 0x58], %f40 +	ldd	[%o1 + 0x60], %f42 +	ldd	[%o1 + 0x68], %f44 +	ldd	[%o1 + 0x70], %f46 +	ldd	[%o1 + 0x78], %f48 +	ldd	[%o1 + 0x80], %f50 + +	faligndata %f18, %f20, %f16 +	faligndata %f20, %f22, %f18 +	faligndata %f22, %f24, %f20 +	faligndata %f24, %f26, %f22 +	faligndata %f26, %f28, %f24 +	faligndata %f28, %f30, %f26 +	faligndata %f30, %f32, %f28 +	faligndata %f32, %f34, %f30 +	faligndata %f34, %f36, %f32 +	faligndata %f36, %f38, %f34 +	faligndata %f38, %f40, %f36 +	faligndata %f40, %f42, %f38 +	faligndata %f42, %f44, %f40 +	faligndata %f44, %f46, %f42 +	faligndata %f46, %f48, %f44 +	faligndata %f48, %f50, %f46 + +	SHA512 + +	subcc	%o2, 1, %o2 +	fsrc2	%f50, %f18 +	bne,pt	%xcc, 1b +	 add	%o1, 0x80, %o1 + +	ba,a,pt	%xcc, 5b +ENDPROC(sha512_sparc64_transform) diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c new file mode 100644 index 00000000000..f04d1994d19 --- /dev/null +++ b/arch/sparc/crypto/sha512_glue.c @@ -0,0 +1,228 @@ +/* Glue code for SHA512 hashing optimized for sparc64 crypto opcodes. 
+ * + * This is based largely upon crypto/sha512_generic.c + * + * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) 2003 Kyle McMartin <kyle@debian.org> + */ + +#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <crypto/sha.h> + +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +asmlinkage void sha512_sparc64_transform(u64 *digest, const char *data, +					 unsigned int rounds); + +static int sha512_sparc64_init(struct shash_desc *desc) +{ +	struct sha512_state *sctx = shash_desc_ctx(desc); +	sctx->state[0] = SHA512_H0; +	sctx->state[1] = SHA512_H1; +	sctx->state[2] = SHA512_H2; +	sctx->state[3] = SHA512_H3; +	sctx->state[4] = SHA512_H4; +	sctx->state[5] = SHA512_H5; +	sctx->state[6] = SHA512_H6; +	sctx->state[7] = SHA512_H7; +	sctx->count[0] = sctx->count[1] = 0; + +	return 0; +} + +static int sha384_sparc64_init(struct shash_desc *desc) +{ +	struct sha512_state *sctx = shash_desc_ctx(desc); +	sctx->state[0] = SHA384_H0; +	sctx->state[1] = SHA384_H1; +	sctx->state[2] = SHA384_H2; +	sctx->state[3] = SHA384_H3; +	sctx->state[4] = SHA384_H4; +	sctx->state[5] = SHA384_H5; +	sctx->state[6] = SHA384_H6; +	sctx->state[7] = SHA384_H7; +	sctx->count[0] = sctx->count[1] = 0; + +	return 0; +} + +static void __sha512_sparc64_update(struct sha512_state *sctx, const u8 *data, +				    unsigned int len, unsigned int partial) +{ +	unsigned int done = 0; + +	if ((sctx->count[0] += len) < len) +		sctx->count[1]++; +	if (partial) { +		done = SHA512_BLOCK_SIZE - partial; +		memcpy(sctx->buf + partial, data, done); +		sha512_sparc64_transform(sctx->state, sctx->buf, 1); +	} +	if (len - done >= SHA512_BLOCK_SIZE) { +		const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; + +		sha512_sparc64_transform(sctx->state, data + done, rounds); +		done += rounds * SHA512_BLOCK_SIZE; +	} + +	memcpy(sctx->buf, data + done, len - done); +} + +static int sha512_sparc64_update(struct shash_desc *desc, const u8 *data, +				 unsigned int len) +{ +	struct sha512_state *sctx = shash_desc_ctx(desc); +	unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; + +	/* Handle the fast case right here */ +	if (partial + len < SHA512_BLOCK_SIZE) { +		if ((sctx->count[0] += len) < len) +			sctx->count[1]++; +		memcpy(sctx->buf + partial, data, len); +	} else +		__sha512_sparc64_update(sctx, data, len, partial); + +	return 0; +} + +static int sha512_sparc64_final(struct shash_desc *desc, u8 *out) +{ +	struct sha512_state *sctx = shash_desc_ctx(desc); +	unsigned int i, index, padlen; +	__be64 *dst = (__be64 *)out; +	__be64 bits[2]; +	static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; + +	/* Save number of bits */ +	bits[1] = cpu_to_be64(sctx->count[0] << 3); +	bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); + +	/* Pad out to 112 mod 128 and append length */ +	index = sctx->count[0] % SHA512_BLOCK_SIZE; +	padlen = (index < 112) ? 
(112 - index) : ((SHA512_BLOCK_SIZE+112) - index); + +	/* We need to fill a whole block for __sha512_sparc64_update() */ +	if (padlen <= 112) { +		if ((sctx->count[0] += padlen) < padlen) +			sctx->count[1]++; +		memcpy(sctx->buf + index, padding, padlen); +	} else { +		__sha512_sparc64_update(sctx, padding, padlen, index); +	} +	__sha512_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 112); + +	/* Store state in digest */ +	for (i = 0; i < 8; i++) +		dst[i] = cpu_to_be64(sctx->state[i]); + +	/* Wipe context */ +	memset(sctx, 0, sizeof(*sctx)); + +	return 0; +} + +static int sha384_sparc64_final(struct shash_desc *desc, u8 *hash) +{ +	u8 D[64]; + +	sha512_sparc64_final(desc, D); + +	memcpy(hash, D, 48); +	memset(D, 0, 64); + +	return 0; +} + +static struct shash_alg sha512 = { +	.digestsize	=	SHA512_DIGEST_SIZE, +	.init		=	sha512_sparc64_init, +	.update		=	sha512_sparc64_update, +	.final		=	sha512_sparc64_final, +	.descsize	=	sizeof(struct sha512_state), +	.base		=	{ +		.cra_name	=	"sha512", +		.cra_driver_name=	"sha512-sparc64", +		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY, +		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH, +		.cra_blocksize	=	SHA512_BLOCK_SIZE, +		.cra_module	=	THIS_MODULE, +	} +}; + +static struct shash_alg sha384 = { +	.digestsize	=	SHA384_DIGEST_SIZE, +	.init		=	sha384_sparc64_init, +	.update		=	sha512_sparc64_update, +	.final		=	sha384_sparc64_final, +	.descsize	=	sizeof(struct sha512_state), +	.base		=	{ +		.cra_name	=	"sha384", +		.cra_driver_name=	"sha384-sparc64", +		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY, +		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH, +		.cra_blocksize	=	SHA384_BLOCK_SIZE, +		.cra_module	=	THIS_MODULE, +	} +}; + +static bool __init sparc64_has_sha512_opcode(void) +{ +	unsigned long cfr; + +	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) +		return false; + +	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); +	if (!(cfr & CFR_SHA512)) +		return false; + +	return true; +} + +static int __init sha512_sparc64_mod_init(void) +{ +	if (sparc64_has_sha512_opcode()) { +		int ret = crypto_register_shash(&sha384); +		if (ret < 0) +			return ret; + +		ret = crypto_register_shash(&sha512); +		if (ret < 0) { +			crypto_unregister_shash(&sha384); +			return ret; +		} + +		pr_info("Using sparc64 sha512 opcode optimized SHA-512/SHA-384 implementation\n"); +		return 0; +	} +	pr_info("sparc64 sha512 opcode not available.\n"); +	return -ENODEV; +} + +static void __exit sha512_sparc64_mod_fini(void) +{ +	crypto_unregister_shash(&sha384); +	crypto_unregister_shash(&sha512); +} + +module_init(sha512_sparc64_mod_init); +module_exit(sha512_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-384 and SHA-512 Secure Hash Algorithm, sparc64 sha512 opcode accelerated"); + +MODULE_ALIAS("sha384"); +MODULE_ALIAS("sha512"); + +#include "crop_devid.c"  | 
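
Note: every glue module above registers its algorithm with cra_priority = SPARC_CR_OPCODE_PRIORITY (300), and only after probing the Configuration Feature Register (ASR 26) for the matching CFR_* bit. Because the generic C implementations register the same cra_name at a lower priority, ordinary crypto API users pick up the opcode-accelerated drivers automatically. The following is a hypothetical, self-contained module sketch, not part of the patch, that only illustrates this selection; the module name and printout are invented for the example.

	#include <linux/module.h>
	#include <linux/err.h>
	#include <crypto/hash.h>

	static int __init which_sha256_init(void)
	{
		struct crypto_shash *tfm;

		/* Ask for "sha256" by name; the crypto core binds the
		 * highest-priority registered implementation. */
		tfm = crypto_alloc_shash("sha256", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		/* Expected to report "sha256-sparc64" on CPUs whose CFR
		 * advertises CFR_SHA256, otherwise "sha256-generic". */
		pr_info("sha256 bound to driver %s\n",
			crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm)));

		crypto_free_shash(tfm);
		return 0;
	}

	static void __exit which_sha256_exit(void)
	{
	}

	module_init(which_sha256_init);
	module_exit(which_sha256_exit);
	MODULE_LICENSE("GPL");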

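The md5, sha1 and sha256 final() routines above all share the same padding arithmetic: append a 0x80 byte followed by zeros until the byte count is congruent to 56 mod 64, leaving exactly 8 bytes for the message length in bits (little-endian for MD5, big-endian for the SHA family); sha512 does the analogous thing modulo 128 with a 16-byte length, hence the 112 constant. When padlen fits in the current block (the "padlen <= 56" branch) it is simply copied into the partial buffer, otherwise __*_update() flushes a full extra block first. A standalone userspace check, not kernel code, showing only that the padlen expression always lands on 56 mod 64:

	#include <assert.h>

	/* Mirrors "padlen = (index < 56) ? (56 - index)
	 *                                : ((BLOCK_SIZE + 56) - index)"
	 * for the 64-byte-block digests (MD5, SHA-1, SHA-256). */
	static unsigned int padlen64(unsigned int index)
	{
		return (index < 56) ? (56 - index) : ((64 + 56) - index);
	}

	int main(void)
	{
		unsigned int index;

		for (index = 0; index < 64; index++) {
			/* padlen is never zero: a whole extra block is
			 * emitted once index reaches 56. */
			assert(padlen64(index) >= 1 && padlen64(index) <= 64);
			/* After padding, room for the 8-byte bit count
			 * always remains at the end of a block. */
			assert((index + padlen64(index)) % 64 == 56);
		}
		return 0;
	}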