aboutsummaryrefslogtreecommitdiff
path: root/arch/sparc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sparc')
-rw-r--r--arch/sparc/Kbuild1
-rw-r--r--arch/sparc/Kconfig41
-rw-r--r--arch/sparc/crypto/Makefile25
-rw-r--r--arch/sparc/crypto/aes_asm.S1535
-rw-r--r--arch/sparc/crypto/aes_glue.c477
-rw-r--r--arch/sparc/crypto/camellia_asm.S563
-rw-r--r--arch/sparc/crypto/camellia_glue.c322
-rw-r--r--arch/sparc/crypto/crc32c_asm.S20
-rw-r--r--arch/sparc/crypto/crc32c_glue.c179
-rw-r--r--arch/sparc/crypto/crop_devid.c14
-rw-r--r--arch/sparc/crypto/des_asm.S418
-rw-r--r--arch/sparc/crypto/des_glue.c529
-rw-r--r--arch/sparc/crypto/md5_asm.S70
-rw-r--r--arch/sparc/crypto/md5_glue.c188
-rw-r--r--arch/sparc/crypto/opcodes.h99
-rw-r--r--arch/sparc/crypto/sha1_asm.S72
-rw-r--r--arch/sparc/crypto/sha1_glue.c183
-rw-r--r--arch/sparc/crypto/sha256_asm.S78
-rw-r--r--arch/sparc/crypto/sha256_glue.c241
-rw-r--r--arch/sparc/crypto/sha512_asm.S102
-rw-r--r--arch/sparc/crypto/sha512_glue.c226
-rw-r--r--arch/sparc/include/asm/Kbuild1
-rw-r--r--arch/sparc/include/asm/asi.h23
-rw-r--r--arch/sparc/include/asm/compat.h61
-rw-r--r--arch/sparc/include/asm/elf_32.h3
-rw-r--r--arch/sparc/include/asm/elf_64.h9
-rw-r--r--arch/sparc/include/asm/hugetlb.h9
-rw-r--r--arch/sparc/include/asm/hypervisor.h11
-rw-r--r--arch/sparc/include/asm/mdesc.h1
-rw-r--r--arch/sparc/include/asm/mmu_64.h19
-rw-r--r--arch/sparc/include/asm/mmu_context_64.h2
-rw-r--r--arch/sparc/include/asm/oplib_32.h2
-rw-r--r--arch/sparc/include/asm/oplib_64.h2
-rw-r--r--arch/sparc/include/asm/page_64.h21
-rw-r--r--arch/sparc/include/asm/pcr.h36
-rw-r--r--arch/sparc/include/asm/perfctr.h30
-rw-r--r--arch/sparc/include/asm/pgalloc_64.h56
-rw-r--r--arch/sparc/include/asm/pgtable_64.h253
-rw-r--r--arch/sparc/include/asm/pstate.h14
-rw-r--r--arch/sparc/include/asm/siginfo.h1
-rw-r--r--arch/sparc/include/asm/tsb.h106
-rw-r--r--arch/sparc/include/asm/unistd.h1
-rw-r--r--arch/sparc/include/uapi/asm/Kbuild5
-rw-r--r--arch/sparc/kernel/head_64.S14
-rw-r--r--arch/sparc/kernel/hvapi.c3
-rw-r--r--arch/sparc/kernel/hvcalls.S16
-rw-r--r--arch/sparc/kernel/ktlb.S25
-rw-r--r--arch/sparc/kernel/leon_pci.c9
-rw-r--r--arch/sparc/kernel/mdesc.c24
-rw-r--r--arch/sparc/kernel/module.c13
-rw-r--r--arch/sparc/kernel/nmi.c21
-rw-r--r--arch/sparc/kernel/pci.c6
-rw-r--r--arch/sparc/kernel/pci_sun4v.c2
-rw-r--r--arch/sparc/kernel/pcr.c172
-rw-r--r--arch/sparc/kernel/perf_event.c516
-rw-r--r--arch/sparc/kernel/prom_64.c2
-rw-r--r--arch/sparc/kernel/setup_64.c67
-rw-r--r--arch/sparc/kernel/signal32.c52
-rw-r--r--arch/sparc/kernel/sun4v_tlb_miss.S2
-rw-r--r--arch/sparc/kernel/sys32.S2
-rw-r--r--arch/sparc/kernel/sys_sparc32.c46
-rw-r--r--arch/sparc/kernel/traps_64.c2
-rw-r--r--arch/sparc/kernel/tsb.S9
-rw-r--r--arch/sparc/lib/Makefile3
-rw-r--r--arch/sparc/lib/NG2memcpy.S46
-rw-r--r--arch/sparc/lib/NG4clear_page.S29
-rw-r--r--arch/sparc/lib/NG4copy_from_user.S30
-rw-r--r--arch/sparc/lib/NG4copy_page.S57
-rw-r--r--arch/sparc/lib/NG4copy_to_user.S39
-rw-r--r--arch/sparc/lib/NG4memcpy.S360
-rw-r--r--arch/sparc/lib/NG4memset.S105
-rw-r--r--arch/sparc/lib/NG4patch.S54
-rw-r--r--arch/sparc/lib/NGpage.S2
-rw-r--r--arch/sparc/lib/ksyms.c4
-rw-r--r--arch/sparc/mm/fault_32.c1
-rw-r--r--arch/sparc/mm/fault_64.c5
-rw-r--r--arch/sparc/mm/hugetlbpage.c50
-rw-r--r--arch/sparc/mm/init_64.c551
-rw-r--r--arch/sparc/mm/init_64.h4
-rw-r--r--arch/sparc/mm/iommu.c4
-rw-r--r--arch/sparc/mm/tlb.c118
-rw-r--r--arch/sparc/mm/tsb.c40
-rw-r--r--arch/sparc/net/bpf_jit_comp.c4
83 files changed, 7839 insertions, 719 deletions
diff --git a/arch/sparc/Kbuild b/arch/sparc/Kbuild
index 5cd01161fd0..675afa285dd 100644
--- a/arch/sparc/Kbuild
+++ b/arch/sparc/Kbuild
@@ -6,3 +6,4 @@ obj-y += kernel/
obj-y += mm/
obj-y += math-emu/
obj-y += net/
+obj-y += crypto/
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 67f1f6f5f4e..91c780c973b 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -18,6 +18,7 @@ config SPARC
select HAVE_OPROFILE
select HAVE_ARCH_KGDB if !SMP || SPARC64
select HAVE_ARCH_TRACEHOOK
+ select SYSCTL_EXCEPTION_TRACE
select ARCH_WANT_OPTIONAL_GPIOLIB
select RTC_CLASS
select RTC_DRV_M48T59
@@ -32,6 +33,7 @@ config SPARC
select GENERIC_PCI_IOMAP
select HAVE_NMI_WATCHDOG if SPARC64
select HAVE_BPF_JIT
+ select HAVE_DEBUG_BUGVERBOSE
select GENERIC_SMP_IDLE_THREAD
select GENERIC_CMOS_UPDATE
select GENERIC_CLOCKEVENTS
@@ -42,6 +44,7 @@ config SPARC32
def_bool !64BIT
select GENERIC_ATOMIC64
select CLZ_TAB
+ select HAVE_UID16
config SPARC64
def_bool 64BIT
@@ -59,6 +62,7 @@ config SPARC64
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_DEBUG_KMEMLEAK
select RTC_DRV_CMOS
select RTC_DRV_BQ4802
select RTC_DRV_SUN4V
@@ -226,25 +230,6 @@ config EARLYFB
help
Say Y here to enable a faster early framebuffer boot console.
-choice
- prompt "Kernel page size" if SPARC64
- default SPARC64_PAGE_SIZE_8KB
-
-config SPARC64_PAGE_SIZE_8KB
- bool "8KB"
- help
- This lets you select the page size of the kernel.
-
- 8KB and 64KB work quite well, since SPARC ELF sections
- provide for up to 64KB alignment.
-
- If you don't know what to do, choose 8KB.
-
-config SPARC64_PAGE_SIZE_64KB
- bool "64KB"
-
-endchoice
-
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on SPARC64 && PROC_FS
@@ -316,23 +301,6 @@ config GENERIC_LOCKBREAK
default y
depends on SPARC64 && SMP && PREEMPT
-choice
- prompt "SPARC64 Huge TLB Page Size"
- depends on SPARC64 && HUGETLB_PAGE
- default HUGETLB_PAGE_SIZE_4MB
-
-config HUGETLB_PAGE_SIZE_4MB
- bool "4MB"
-
-config HUGETLB_PAGE_SIZE_512K
- bool "512K"
-
-config HUGETLB_PAGE_SIZE_64K
- depends on !SPARC64_PAGE_SIZE_64KB
- bool "64K"
-
-endchoice
-
config NUMA
bool "NUMA support"
depends on SPARC64 && SMP
@@ -571,6 +539,7 @@ config COMPAT
depends on SPARC64
default y
select COMPAT_BINFMT_ELF
+ select HAVE_UID16
select ARCH_WANT_OLD_COMPAT_IPC
config SYSVIPC_COMPAT
diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile
new file mode 100644
index 00000000000..6ae1ad5e502
--- /dev/null
+++ b/arch/sparc/crypto/Makefile
@@ -0,0 +1,25 @@
+#
+# Arch-specific CryptoAPI modules.
+#
+
+obj-$(CONFIG_CRYPTO_SHA1_SPARC64) += sha1-sparc64.o
+obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o
+obj-$(CONFIG_CRYPTO_SHA512_SPARC64) += sha512-sparc64.o
+obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o
+
+obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o
+obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o
+obj-$(CONFIG_CRYPTO_DES_SPARC64) += camellia-sparc64.o
+
+obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o
+
+sha1-sparc64-y := sha1_asm.o sha1_glue.o crop_devid.o
+sha256-sparc64-y := sha256_asm.o sha256_glue.o crop_devid.o
+sha512-sparc64-y := sha512_asm.o sha512_glue.o crop_devid.o
+md5-sparc64-y := md5_asm.o md5_glue.o crop_devid.o
+
+aes-sparc64-y := aes_asm.o aes_glue.o crop_devid.o
+des-sparc64-y := des_asm.o des_glue.o crop_devid.o
+camellia-sparc64-y := camellia_asm.o camellia_glue.o crop_devid.o
+
+crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o crop_devid.o
diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S
new file mode 100644
index 00000000000..23f6cbb910d
--- /dev/null
+++ b/arch/sparc/crypto/aes_asm.S
@@ -0,0 +1,1535 @@
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+#include "opcodes.h"
+
+#define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
+ AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \
+ AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \
+ AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \
+ AES_EROUND23(KEY_BASE + 6, T0, T1, I1)
+
+#define ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+ AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \
+ AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \
+ AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \
+ AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \
+ AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \
+ AES_EROUND23(KEY_BASE + 6, T0, T1, I1) \
+ AES_EROUND01(KEY_BASE + 4, T2, T3, I2) \
+ AES_EROUND23(KEY_BASE + 6, T2, T3, I3)
+
+#define ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
+ AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \
+ AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \
+ AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \
+ AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1)
+
+#define ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+ AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \
+ AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \
+ AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \
+ AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \
+ AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \
+ AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1) \
+ AES_EROUND01_L(KEY_BASE + 4, T2, T3, I2) \
+ AES_EROUND23_L(KEY_BASE + 6, T2, T3, I3)
+
+ /* 10 rounds */
+#define ENCRYPT_128(KEY_BASE, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
+
+#define ENCRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3)
+
+ /* 12 rounds */
+#define ENCRYPT_192(KEY_BASE, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1)
+
+#define ENCRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3)
+
+ /* 14 rounds */
+#define ENCRYPT_256(KEY_BASE, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
+
+#define ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \
+ TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6)
+
+#define ENCRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \
+ ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \
+ ldd [%o0 + 0xd0], %f56; \
+ ldd [%o0 + 0xd8], %f58; \
+ ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \
+ ldd [%o0 + 0xe0], %f60; \
+ ldd [%o0 + 0xe8], %f62; \
+ ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \
+ ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \
+ ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \
+ ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \
+ AES_EROUND01(KEY_BASE + 48, I0, I1, KEY_BASE + 0) \
+ AES_EROUND23(KEY_BASE + 50, I0, I1, KEY_BASE + 2) \
+ AES_EROUND01(KEY_BASE + 48, I2, I3, KEY_BASE + 4) \
+ AES_EROUND23(KEY_BASE + 50, I2, I3, KEY_BASE + 6) \
+ AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I0) \
+ AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I1) \
+ ldd [%o0 + 0x10], %f8; \
+ ldd [%o0 + 0x18], %f10; \
+ AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I2) \
+ AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I3) \
+ ldd [%o0 + 0x20], %f12; \
+ ldd [%o0 + 0x28], %f14;
+
+#define DECRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
+ AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \
+ AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \
+ AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \
+ AES_DROUND01(KEY_BASE + 6, T0, T1, I0)
+
+#define DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+ AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \
+ AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \
+ AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \
+ AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \
+ AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \
+ AES_DROUND01(KEY_BASE + 6, T0, T1, I0) \
+ AES_DROUND23(KEY_BASE + 4, T2, T3, I3) \
+ AES_DROUND01(KEY_BASE + 6, T2, T3, I2)
+
+#define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
+ AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \
+ AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \
+ AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \
+ AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0)
+
+#define DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+ AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \
+ AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \
+ AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \
+ AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \
+ AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \
+ AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) \
+ AES_DROUND23_L(KEY_BASE + 4, T2, T3, I3) \
+ AES_DROUND01_L(KEY_BASE + 6, T2, T3, I2)
+
+ /* 10 rounds */
+#define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
+
+#define DECRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3)
+
+ /* 12 rounds */
+#define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1)
+
+#define DECRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3)
+
+ /* 14 rounds */
+#define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
+
+#define DECRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \
+ TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6)
+
+#define DECRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \
+ DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \
+ ldd [%o0 + 0x18], %f56; \
+ ldd [%o0 + 0x10], %f58; \
+ DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \
+ ldd [%o0 + 0x08], %f60; \
+ ldd [%o0 + 0x00], %f62; \
+ DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \
+ DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \
+ DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \
+ DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \
+ AES_DROUND23(KEY_BASE + 48, I0, I1, KEY_BASE + 2) \
+ AES_DROUND01(KEY_BASE + 50, I0, I1, KEY_BASE + 0) \
+ AES_DROUND23(KEY_BASE + 48, I2, I3, KEY_BASE + 6) \
+ AES_DROUND01(KEY_BASE + 50, I2, I3, KEY_BASE + 4) \
+ AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I1) \
+ AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I0) \
+ ldd [%o0 + 0xd8], %f8; \
+ ldd [%o0 + 0xd0], %f10; \
+ AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I3) \
+ AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I2) \
+ ldd [%o0 + 0xc8], %f12; \
+ ldd [%o0 + 0xc0], %f14;
+
+ .align 32
+ENTRY(aes_sparc64_key_expand)
+ /* %o0=input_key, %o1=output_key, %o2=key_len */
+ VISEntry
+ ld [%o0 + 0x00], %f0
+ ld [%o0 + 0x04], %f1
+ ld [%o0 + 0x08], %f2
+ ld [%o0 + 0x0c], %f3
+
+ std %f0, [%o1 + 0x00]
+ std %f2, [%o1 + 0x08]
+ add %o1, 0x10, %o1
+
+ cmp %o2, 24
+ bl 2f
+ nop
+
+ be 1f
+ nop
+
+ /* 256-bit key expansion */
+ ld [%o0 + 0x10], %f4
+ ld [%o0 + 0x14], %f5
+ ld [%o0 + 0x18], %f6
+ ld [%o0 + 0x1c], %f7
+
+ std %f4, [%o1 + 0x00]
+ std %f6, [%o1 + 0x08]
+ add %o1, 0x10, %o1
+
+ AES_KEXPAND1(0, 6, 0x0, 8)
+ AES_KEXPAND2(2, 8, 10)
+ AES_KEXPAND0(4, 10, 12)
+ AES_KEXPAND2(6, 12, 14)
+ AES_KEXPAND1(8, 14, 0x1, 16)
+ AES_KEXPAND2(10, 16, 18)
+ AES_KEXPAND0(12, 18, 20)
+ AES_KEXPAND2(14, 20, 22)
+ AES_KEXPAND1(16, 22, 0x2, 24)
+ AES_KEXPAND2(18, 24, 26)
+ AES_KEXPAND0(20, 26, 28)
+ AES_KEXPAND2(22, 28, 30)
+ AES_KEXPAND1(24, 30, 0x3, 32)
+ AES_KEXPAND2(26, 32, 34)
+ AES_KEXPAND0(28, 34, 36)
+ AES_KEXPAND2(30, 36, 38)
+ AES_KEXPAND1(32, 38, 0x4, 40)
+ AES_KEXPAND2(34, 40, 42)
+ AES_KEXPAND0(36, 42, 44)
+ AES_KEXPAND2(38, 44, 46)
+ AES_KEXPAND1(40, 46, 0x5, 48)
+ AES_KEXPAND2(42, 48, 50)
+ AES_KEXPAND0(44, 50, 52)
+ AES_KEXPAND2(46, 52, 54)
+ AES_KEXPAND1(48, 54, 0x6, 56)
+ AES_KEXPAND2(50, 56, 58)
+
+ std %f8, [%o1 + 0x00]
+ std %f10, [%o1 + 0x08]
+ std %f12, [%o1 + 0x10]
+ std %f14, [%o1 + 0x18]
+ std %f16, [%o1 + 0x20]
+ std %f18, [%o1 + 0x28]
+ std %f20, [%o1 + 0x30]
+ std %f22, [%o1 + 0x38]
+ std %f24, [%o1 + 0x40]
+ std %f26, [%o1 + 0x48]
+ std %f28, [%o1 + 0x50]
+ std %f30, [%o1 + 0x58]
+ std %f32, [%o1 + 0x60]
+ std %f34, [%o1 + 0x68]
+ std %f36, [%o1 + 0x70]
+ std %f38, [%o1 + 0x78]
+ std %f40, [%o1 + 0x80]
+ std %f42, [%o1 + 0x88]
+ std %f44, [%o1 + 0x90]
+ std %f46, [%o1 + 0x98]
+ std %f48, [%o1 + 0xa0]
+ std %f50, [%o1 + 0xa8]
+ std %f52, [%o1 + 0xb0]
+ std %f54, [%o1 + 0xb8]
+ std %f56, [%o1 + 0xc0]
+ ba,pt %xcc, 80f
+ std %f58, [%o1 + 0xc8]
+
+1:
+ /* 192-bit key expansion */
+ ld [%o0 + 0x10], %f4
+ ld [%o0 + 0x14], %f5
+
+ std %f4, [%o1 + 0x00]
+ add %o1, 0x08, %o1
+
+ AES_KEXPAND1(0, 4, 0x0, 6)
+ AES_KEXPAND2(2, 6, 8)
+ AES_KEXPAND2(4, 8, 10)
+ AES_KEXPAND1(6, 10, 0x1, 12)
+ AES_KEXPAND2(8, 12, 14)
+ AES_KEXPAND2(10, 14, 16)
+ AES_KEXPAND1(12, 16, 0x2, 18)
+ AES_KEXPAND2(14, 18, 20)
+ AES_KEXPAND2(16, 20, 22)
+ AES_KEXPAND1(18, 22, 0x3, 24)
+ AES_KEXPAND2(20, 24, 26)
+ AES_KEXPAND2(22, 26, 28)
+ AES_KEXPAND1(24, 28, 0x4, 30)
+ AES_KEXPAND2(26, 30, 32)
+ AES_KEXPAND2(28, 32, 34)
+ AES_KEXPAND1(30, 34, 0x5, 36)
+ AES_KEXPAND2(32, 36, 38)
+ AES_KEXPAND2(34, 38, 40)
+ AES_KEXPAND1(36, 40, 0x6, 42)
+ AES_KEXPAND2(38, 42, 44)
+ AES_KEXPAND2(40, 44, 46)
+ AES_KEXPAND1(42, 46, 0x7, 48)
+ AES_KEXPAND2(44, 48, 50)
+
+ std %f6, [%o1 + 0x00]
+ std %f8, [%o1 + 0x08]
+ std %f10, [%o1 + 0x10]
+ std %f12, [%o1 + 0x18]
+ std %f14, [%o1 + 0x20]
+ std %f16, [%o1 + 0x28]
+ std %f18, [%o1 + 0x30]
+ std %f20, [%o1 + 0x38]
+ std %f22, [%o1 + 0x40]
+ std %f24, [%o1 + 0x48]
+ std %f26, [%o1 + 0x50]
+ std %f28, [%o1 + 0x58]
+ std %f30, [%o1 + 0x60]
+ std %f32, [%o1 + 0x68]
+ std %f34, [%o1 + 0x70]
+ std %f36, [%o1 + 0x78]
+ std %f38, [%o1 + 0x80]
+ std %f40, [%o1 + 0x88]
+ std %f42, [%o1 + 0x90]
+ std %f44, [%o1 + 0x98]
+ std %f46, [%o1 + 0xa0]
+ std %f48, [%o1 + 0xa8]
+ ba,pt %xcc, 80f
+ std %f50, [%o1 + 0xb0]
+
+2:
+ /* 128-bit key expansion */
+ AES_KEXPAND1(0, 2, 0x0, 4)
+ AES_KEXPAND2(2, 4, 6)
+ AES_KEXPAND1(4, 6, 0x1, 8)
+ AES_KEXPAND2(6, 8, 10)
+ AES_KEXPAND1(8, 10, 0x2, 12)
+ AES_KEXPAND2(10, 12, 14)
+ AES_KEXPAND1(12, 14, 0x3, 16)
+ AES_KEXPAND2(14, 16, 18)
+ AES_KEXPAND1(16, 18, 0x4, 20)
+ AES_KEXPAND2(18, 20, 22)
+ AES_KEXPAND1(20, 22, 0x5, 24)
+ AES_KEXPAND2(22, 24, 26)
+ AES_KEXPAND1(24, 26, 0x6, 28)
+ AES_KEXPAND2(26, 28, 30)
+ AES_KEXPAND1(28, 30, 0x7, 32)
+ AES_KEXPAND2(30, 32, 34)
+ AES_KEXPAND1(32, 34, 0x8, 36)
+ AES_KEXPAND2(34, 36, 38)
+ AES_KEXPAND1(36, 38, 0x9, 40)
+ AES_KEXPAND2(38, 40, 42)
+
+ std %f4, [%o1 + 0x00]
+ std %f6, [%o1 + 0x08]
+ std %f8, [%o1 + 0x10]
+ std %f10, [%o1 + 0x18]
+ std %f12, [%o1 + 0x20]
+ std %f14, [%o1 + 0x28]
+ std %f16, [%o1 + 0x30]
+ std %f18, [%o1 + 0x38]
+ std %f20, [%o1 + 0x40]
+ std %f22, [%o1 + 0x48]
+ std %f24, [%o1 + 0x50]
+ std %f26, [%o1 + 0x58]
+ std %f28, [%o1 + 0x60]
+ std %f30, [%o1 + 0x68]
+ std %f32, [%o1 + 0x70]
+ std %f34, [%o1 + 0x78]
+ std %f36, [%o1 + 0x80]
+ std %f38, [%o1 + 0x88]
+ std %f40, [%o1 + 0x90]
+ std %f42, [%o1 + 0x98]
+80:
+ retl
+ VISExit
+ENDPROC(aes_sparc64_key_expand)
+
+ .align 32
+ENTRY(aes_sparc64_encrypt_128)
+ /* %o0=key, %o1=input, %o2=output */
+ VISEntry
+ ld [%o1 + 0x00], %f4
+ ld [%o1 + 0x04], %f5
+ ld [%o1 + 0x08], %f6
+ ld [%o1 + 0x0c], %f7
+ ldd [%o0 + 0x00], %f8
+ ldd [%o0 + 0x08], %f10
+ ldd [%o0 + 0x10], %f12
+ ldd [%o0 + 0x18], %f14
+ ldd [%o0 + 0x20], %f16
+ ldd [%o0 + 0x28], %f18
+ ldd [%o0 + 0x30], %f20
+ ldd [%o0 + 0x38], %f22
+ ldd [%o0 + 0x40], %f24
+ ldd [%o0 + 0x48], %f26
+ ldd [%o0 + 0x50], %f28
+ ldd [%o0 + 0x58], %f30
+ ldd [%o0 + 0x60], %f32
+ ldd [%o0 + 0x68], %f34
+ ldd [%o0 + 0x70], %f36
+ ldd [%o0 + 0x78], %f38
+ ldd [%o0 + 0x80], %f40
+ ldd [%o0 + 0x88], %f42
+ ldd [%o0 + 0x90], %f44
+ ldd [%o0 + 0x98], %f46
+ ldd [%o0 + 0xa0], %f48
+ ldd [%o0 + 0xa8], %f50
+ fxor %f8, %f4, %f4
+ fxor %f10, %f6, %f6
+ ENCRYPT_128(12, 4, 6, 0, 2)
+ st %f4, [%o2 + 0x00]
+ st %f5, [%o2 + 0x04]
+ st %f6, [%o2 + 0x08]
+ st %f7, [%o2 + 0x0c]
+ retl
+ VISExit
+ENDPROC(aes_sparc64_encrypt_128)
+
+ .align 32
+ENTRY(aes_sparc64_encrypt_192)
+ /* %o0=key, %o1=input, %o2=output */
+ VISEntry
+ ld [%o1 + 0x00], %f4
+ ld [%o1 + 0x04], %f5
+ ld [%o1 + 0x08], %f6
+ ld [%o1 + 0x0c], %f7
+
+ ldd [%o0 + 0x00], %f8
+ ldd [%o0 + 0x08], %f10
+
+ fxor %f8, %f4, %f4
+ fxor %f10, %f6, %f6
+
+ ldd [%o0 + 0x10], %f8
+ ldd [%o0 + 0x18], %f10
+ ldd [%o0 + 0x20], %f12
+ ldd [%o0 + 0x28], %f14
+ add %o0, 0x20, %o0
+
+ ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
+
+ ldd [%o0 + 0x10], %f12
+ ldd [%o0 + 0x18], %f14
+ ldd [%o0 + 0x20], %f16
+ ldd [%o0 + 0x28], %f18
+ ldd [%o0 + 0x30], %f20
+ ldd [%o0 + 0x38], %f22
+ ldd [%o0 + 0x40], %f24
+ ldd [%o0 + 0x48], %f26
+ ldd [%o0 + 0x50], %f28
+ ldd [%o0 + 0x58], %f30
+ ldd [%o0 + 0x60], %f32
+ ldd [%o0 + 0x68], %f34
+ ldd [%o0 + 0x70], %f36
+ ldd [%o0 + 0x78], %f38
+ ldd [%o0 + 0x80], %f40
+ ldd [%o0 + 0x88], %f42
+ ldd [%o0 + 0x90], %f44
+ ldd [%o0 + 0x98], %f46
+ ldd [%o0 + 0xa0], %f48
+ ldd [%o0 + 0xa8], %f50
+
+
+ ENCRYPT_128(12, 4, 6, 0, 2)
+
+ st %f4, [%o2 + 0x00]
+ st %f5, [%o2 + 0x04]
+ st %f6, [%o2 + 0x08]
+ st %f7, [%o2 + 0x0c]
+
+ retl
+ VISExit
+ENDPROC(aes_sparc64_encrypt_192)
+
+ .align 32
+ENTRY(aes_sparc64_encrypt_256)
+ /* %o0=key, %o1=input, %o2=output */
+ VISEntry
+ ld [%o1 + 0x00], %f4
+ ld [%o1 + 0x04], %f5
+ ld [%o1 + 0x08], %f6
+ ld [%o1 + 0x0c], %f7
+
+ ldd [%o0 + 0x00], %f8
+ ldd [%o0 + 0x08], %f10
+
+ fxor %f8, %f4, %f4
+ fxor %f10, %f6, %f6
+
+ ldd [%o0 + 0x10], %f8
+
+ ldd [%o0 + 0x18], %f10
+ ldd [%o0 + 0x20], %f12
+ ldd [%o0 + 0x28], %f14
+ add %o0, 0x20, %o0
+
+ ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
+
+ ldd [%o0 + 0x10], %f8
+
+ ldd [%o0 + 0x18], %f10
+ ldd [%o0 + 0x20], %f12
+ ldd [%o0 + 0x28], %f14
+ add %o0, 0x20, %o0
+
+ ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
+
+ ldd [%o0 + 0x10], %f12
+ ldd [%o0 + 0x18], %f14
+ ldd [%o0 + 0x20], %f16
+ ldd [%o0 + 0x28], %f18
+ ldd [%o0 + 0x30], %f20
+ ldd [%o0 + 0x38], %f22
+ ldd [%o0 + 0x40], %f24
+ ldd [%o0 + 0x48], %f26
+ ldd [%o0 + 0x50], %f28
+ ldd [%o0 + 0x58], %f30
+ ldd [%o0 + 0x60], %f32
+ ldd [%o0 + 0x68], %f34
+ ldd [%o0 + 0x70], %f36
+ ldd [%o0 + 0x78], %f38
+ ldd [%o0 + 0x80], %f40
+ ldd [%o0 + 0x88], %f42
+ ldd [%o0 + 0x90], %f44
+ ldd [%o0 + 0x98], %f46
+ ldd [%o0 + 0xa0], %f48
+ ldd [%o0 + 0xa8], %f50
+
+ ENCRYPT_128(12, 4, 6, 0, 2)
+
+ st %f4, [%o2 + 0x00]
+ st %f5, [%o2 + 0x04]
+ st %f6, [%o2 + 0x08]
+ st %f7, [%o2 + 0x0c]
+
+ retl
+ VISExit
+ENDPROC(aes_sparc64_encrypt_256)
+
+ .align 32
+ENTRY(aes_sparc64_decrypt_128)
+ /* %o0=key, %o1=input, %o2=output */
+ VISEntry
+ ld [%o1 + 0x00], %f4
+ ld [%o1 + 0x04], %f5
+ ld [%o1 + 0x08], %f6
+ ld [%o1 + 0x0c], %f7
+ ldd [%o0 + 0xa0], %f8
+ ldd [%o0 + 0xa8], %f10
+ ldd [%o0 + 0x98], %f12
+ ldd [%o0 + 0x90], %f14
+ ldd [%o0 + 0x88], %f16
+ ldd [%o0 + 0x80], %f18
+ ldd [%o0 + 0x78], %f20
+ ldd [%o0 + 0x70], %f22
+ ldd [%o0 + 0x68], %f24
+ ldd [%o0 + 0x60], %f26
+ ldd [%o0 + 0x58], %f28
+ ldd [%o0 + 0x50], %f30
+ ldd [%o0 + 0x48], %f32
+ ldd [%o0 + 0x40], %f34
+ ldd [%o0 + 0x38], %f36
+ ldd [%o0 + 0x30], %f38
+ ldd [%o0 + 0x28], %f40
+ ldd [%o0 + 0x20], %f42
+ ldd [%o0 + 0x18], %f44
+ ldd [%o0 + 0x10], %f46
+ ldd [%o0 + 0x08], %f48
+ ldd [%o0 + 0x00], %f50
+ fxor %f8, %f4, %f4
+ fxor %f10, %f6, %f6
+ DECRYPT_128(12, 4, 6, 0, 2)
+ st %f4, [%o2 + 0x00]
+ st %f5, [%o2 + 0x04]
+ st %f6, [%o2 + 0x08]
+ st %f7, [%o2 + 0x0c]
+ retl
+ VISExit
+ENDPROC(aes_sparc64_decrypt_128)
+
+ .align 32
+ENTRY(aes_sparc64_decrypt_192)
+ /* %o0=key, %o1=input, %o2=output */
+ VISEntry
+ ld [%o1 + 0x00], %f4
+ ld [%o1 + 0x04], %f5
+ ld [%o1 + 0x08], %f6
+ ld [%o1 + 0x0c], %f7
+ ldd [%o0 + 0xc0], %f8
+ ldd [%o0 + 0xc8], %f10
+ ldd [%o0 + 0xb8], %f12
+ ldd [%o0 + 0xb0], %f14
+ ldd [%o0 + 0xa8], %f16
+ ldd [%o0 + 0xa0], %f18
+ fxor %f8, %f4, %f4
+ fxor %f10, %f6, %f6
+ ldd [%o0 + 0x98], %f20
+ ldd [%o0 + 0x90], %f22
+ ldd [%o0 + 0x88], %f24
+ ldd [%o0 + 0x80], %f26
+ DECRYPT_TWO_ROUNDS(12, 4, 6, 0, 2)
+ ldd [%o0 + 0x78], %f28
+ ldd [%o0 + 0x70], %f30
+ ldd [%o0 + 0x68], %f32
+ ldd [%o0 + 0x60], %f34
+ ldd [%o0 + 0x58], %f36
+ ldd [%o0 + 0x50], %f38
+ ldd [%o0 + 0x48], %f40
+ ldd [%o0 + 0x40], %f42
+ ldd [%o0 + 0x38], %f44
+ ldd [%o0 + 0x30], %f46
+ ldd [%o0 + 0x28], %f48
+ ldd [%o0 + 0x20], %f50
+ ldd [%o0 + 0x18], %f52
+ ldd [%o0 + 0x10], %f54
+ ldd [%o0 + 0x08], %f56
+ ldd [%o0 + 0x00], %f58
+ DECRYPT_128(20, 4, 6, 0, 2)
+ st %f4, [%o2 + 0x00]
+ st %f5, [%o2 + 0x04]
+ st %f6, [%o2 + 0x08]
+ st %f7, [%o2 + 0x0c]
+ retl
+ VISExit
+ENDPROC(aes_sparc64_decrypt_192)
+
+ .align 32
+ENTRY(aes_sparc64_decrypt_256)
+ /* %o0=key, %o1=input, %o2=output */
+ VISEntry
+ ld [%o1 + 0x00], %f4
+ ld [%o1 + 0x04], %f5
+ ld [%o1 + 0x08], %f6
+ ld [%o1 + 0x0c], %f7
+ ldd [%o0 + 0xe0], %f8
+ ldd [%o0 + 0xe8], %f10
+ ldd [%o0 + 0xd8], %f12
+ ldd [%o0 + 0xd0], %f14
+ ldd [%o0 + 0xc8], %f16
+ fxor %f8, %f4, %f4
+ ldd [%o0 + 0xc0], %f18
+ fxor %f10, %f6, %f6
+ ldd [%o0 + 0xb8], %f20
+ AES_DROUND23(12, 4, 6, 2)
+ ldd [%o0 + 0xb0], %f22
+ AES_DROUND01(14, 4, 6, 0)
+ ldd [%o0 + 0xa8], %f24
+ AES_DROUND23(16, 0, 2, 6)
+ ldd [%o0 + 0xa0], %f26
+ AES_DROUND01(18, 0, 2, 4)
+ ldd [%o0 + 0x98], %f12
+ AES_DROUND23(20, 4, 6, 2)
+ ldd [%o0 + 0x90], %f14
+ AES_DROUND01(22, 4, 6, 0)
+ ldd [%o0 + 0x88], %f16
+ AES_DROUND23(24, 0, 2, 6)
+ ldd [%o0 + 0x80], %f18
+ AES_DROUND01(26, 0, 2, 4)
+ ldd [%o0 + 0x78], %f20
+ AES_DROUND23(12, 4, 6, 2)
+ ldd [%o0 + 0x70], %f22
+ AES_DROUND01(14, 4, 6, 0)
+ ldd [%o0 + 0x68], %f24
+ AES_DROUND23(16, 0, 2, 6)
+ ldd [%o0 + 0x60], %f26
+ AES_DROUND01(18, 0, 2, 4)
+ ldd [%o0 + 0x58], %f28
+ AES_DROUND23(20, 4, 6, 2)
+ ldd [%o0 + 0x50], %f30
+ AES_DROUND01(22, 4, 6, 0)
+ ldd [%o0 + 0x48], %f32
+ AES_DROUND23(24, 0, 2, 6)
+ ldd [%o0 + 0x40], %f34
+ AES_DROUND01(26, 0, 2, 4)
+ ldd [%o0 + 0x38], %f36
+ AES_DROUND23(28, 4, 6, 2)
+ ldd [%o0 + 0x30], %f38
+ AES_DROUND01(30, 4, 6, 0)
+ ldd [%o0 + 0x28], %f40
+ AES_DROUND23(32, 0, 2, 6)
+ ldd [%o0 + 0x20], %f42
+ AES_DROUND01(34, 0, 2, 4)
+ ldd [%o0 + 0x18], %f44
+ AES_DROUND23(36, 4, 6, 2)
+ ldd [%o0 + 0x10], %f46
+ AES_DROUND01(38, 4, 6, 0)
+ ldd [%o0 + 0x08], %f48
+ AES_DROUND23(40, 0, 2, 6)
+ ldd [%o0 + 0x00], %f50
+ AES_DROUND01(42, 0, 2, 4)
+ AES_DROUND23(44, 4, 6, 2)
+ AES_DROUND01(46, 4, 6, 0)
+ AES_DROUND23_L(48, 0, 2, 6)
+ AES_DROUND01_L(50, 0, 2, 4)
+ st %f4, [%o2 + 0x00]
+ st %f5, [%o2 + 0x04]
+ st %f6, [%o2 + 0x08]
+ st %f7, [%o2 + 0x0c]
+ retl
+ VISExit
+ENDPROC(aes_sparc64_decrypt_256)
+
+ .align 32
+ENTRY(aes_sparc64_load_encrypt_keys_128)
+ /* %o0=key */
+ VISEntry
+ ldd [%o0 + 0x10], %f8
+ ldd [%o0 + 0x18], %f10
+ ldd [%o0 + 0x20], %f12
+ ldd [%o0 + 0x28], %f14
+ ldd [%o0 + 0x30], %f16
+ ldd [%o0 + 0x38], %f18
+ ldd [%o0 + 0x40], %f20
+ ldd [%o0 + 0x48], %f22
+ ldd [%o0 + 0x50], %f24
+ ldd [%o0 + 0x58], %f26
+ ldd [%o0 + 0x60], %f28
+ ldd [%o0 + 0x68], %f30
+ ldd [%o0 + 0x70], %f32
+ ldd [%o0 + 0x78], %f34
+ ldd [%o0 + 0x80], %f36
+ ldd [%o0 + 0x88], %f38
+ ldd [%o0 + 0x90], %f40
+ ldd [%o0 + 0x98], %f42
+ ldd [%o0 + 0xa0], %f44
+ retl
+ ldd [%o0 + 0xa8], %f46
+ENDPROC(aes_sparc64_load_encrypt_keys_128)
+
+ .align 32
+ENTRY(aes_sparc64_load_encrypt_keys_192)
+ /* %o0=key */
+ VISEntry
+ ldd [%o0 + 0x10], %f8
+ ldd [%o0 + 0x18], %f10
+ ldd [%o0 + 0x20], %f12
+ ldd [%o0 + 0x28], %f14
+ ldd [%o0 + 0x30], %f16
+ ldd [%o0 + 0x38], %f18
+ ldd [%o0 + 0x40], %f20
+ ldd [%o0 + 0x48], %f22
+ ldd [%o0 + 0x50], %f24
+ ldd [%o0 + 0x58], %f26
+ ldd [%o0 + 0x60], %f28
+ ldd [%o0 + 0x68], %f30
+ ldd [%o0 + 0x70], %f32
+ ldd [%o0 + 0x78], %f34
+ ldd [%o0 + 0x80], %f36
+ ldd [%o0 + 0x88], %f38
+ ldd [%o0 + 0x90], %f40
+ ldd [%o0 + 0x98], %f42
+ ldd [%o0 + 0xa0], %f44
+ ldd [%o0 + 0xa8], %f46
+ ldd [%o0 + 0xb0], %f48
+ ldd [%o0 + 0xb8], %f50
+ ldd [%o0 + 0xc0], %f52
+ retl
+ ldd [%o0 + 0xc8], %f54
+ENDPROC(aes_sparc64_load_encrypt_keys_192)
+
+ .align 32
+ENTRY(aes_sparc64_load_encrypt_keys_256)
+ /* %o0=key */
+ VISEntry
+ ldd [%o0 + 0x10], %f8
+ ldd [%o0 + 0x18], %f10
+ ldd [%o0 + 0x20], %f12
+ ldd [%o0 + 0x28], %f14
+ ldd [%o0 + 0x30], %f16
+ ldd [%o0 + 0x38], %f18
+ ldd [%o0 + 0x40], %f20
+ ldd [%o0 + 0x48], %f22
+ ldd [%o0 + 0x50], %f24
+ ldd [%o0 + 0x58], %f26
+ ldd [%o0 + 0x60], %f28
+ ldd [%o0 + 0x68], %f30
+ ldd [%o0 + 0x70], %f32
+ ldd [%o0 + 0x78], %f34
+ ldd [%o0 + 0x80], %f36
+ ldd [%o0 + 0x88], %f38
+ ldd [%o0 + 0x90], %f40
+ ldd [%o0 + 0x98], %f42
+ ldd [%o0 + 0xa0], %f44
+ ldd [%o0 + 0xa8], %f46
+ ldd [%o0 + 0xb0], %f48
+ ldd [%o0 + 0xb8], %f50
+ ldd [%o0 + 0xc0], %f52
+ ldd [%o0 + 0xc8], %f54
+ ldd [%o0 + 0xd0], %f56
+ ldd [%o0 + 0xd8], %f58
+ ldd [%o0 + 0xe0], %f60
+ retl
+ ldd [%o0 + 0xe8], %f62
+ENDPROC(aes_sparc64_load_encrypt_keys_256)
+
+ .align 32
+ENTRY(aes_sparc64_load_decrypt_keys_128)
+ /* %o0=key */
+ VISEntry
+ ldd [%o0 + 0x98], %f8
+ ldd [%o0 + 0x90], %f10
+ ldd [%o0 + 0x88], %f12
+ ldd [%o0 + 0x80], %f14
+ ldd [%o0 + 0x78], %f16
+ ldd [%o0 + 0x70], %f18
+ ldd [%o0 + 0x68], %f20
+ ldd [%o0 + 0x60], %f22
+ ldd [%o0 + 0x58], %f24
+ ldd [%o0 + 0x50], %f26
+ ldd [%o0 + 0x48], %f28
+ ldd [%o0 + 0x40], %f30
+ ldd [%o0 + 0x38], %f32
+ ldd [%o0 + 0x30], %f34
+ ldd [%o0 + 0x28], %f36
+ ldd [%o0 + 0x20], %f38
+ ldd [%o0 + 0x18], %f40
+ ldd [%o0 + 0x10], %f42
+ ldd [%o0 + 0x08], %f44
+ retl
+ ldd [%o0 + 0x00], %f46
+ENDPROC(aes_sparc64_load_decrypt_keys_128)
+
+ .align 32
+ENTRY(aes_sparc64_load_decrypt_keys_192)
+ /* %o0=key */
+ VISEntry
+ ldd [%o0 + 0xb8], %f8
+ ldd [%o0 + 0xb0], %f10
+ ldd [%o0 + 0xa8], %f12
+ ldd [%o0 + 0xa0], %f14
+ ldd [%o0 + 0x98], %f16
+ ldd [%o0 + 0x90], %f18
+ ldd [%o0 + 0x88], %f20
+ ldd [%o0 + 0x80], %f22
+ ldd [%o0 + 0x78], %f24
+ ldd [%o0 + 0x70], %f26
+ ldd [%o0 + 0x68], %f28
+ ldd [%o0 + 0x60], %f30
+ ldd [%o0 + 0x58], %f32
+ ldd [%o0 + 0x50], %f34
+ ldd [%o0 + 0x48], %f36
+ ldd [%o0 + 0x40], %f38
+ ldd [%o0 + 0x38], %f40
+ ldd [%o0 + 0x30], %f42
+ ldd [%o0 + 0x28], %f44
+ ldd [%o0 + 0x20], %f46
+ ldd [%o0 + 0x18], %f48
+ ldd [%o0 + 0x10], %f50
+ ldd [%o0 + 0x08], %f52
+ retl
+ ldd [%o0 + 0x00], %f54
+ENDPROC(aes_sparc64_load_decrypt_keys_192)
+
+ .align 32
+ENTRY(aes_sparc64_load_decrypt_keys_256)
+ /* %o0=key */
+ VISEntry
+ ldd [%o0 + 0xd8], %f8
+ ldd [%o0 + 0xd0], %f10
+ ldd [%o0 + 0xc8], %f12
+ ldd [%o0 + 0xc0], %f14
+ ldd [%o0 + 0xb8], %f16
+ ldd [%o0 + 0xb0], %f18
+ ldd [%o0 + 0xa8], %f20
+ ldd [%o0 + 0xa0], %f22
+ ldd [%o0 + 0x98], %f24
+ ldd [%o0 + 0x90], %f26
+ ldd [%o0 + 0x88], %f28
+ ldd [%o0 + 0x80], %f30
+ ldd [%o0 + 0x78], %f32
+ ldd [%o0 + 0x70], %f34
+ ldd [%o0 + 0x68], %f36
+ ldd [%o0 + 0x60], %f38
+ ldd [%o0 + 0x58], %f40
+ ldd [%o0 + 0x50], %f42
+ ldd [%o0 + 0x48], %f44
+ ldd [%o0 + 0x40], %f46
+ ldd [%o0 + 0x38], %f48
+ ldd [%o0 + 0x30], %f50
+ ldd [%o0 + 0x28], %f52
+ ldd [%o0 + 0x20], %f54
+ ldd [%o0 + 0x18], %f56
+ ldd [%o0 + 0x10], %f58
+ ldd [%o0 + 0x08], %f60
+ retl
+ ldd [%o0 + 0x00], %f62
+ENDPROC(aes_sparc64_load_decrypt_keys_256)
+
+ .align 32
+ENTRY(aes_sparc64_ecb_encrypt_128)
+ /* %o0=key, %o1=input, %o2=output, %o3=len */
+ ldx [%o0 + 0x00], %g1
+ subcc %o3, 0x10, %o3
+ be 10f
+ ldx [%o0 + 0x08], %g2
+1: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ ldx [%o1 + 0x10], %o4
+ ldx [%o1 + 0x18], %o5
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F4
+ MOVXTOD_G7_F6
+ xor %g1, %o4, %g3
+ xor %g2, %o5, %g7
+ MOVXTOD_G3_F60
+ MOVXTOD_G7_F62
+ ENCRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+ std %f60, [%o2 + 0x10]
+ std %f62, [%o2 + 0x18]
+ sub %o3, 0x20, %o3
+ add %o1, 0x20, %o1
+ brgz %o3, 1b
+ add %o2, 0x20, %o2
+ brlz,pt %o3, 11f
+ nop
+10: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F4
+ MOVXTOD_G7_F6
+ ENCRYPT_128(8, 4, 6, 0, 2)
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+11: retl
+ nop
+ENDPROC(aes_sparc64_ecb_encrypt_128)
+
+ .align 32
+ENTRY(aes_sparc64_ecb_encrypt_192)
+ /* %o0=key, %o1=input, %o2=output, %o3=len */
+ ldx [%o0 + 0x00], %g1
+ subcc %o3, 0x10, %o3
+ be 10f
+ ldx [%o0 + 0x08], %g2
+1: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ ldx [%o1 + 0x10], %o4
+ ldx [%o1 + 0x18], %o5
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F4
+ MOVXTOD_G7_F6
+ xor %g1, %o4, %g3
+ xor %g2, %o5, %g7
+ MOVXTOD_G3_F60
+ MOVXTOD_G7_F62
+ ENCRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+ std %f60, [%o2 + 0x10]
+ std %f62, [%o2 + 0x18]
+ sub %o3, 0x20, %o3
+ add %o1, 0x20, %o1
+ brgz %o3, 1b
+ add %o2, 0x20, %o2
+ brlz,pt %o3, 11f
+ nop
+10: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F4
+ MOVXTOD_G7_F6
+ ENCRYPT_192(8, 4, 6, 0, 2)
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+11: retl
+ nop
+ENDPROC(aes_sparc64_ecb_encrypt_192)
+
+ .align 32
+ENTRY(aes_sparc64_ecb_encrypt_256)
+ /* %o0=key, %o1=input, %o2=output, %o3=len */
+ ldx [%o0 + 0x00], %g1
+ subcc %o3, 0x10, %o3
+ be 10f
+ ldx [%o0 + 0x08], %g2
+1: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ ldx [%o1 + 0x10], %o4
+ ldx [%o1 + 0x18], %o5
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F4
+ MOVXTOD_G7_F6
+ xor %g1, %o4, %g3
+ xor %g2, %o5, %g7
+ MOVXTOD_G3_F0
+ MOVXTOD_G7_F2
+ ENCRYPT_256_2(8, 4, 6, 0, 2)
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+ std %f0, [%o2 + 0x10]
+ std %f2, [%o2 + 0x18]
+ sub %o3, 0x20, %o3
+ add %o1, 0x20, %o1
+ brgz %o3, 1b
+ add %o2, 0x20, %o2
+ brlz,pt %o3, 11f
+ nop
+10: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F4
+ MOVXTOD_G7_F6
+ ENCRYPT_256(8, 4, 6, 0, 2)
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+11: retl
+ nop
+ENDPROC(aes_sparc64_ecb_encrypt_256)
+
+ .align 32
+ENTRY(aes_sparc64_ecb_decrypt_128)
+ /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
+ ldx [%o0 - 0x10], %g1
+ subcc %o3, 0x10, %o3
+ be 10f
+ ldx [%o0 - 0x08], %g2
+1: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ ldx [%o1 + 0x10], %o4
+ ldx [%o1 + 0x18], %o5
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F4
+ MOVXTOD_G7_F6
+ xor %g1, %o4, %g3
+ xor %g2, %o5, %g7
+ MOVXTOD_G3_F60
+ MOVXTOD_G7_F62
+ DECRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+ std %f60, [%o2 + 0x10]
+ std %f62, [%o2 + 0x18]
+ sub %o3, 0x20, %o3
+ add %o1, 0x20, %o1
+ brgz,pt %o3, 1b
+ add %o2, 0x20, %o2
+ brlz,pt %o3, 11f
+ nop
+10: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F4
+ MOVXTOD_G7_F6
+ DECRYPT_128(8, 4, 6, 0, 2)
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+11: retl
+ nop
+ENDPROC(aes_sparc64_ecb_decrypt_128)
+
+ .align 32
+ENTRY(aes_sparc64_ecb_decrypt_192)
+ /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
+ ldx [%o0 - 0x10], %g1
+ subcc %o3, 0x10, %o3
+ be 10f
+ ldx [%o0 - 0x08], %g2
+1: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ ldx [%o1 + 0x10], %o4
+ ldx [%o1 + 0x18], %o5
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F4
+ MOVXTOD_G7_F6
+ xor %g1, %o4, %g3
+ xor %g2, %o5, %g7
+ MOVXTOD_G3_F60
+ MOVXTOD_G7_F62
+ DECRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+ std %f60, [%o2 + 0x10]
+ std %f62, [%o2 + 0x18]
+ sub %o3, 0x20, %o3
+ add %o1, 0x20, %o1
+ brgz,pt %o3, 1b
+ add %o2, 0x20, %o2
+ brlz,pt %o3, 11f
+ nop
+10: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F4
+ MOVXTOD_G7_F6
+ DECRYPT_192(8, 4, 6, 0, 2)
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+11: retl
+ nop
+ENDPROC(aes_sparc64_ecb_decrypt_192)
+
+ .align 32
+ENTRY(aes_sparc64_ecb_decrypt_256)
+ /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
+ ldx [%o0 - 0x10], %g1
+ subcc %o3, 0x10, %o3
+ be 10f
+ ldx [%o0 - 0x08], %g2
+ sub %o0, 0xf0, %o0
+1: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ ldx [%o1 + 0x10], %o4
+ ldx [%o1 + 0x18], %o5
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F4
+ MOVXTOD_G7_F6
+ xor %g1, %o4, %g3
+ xor %g2, %o5, %g7
+ MOVXTOD_G3_F0
+ MOVXTOD_G7_F2
+ DECRYPT_256_2(8, 4, 6, 0, 2)
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+ std %f0, [%o2 + 0x10]
+ std %f2, [%o2 + 0x18]
+ sub %o3, 0x20, %o3
+ add %o1, 0x20, %o1
+ brgz,pt %o3, 1b
+ add %o2, 0x20, %o2
+ brlz,pt %o3, 11f
+ nop
+10: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F4
+ MOVXTOD_G7_F6
+ DECRYPT_256(8, 4, 6, 0, 2)
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+11: retl
+ nop
+ENDPROC(aes_sparc64_ecb_decrypt_256)
+
+ .align 32
+ENTRY(aes_sparc64_cbc_encrypt_128)
+ /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+ ldd [%o4 + 0x00], %f4
+ ldd [%o4 + 0x08], %f6
+ ldx [%o0 + 0x00], %g1
+ ldx [%o0 + 0x08], %g2
+1: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ add %o1, 0x10, %o1
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F0
+ MOVXTOD_G7_F2
+ fxor %f4, %f0, %f4
+ fxor %f6, %f2, %f6
+ ENCRYPT_128(8, 4, 6, 0, 2)
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+ subcc %o3, 0x10, %o3
+ bne,pt %xcc, 1b
+ add %o2, 0x10, %o2
+ std %f4, [%o4 + 0x00]
+ std %f6, [%o4 + 0x08]
+ retl
+ nop
+ENDPROC(aes_sparc64_cbc_encrypt_128)
+
+ .align 32
+ENTRY(aes_sparc64_cbc_encrypt_192)
+ /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+ ldd [%o4 + 0x00], %f4
+ ldd [%o4 + 0x08], %f6
+ ldx [%o0 + 0x00], %g1
+ ldx [%o0 + 0x08], %g2
+1: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ add %o1, 0x10, %o1
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F0
+ MOVXTOD_G7_F2
+ fxor %f4, %f0, %f4
+ fxor %f6, %f2, %f6
+ ENCRYPT_192(8, 4, 6, 0, 2)
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+ subcc %o3, 0x10, %o3
+ bne,pt %xcc, 1b
+ add %o2, 0x10, %o2
+ std %f4, [%o4 + 0x00]
+ std %f6, [%o4 + 0x08]
+ retl
+ nop
+ENDPROC(aes_sparc64_cbc_encrypt_192)
+
+ .align 32
+ENTRY(aes_sparc64_cbc_encrypt_256)
+ /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+ ldd [%o4 + 0x00], %f4
+ ldd [%o4 + 0x08], %f6
+ ldx [%o0 + 0x00], %g1
+ ldx [%o0 + 0x08], %g2
+1: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ add %o1, 0x10, %o1
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F0
+ MOVXTOD_G7_F2
+ fxor %f4, %f0, %f4
+ fxor %f6, %f2, %f6
+ ENCRYPT_256(8, 4, 6, 0, 2)
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+ subcc %o3, 0x10, %o3
+ bne,pt %xcc, 1b
+ add %o2, 0x10, %o2
+ std %f4, [%o4 + 0x00]
+ std %f6, [%o4 + 0x08]
+ retl
+ nop
+ENDPROC(aes_sparc64_cbc_encrypt_256)
+
+ .align 32
+ENTRY(aes_sparc64_cbc_decrypt_128)
+ /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
+ ldx [%o0 - 0x10], %g1
+ ldx [%o0 - 0x08], %g2
+ ldx [%o4 + 0x00], %o0
+ ldx [%o4 + 0x08], %o5
+1: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ add %o1, 0x10, %o1
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F4
+ MOVXTOD_G7_F6
+ DECRYPT_128(8, 4, 6, 0, 2)
+ MOVXTOD_O0_F0
+ MOVXTOD_O5_F2
+ xor %g1, %g3, %o0
+ xor %g2, %g7, %o5
+ fxor %f4, %f0, %f4
+ fxor %f6, %f2, %f6
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+ subcc %o3, 0x10, %o3
+ bne,pt %xcc, 1b
+ add %o2, 0x10, %o2
+ stx %o0, [%o4 + 0x00]
+ stx %o5, [%o4 + 0x08]
+ retl
+ nop
+ENDPROC(aes_sparc64_cbc_decrypt_128)
+
+ .align 32
+ENTRY(aes_sparc64_cbc_decrypt_192)
+ /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
+ ldx [%o0 - 0x10], %g1
+ ldx [%o0 - 0x08], %g2
+ ldx [%o4 + 0x00], %o0
+ ldx [%o4 + 0x08], %o5
+1: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ add %o1, 0x10, %o1
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F4
+ MOVXTOD_G7_F6
+ DECRYPT_192(8, 4, 6, 0, 2)
+ MOVXTOD_O0_F0
+ MOVXTOD_O5_F2
+ xor %g1, %g3, %o0
+ xor %g2, %g7, %o5
+ fxor %f4, %f0, %f4
+ fxor %f6, %f2, %f6
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+ subcc %o3, 0x10, %o3
+ bne,pt %xcc, 1b
+ add %o2, 0x10, %o2
+ stx %o0, [%o4 + 0x00]
+ stx %o5, [%o4 + 0x08]
+ retl
+ nop
+ENDPROC(aes_sparc64_cbc_decrypt_192)
+
+ .align 32
+ENTRY(aes_sparc64_cbc_decrypt_256)
+ /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
+ ldx [%o0 - 0x10], %g1
+ ldx [%o0 - 0x08], %g2
+ ldx [%o4 + 0x00], %o0
+ ldx [%o4 + 0x08], %o5
+1: ldx [%o1 + 0x00], %g3
+ ldx [%o1 + 0x08], %g7
+ add %o1, 0x10, %o1
+ xor %g1, %g3, %g3
+ xor %g2, %g7, %g7
+ MOVXTOD_G3_F4
+ MOVXTOD_G7_F6
+ DECRYPT_256(8, 4, 6, 0, 2)
+ MOVXTOD_O0_F0
+ MOVXTOD_O5_F2
+ xor %g1, %g3, %o0
+ xor %g2, %g7, %o5
+ fxor %f4, %f0, %f4
+ fxor %f6, %f2, %f6
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+ subcc %o3, 0x10, %o3
+ bne,pt %xcc, 1b
+ add %o2, 0x10, %o2
+ stx %o0, [%o4 + 0x00]
+ stx %o5, [%o4 + 0x08]
+ retl
+ nop
+ENDPROC(aes_sparc64_cbc_decrypt_256)
+
+ .align 32
+ENTRY(aes_sparc64_ctr_crypt_128)
+ /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+ ldx [%o4 + 0x00], %g3
+ ldx [%o4 + 0x08], %g7
+ subcc %o3, 0x10, %o3
+ ldx [%o0 + 0x00], %g1
+ be 10f
+ ldx [%o0 + 0x08], %g2
+1: xor %g1, %g3, %o5
+ MOVXTOD_O5_F0
+ xor %g2, %g7, %o5
+ MOVXTOD_O5_F2
+ add %g7, 1, %g7
+ add %g3, 1, %o5
+ movrz %g7, %o5, %g3
+ xor %g1, %g3, %o5
+ MOVXTOD_O5_F4
+ xor %g2, %g7, %o5
+ MOVXTOD_O5_F6
+ add %g7, 1, %g7
+ add %g3, 1, %o5
+ movrz %g7, %o5, %g3
+ ENCRYPT_128_2(8, 0, 2, 4, 6, 56, 58, 60, 62)
+ ldd [%o1 + 0x00], %f56
+ ldd [%o1 + 0x08], %f58
+ ldd [%o1 + 0x10], %f60
+ ldd [%o1 + 0x18], %f62
+ fxor %f56, %f0, %f56
+ fxor %f58, %f2, %f58
+ fxor %f60, %f4, %f60
+ fxor %f62, %f6, %f62
+ std %f56, [%o2 + 0x00]
+ std %f58, [%o2 + 0x08]
+ std %f60, [%o2 + 0x10]
+ std %f62, [%o2 + 0x18]
+ subcc %o3, 0x20, %o3
+ add %o1, 0x20, %o1
+ brgz %o3, 1b
+ add %o2, 0x20, %o2
+ brlz,pt %o3, 11f
+ nop
+10: xor %g1, %g3, %o5
+ MOVXTOD_O5_F0
+ xor %g2, %g7, %o5
+ MOVXTOD_O5_F2
+ add %g7, 1, %g7
+ add %g3, 1, %o5
+ movrz %g7, %o5, %g3
+ ENCRYPT_128(8, 0, 2, 4, 6)
+ ldd [%o1 + 0x00], %f4
+ ldd [%o1 + 0x08], %f6
+ fxor %f4, %f0, %f4
+ fxor %f6, %f2, %f6
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+11: stx %g3, [%o4 + 0x00]
+ retl
+ stx %g7, [%o4 + 0x08]
+ENDPROC(aes_sparc64_ctr_crypt_128)
+
+ .align 32
+ENTRY(aes_sparc64_ctr_crypt_192)
+ /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+ ldx [%o4 + 0x00], %g3
+ ldx [%o4 + 0x08], %g7
+ subcc %o3, 0x10, %o3
+ ldx [%o0 + 0x00], %g1
+ be 10f
+ ldx [%o0 + 0x08], %g2
+1: xor %g1, %g3, %o5
+ MOVXTOD_O5_F0
+ xor %g2, %g7, %o5
+ MOVXTOD_O5_F2
+ add %g7, 1, %g7
+ add %g3, 1, %o5
+ movrz %g7, %o5, %g3
+ xor %g1, %g3, %o5
+ MOVXTOD_O5_F4
+ xor %g2, %g7, %o5
+ MOVXTOD_O5_F6
+ add %g7, 1, %g7
+ add %g3, 1, %o5
+ movrz %g7, %o5, %g3
+ ENCRYPT_192_2(8, 0, 2, 4, 6, 56, 58, 60, 62)
+ ldd [%o1 + 0x00], %f56
+ ldd [%o1 + 0x08], %f58
+ ldd [%o1 + 0x10], %f60
+ ldd [%o1 + 0x18], %f62
+ fxor %f56, %f0, %f56
+ fxor %f58, %f2, %f58
+ fxor %f60, %f4, %f60
+ fxor %f62, %f6, %f62
+ std %f56, [%o2 + 0x00]
+ std %f58, [%o2 + 0x08]
+ std %f60, [%o2 + 0x10]
+ std %f62, [%o2 + 0x18]
+ subcc %o3, 0x20, %o3
+ add %o1, 0x20, %o1
+ brgz %o3, 1b
+ add %o2, 0x20, %o2
+ brlz,pt %o3, 11f
+ nop
+10: xor %g1, %g3, %o5
+ MOVXTOD_O5_F0
+ xor %g2, %g7, %o5
+ MOVXTOD_O5_F2
+ add %g7, 1, %g7
+ add %g3, 1, %o5
+ movrz %g7, %o5, %g3
+ ENCRYPT_192(8, 0, 2, 4, 6)
+ ldd [%o1 + 0x00], %f4
+ ldd [%o1 + 0x08], %f6
+ fxor %f4, %f0, %f4
+ fxor %f6, %f2, %f6
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+11: stx %g3, [%o4 + 0x00]
+ retl
+ stx %g7, [%o4 + 0x08]
+ENDPROC(aes_sparc64_ctr_crypt_192)
+
+ .align 32
+ENTRY(aes_sparc64_ctr_crypt_256)
+ /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+ ldx [%o4 + 0x00], %g3
+ ldx [%o4 + 0x08], %g7
+ subcc %o3, 0x10, %o3
+ ldx [%o0 + 0x00], %g1
+ be 10f
+ ldx [%o0 + 0x08], %g2
+1: xor %g1, %g3, %o5
+ MOVXTOD_O5_F0
+ xor %g2, %g7, %o5
+ MOVXTOD_O5_F2
+ add %g7, 1, %g7
+ add %g3, 1, %o5
+ movrz %g7, %o5, %g3
+ xor %g1, %g3, %o5
+ MOVXTOD_O5_F4
+ xor %g2, %g7, %o5
+ MOVXTOD_O5_F6
+ add %g7, 1, %g7
+ add %g3, 1, %o5
+ movrz %g7, %o5, %g3
+ ENCRYPT_256_2(8, 0, 2, 4, 6)
+ ldd [%o1 + 0x00], %f56
+ ldd [%o1 + 0x08], %f58
+ ldd [%o1 + 0x10], %f60
+ ldd [%o1 + 0x18], %f62
+ fxor %f56, %f0, %f56
+ fxor %f58, %f2, %f58
+ fxor %f60, %f4, %f60
+ fxor %f62, %f6, %f62
+ std %f56, [%o2 + 0x00]
+ std %f58, [%o2 + 0x08]
+ std %f60, [%o2 + 0x10]
+ std %f62, [%o2 + 0x18]
+ subcc %o3, 0x20, %o3
+ add %o1, 0x20, %o1
+ brgz %o3, 1b
+ add %o2, 0x20, %o2
+ brlz,pt %o3, 11f
+ nop
+ ldd [%o0 + 0xd0], %f56
+ ldd [%o0 + 0xd8], %f58
+ ldd [%o0 + 0xe0], %f60
+ ldd [%o0 + 0xe8], %f62
+10: xor %g1, %g3, %o5
+ MOVXTOD_O5_F0
+ xor %g2, %g7, %o5
+ MOVXTOD_O5_F2
+ add %g7, 1, %g7
+ add %g3, 1, %o5
+ movrz %g7, %o5, %g3
+ ENCRYPT_256(8, 0, 2, 4, 6)
+ ldd [%o1 + 0x00], %f4
+ ldd [%o1 + 0x08], %f6
+ fxor %f4, %f0, %f4
+ fxor %f6, %f2, %f6
+ std %f4, [%o2 + 0x00]
+ std %f6, [%o2 + 0x08]
+11: stx %g3, [%o4 + 0x00]
+ retl
+ stx %g7, [%o4 + 0x08]
+ENDPROC(aes_sparc64_ctr_crypt_256)
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c
new file mode 100644
index 00000000000..8f1c9980f63
--- /dev/null
+++ b/arch/sparc/crypto/aes_glue.c
@@ -0,0 +1,477 @@
+/* Glue code for AES encryption optimized for sparc64 crypto opcodes.
+ *
+ * This is based largely upon arch/x86/crypto/aesni-intel_glue.c
+ *
+ * Copyright (C) 2008, Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
+ * interface for 64-bit kernels.
+ * Authors: Adrian Hoban <adrian.hoban@intel.com>
+ * Gabriele Paoloni <gabriele.paoloni@intel.com>
+ * Tadeusz Struk (tadeusz.struk@intel.com)
+ * Aidan O'Mahony (aidan.o.mahony@intel.com)
+ * Copyright (c) 2010, Intel Corporation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+
+#include <asm/fpumacro.h>
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+#include "opcodes.h"
+
+struct aes_ops {
+ void (*encrypt)(const u64 *key, const u32 *input, u32 *output);
+ void (*decrypt)(const u64 *key, const u32 *input, u32 *output);
+ void (*load_encrypt_keys)(const u64 *key);
+ void (*load_decrypt_keys)(const u64 *key);
+ void (*ecb_encrypt)(const u64 *key, const u64 *input, u64 *output,
+ unsigned int len);
+ void (*ecb_decrypt)(const u64 *key, const u64 *input, u64 *output,
+ unsigned int len);
+ void (*cbc_encrypt)(const u64 *key, const u64 *input, u64 *output,
+ unsigned int len, u64 *iv);
+ void (*cbc_decrypt)(const u64 *key, const u64 *input, u64 *output,
+ unsigned int len, u64 *iv);
+ void (*ctr_crypt)(const u64 *key, const u64 *input, u64 *output,
+ unsigned int len, u64 *iv);
+};
+
+struct crypto_sparc64_aes_ctx {
+ struct aes_ops *ops;
+ u64 key[AES_MAX_KEYLENGTH / sizeof(u64)];
+ u32 key_length;
+ u32 expanded_key_length;
+};
+
+extern void aes_sparc64_encrypt_128(const u64 *key, const u32 *input,
+ u32 *output);
+extern void aes_sparc64_encrypt_192(const u64 *key, const u32 *input,
+ u32 *output);
+extern void aes_sparc64_encrypt_256(const u64 *key, const u32 *input,
+ u32 *output);
+
+extern void aes_sparc64_decrypt_128(const u64 *key, const u32 *input,
+ u32 *output);
+extern void aes_sparc64_decrypt_192(const u64 *key, const u32 *input,
+ u32 *output);
+extern void aes_sparc64_decrypt_256(const u64 *key, const u32 *input,
+ u32 *output);
+
+extern void aes_sparc64_load_encrypt_keys_128(const u64 *key);
+extern void aes_sparc64_load_encrypt_keys_192(const u64 *key);
+extern void aes_sparc64_load_encrypt_keys_256(const u64 *key);
+
+extern void aes_sparc64_load_decrypt_keys_128(const u64 *key);
+extern void aes_sparc64_load_decrypt_keys_192(const u64 *key);
+extern void aes_sparc64_load_decrypt_keys_256(const u64 *key);
+
+extern void aes_sparc64_ecb_encrypt_128(const u64 *key, const u64 *input,
+ u64 *output, unsigned int len);
+extern void aes_sparc64_ecb_encrypt_192(const u64 *key, const u64 *input,
+ u64 *output, unsigned int len);
+extern void aes_sparc64_ecb_encrypt_256(const u64 *key, const u64 *input,
+ u64 *output, unsigned int len);
+
+extern void aes_sparc64_ecb_decrypt_128(const u64 *key, const u64 *input,
+ u64 *output, unsigned int len);
+extern void aes_sparc64_ecb_decrypt_192(const u64 *key, const u64 *input,
+ u64 *output, unsigned int len);
+extern void aes_sparc64_ecb_decrypt_256(const u64 *key, const u64 *input,
+ u64 *output, unsigned int len);
+
+extern void aes_sparc64_cbc_encrypt_128(const u64 *key, const u64 *input,
+ u64 *output, unsigned int len,
+ u64 *iv);
+
+extern void aes_sparc64_cbc_encrypt_192(const u64 *key, const u64 *input,
+ u64 *output, unsigned int len,
+ u64 *iv);
+
+extern void aes_sparc64_cbc_encrypt_256(const u64 *key, const u64 *input,
+ u64 *output, unsigned int len,
+ u64 *iv);
+
+extern void aes_sparc64_cbc_decrypt_128(const u64 *key, const u64 *input,
+ u64 *output, unsigned int len,
+ u64 *iv);
+
+extern void aes_sparc64_cbc_decrypt_192(const u64 *key, const u64 *input,
+ u64 *output, unsigned int len,
+ u64 *iv);
+
+extern void aes_sparc64_cbc_decrypt_256(const u64 *key, const u64 *input,
+ u64 *output, unsigned int len,
+ u64 *iv);
+
+extern void aes_sparc64_ctr_crypt_128(const u64 *key, const u64 *input,
+ u64 *output, unsigned int len,
+ u64 *iv);
+extern void aes_sparc64_ctr_crypt_192(const u64 *key, const u64 *input,
+ u64 *output, unsigned int len,
+ u64 *iv);
+extern void aes_sparc64_ctr_crypt_256(const u64 *key, const u64 *input,
+ u64 *output, unsigned int len,
+ u64 *iv);
+
+struct aes_ops aes128_ops = {
+ .encrypt = aes_sparc64_encrypt_128,
+ .decrypt = aes_sparc64_decrypt_128,
+ .load_encrypt_keys = aes_sparc64_load_encrypt_keys_128,
+ .load_decrypt_keys = aes_sparc64_load_decrypt_keys_128,
+ .ecb_encrypt = aes_sparc64_ecb_encrypt_128,
+ .ecb_decrypt = aes_sparc64_ecb_decrypt_128,
+ .cbc_encrypt = aes_sparc64_cbc_encrypt_128,
+ .cbc_decrypt = aes_sparc64_cbc_decrypt_128,
+ .ctr_crypt = aes_sparc64_ctr_crypt_128,
+};
+
+struct aes_ops aes192_ops = {
+ .encrypt = aes_sparc64_encrypt_192,
+ .decrypt = aes_sparc64_decrypt_192,
+ .load_encrypt_keys = aes_sparc64_load_encrypt_keys_192,
+ .load_decrypt_keys = aes_sparc64_load_decrypt_keys_192,
+ .ecb_encrypt = aes_sparc64_ecb_encrypt_192,
+ .ecb_decrypt = aes_sparc64_ecb_decrypt_192,
+ .cbc_encrypt = aes_sparc64_cbc_encrypt_192,
+ .cbc_decrypt = aes_sparc64_cbc_decrypt_192,
+ .ctr_crypt = aes_sparc64_ctr_crypt_192,
+};
+
+struct aes_ops aes256_ops = {
+ .encrypt = aes_sparc64_encrypt_256,
+ .decrypt = aes_sparc64_decrypt_256,
+ .load_encrypt_keys = aes_sparc64_load_encrypt_keys_256,
+ .load_decrypt_keys = aes_sparc64_load_decrypt_keys_256,
+ .ecb_encrypt = aes_sparc64_ecb_encrypt_256,
+ .ecb_decrypt = aes_sparc64_ecb_decrypt_256,
+ .cbc_encrypt = aes_sparc64_cbc_encrypt_256,
+ .cbc_decrypt = aes_sparc64_cbc_decrypt_256,
+ .ctr_crypt = aes_sparc64_ctr_crypt_256,
+};
+
+extern void aes_sparc64_key_expand(const u32 *in_key, u64 *output_key,
+ unsigned int key_len);
+
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ u32 *flags = &tfm->crt_flags;
+
+ switch (key_len) {
+ case AES_KEYSIZE_128:
+ ctx->expanded_key_length = 0xb0;
+ ctx->ops = &aes128_ops;
+ break;
+
+ case AES_KEYSIZE_192:
+ ctx->expanded_key_length = 0xd0;
+ ctx->ops = &aes192_ops;
+ break;
+
+ case AES_KEYSIZE_256:
+ ctx->expanded_key_length = 0xf0;
+ ctx->ops = &aes256_ops;
+ break;
+
+ default:
+ *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+
+ aes_sparc64_key_expand((const u32 *)in_key, &ctx->key[0], key_len);
+ ctx->key_length = key_len;
+
+ return 0;
+}
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ ctx->ops->encrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst);
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ ctx->ops->decrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst);
+}
+
+#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
+
+static int ecb_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ ctx->ops->load_encrypt_keys(&ctx->key[0]);
+ while ((nbytes = walk.nbytes)) {
+ unsigned int block_len = nbytes & AES_BLOCK_MASK;
+
+ if (likely(block_len)) {
+ ctx->ops->ecb_encrypt(&ctx->key[0],
+ (const u64 *)walk.src.virt.addr,
+ (u64 *) walk.dst.virt.addr,
+ block_len);
+ }
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ fprs_write(0);
+ return err;
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ u64 *key_end;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ ctx->ops->load_decrypt_keys(&ctx->key[0]);
+ key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
+ while ((nbytes = walk.nbytes)) {
+ unsigned int block_len = nbytes & AES_BLOCK_MASK;
+
+ if (likely(block_len)) {
+ ctx->ops->ecb_decrypt(key_end,
+ (const u64 *) walk.src.virt.addr,
+ (u64 *) walk.dst.virt.addr, block_len);
+ }
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ fprs_write(0);
+
+ return err;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ ctx->ops->load_encrypt_keys(&ctx->key[0]);
+ while ((nbytes = walk.nbytes)) {
+ unsigned int block_len = nbytes & AES_BLOCK_MASK;
+
+ if (likely(block_len)) {
+ ctx->ops->cbc_encrypt(&ctx->key[0],
+ (const u64 *)walk.src.virt.addr,
+ (u64 *) walk.dst.virt.addr,
+ block_len, (u64 *) walk.iv);
+ }
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ fprs_write(0);
+ return err;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ u64 *key_end;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ ctx->ops->load_decrypt_keys(&ctx->key[0]);
+ key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
+ while ((nbytes = walk.nbytes)) {
+ unsigned int block_len = nbytes & AES_BLOCK_MASK;
+
+ if (likely(block_len)) {
+ ctx->ops->cbc_decrypt(key_end,
+ (const u64 *) walk.src.virt.addr,
+ (u64 *) walk.dst.virt.addr,
+ block_len, (u64 *) walk.iv);
+ }
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ fprs_write(0);
+
+ return err;
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ ctx->ops->load_encrypt_keys(&ctx->key[0]);
+ while ((nbytes = walk.nbytes)) {
+ unsigned int block_len = nbytes & AES_BLOCK_MASK;
+
+ if (likely(block_len)) {
+ ctx->ops->ctr_crypt(&ctx->key[0],
+ (const u64 *)walk.src.virt.addr,
+ (u64 *) walk.dst.virt.addr,
+ block_len, (u64 *) walk.iv);
+ }
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ fprs_write(0);
+ return err;
+}
+
+static struct crypto_alg algs[] = { {
+ .cra_name = "aes",
+ .cra_driver_name = "aes-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
+ .cra_alignmask = 3,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = aes_set_key,
+ .cia_encrypt = aes_encrypt,
+ .cia_decrypt = aes_decrypt
+ }
+ }
+}, {
+ .cra_name = "ecb(aes)",
+ .cra_driver_name = "ecb-aes-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = aes_set_key,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+}, {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = aes_set_key,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+}, {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = aes_set_key,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ },
+ },
+} };
+
+static bool __init sparc64_has_aes_opcode(void)
+{
+ unsigned long cfr;
+
+ if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+ return false;
+
+ __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+ if (!(cfr & CFR_AES))
+ return false;
+
+ return true;
+}
+
+static int __init aes_sparc64_mod_init(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(algs); i++)
+ INIT_LIST_HEAD(&algs[i].cra_list);
+
+ if (sparc64_has_aes_opcode()) {
+ pr_info("Using sparc64 aes opcodes optimized AES implementation\n");
+ return crypto_register_algs(algs, ARRAY_SIZE(algs));
+ }
+ pr_info("sparc64 aes opcodes not available.\n");
+ return -ENODEV;
+}
+
+static void __exit aes_sparc64_mod_fini(void)
+{
+ crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+}
+
+module_init(aes_sparc64_mod_init);
+module_exit(aes_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("AES Secure Hash Algorithm, sparc64 aes opcode accelerated");
+
+MODULE_ALIAS("aes");
diff --git a/arch/sparc/crypto/camellia_asm.S b/arch/sparc/crypto/camellia_asm.S
new file mode 100644
index 00000000000..cc39553a4e4
--- /dev/null
+++ b/arch/sparc/crypto/camellia_asm.S
@@ -0,0 +1,563 @@
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+#include "opcodes.h"
+
+#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
+ CAMELLIA_F(KEY_BASE + 0, I1, I0, I1) \
+ CAMELLIA_F(KEY_BASE + 2, I0, I1, I0) \
+ CAMELLIA_F(KEY_BASE + 4, I1, I0, I1) \
+ CAMELLIA_F(KEY_BASE + 6, I0, I1, I0) \
+ CAMELLIA_F(KEY_BASE + 8, I1, I0, I1) \
+ CAMELLIA_F(KEY_BASE + 10, I0, I1, I0)
+
+#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \
+ CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
+ CAMELLIA_FL(KEY_BASE + 12, I0, I0) \
+ CAMELLIA_FLI(KEY_BASE + 14, I1, I1)
+
+ .data
+
+ .align 8
+SIGMA: .xword 0xA09E667F3BCC908B
+ .xword 0xB67AE8584CAA73B2
+ .xword 0xC6EF372FE94F82BE
+ .xword 0x54FF53A5F1D36F1C
+ .xword 0x10E527FADE682D1D
+ .xword 0xB05688C2B3E6C1FD
+
+ .text
+
+ .align 32
+ENTRY(camellia_sparc64_key_expand)
+ /* %o0=in_key, %o1=encrypt_key, %o2=key_len, %o3=decrypt_key */
+ VISEntry
+ ld [%o0 + 0x00], %f0 ! i0, k[0]
+ ld [%o0 + 0x04], %f1 ! i1, k[1]
+ ld [%o0 + 0x08], %f2 ! i2, k[2]
+ ld [%o0 + 0x0c], %f3 ! i3, k[3]
+ std %f0, [%o1 + 0x00] ! k[0, 1]
+ fsrc2 %f0, %f28
+ std %f2, [%o1 + 0x08] ! k[2, 3]
+ cmp %o2, 16
+ be 10f
+ fsrc2 %f2, %f30
+
+ ld [%o0 + 0x10], %f0
+ ld [%o0 + 0x14], %f1
+ std %f0, [%o1 + 0x20] ! k[8, 9]
+ cmp %o2, 24
+ fone %f10
+ be,a 1f
+ fxor %f10, %f0, %f2
+ ld [%o0 + 0x18], %f2
+ ld [%o0 + 0x1c], %f3
+1:
+ std %f2, [%o1 + 0x28] ! k[10, 11]
+ fxor %f28, %f0, %f0
+ fxor %f30, %f2, %f2
+
+10:
+ sethi %hi(SIGMA), %g3
+ or %g3, %lo(SIGMA), %g3
+ ldd [%g3 + 0x00], %f16
+ ldd [%g3 + 0x08], %f18
+ ldd [%g3 + 0x10], %f20
+ ldd [%g3 + 0x18], %f22
+ ldd [%g3 + 0x20], %f24
+ ldd [%g3 + 0x28], %f26
+ CAMELLIA_F(16, 2, 0, 2)
+ CAMELLIA_F(18, 0, 2, 0)
+ fxor %f28, %f0, %f0
+ fxor %f30, %f2, %f2
+ CAMELLIA_F(20, 2, 0, 2)
+ CAMELLIA_F(22, 0, 2, 0)
+
+#define ROTL128(S01, S23, TMP1, TMP2, N) \
+ srlx S01, (64 - N), TMP1; \
+ sllx S01, N, S01; \
+ srlx S23, (64 - N), TMP2; \
+ sllx S23, N, S23; \
+ or S01, TMP2, S01; \
+ or S23, TMP1, S23
+
+ cmp %o2, 16
+ bne 1f
+ nop
+ /* 128-bit key */
+ std %f0, [%o1 + 0x10] ! k[ 4, 5]
+ std %f2, [%o1 + 0x18] ! k[ 6, 7]
+ MOVDTOX_F0_O4
+ MOVDTOX_F2_O5
+ ROTL128(%o4, %o5, %g2, %g3, 15)
+ stx %o4, [%o1 + 0x30] ! k[12, 13]
+ stx %o5, [%o1 + 0x38] ! k[14, 15]
+ ROTL128(%o4, %o5, %g2, %g3, 15)
+ stx %o4, [%o1 + 0x40] ! k[16, 17]
+ stx %o5, [%o1 + 0x48] ! k[18, 19]
+ ROTL128(%o4, %o5, %g2, %g3, 15)
+ stx %o4, [%o1 + 0x60] ! k[24, 25]
+ ROTL128(%o4, %o5, %g2, %g3, 15)
+ stx %o4, [%o1 + 0x70] ! k[28, 29]
+ stx %o5, [%o1 + 0x78] ! k[30, 31]
+ ROTL128(%o4, %o5, %g2, %g3, 34)
+ stx %o4, [%o1 + 0xa0] ! k[40, 41]
+ stx %o5, [%o1 + 0xa8] ! k[42, 43]
+ ROTL128(%o4, %o5, %g2, %g3, 17)
+ stx %o4, [%o1 + 0xc0] ! k[48, 49]
+ stx %o5, [%o1 + 0xc8] ! k[50, 51]
+
+ ldx [%o1 + 0x00], %o4 ! k[ 0, 1]
+ ldx [%o1 + 0x08], %o5 ! k[ 2, 3]
+ ROTL128(%o4, %o5, %g2, %g3, 15)
+ stx %o4, [%o1 + 0x20] ! k[ 8, 9]
+ stx %o5, [%o1 + 0x28] ! k[10, 11]
+ ROTL128(%o4, %o5, %g2, %g3, 30)
+ stx %o4, [%o1 + 0x50] ! k[20, 21]
+ stx %o5, [%o1 + 0x58] ! k[22, 23]
+ ROTL128(%o4, %o5, %g2, %g3, 15)
+ stx %o5, [%o1 + 0x68] ! k[26, 27]
+ ROTL128(%o4, %o5, %g2, %g3, 17)
+ stx %o4, [%o1 + 0x80] ! k[32, 33]
+ stx %o5, [%o1 + 0x88] ! k[34, 35]
+ ROTL128(%o4, %o5, %g2, %g3, 17)
+ stx %o4, [%o1 + 0x90] ! k[36, 37]
+ stx %o5, [%o1 + 0x98] ! k[38, 39]
+ ROTL128(%o4, %o5, %g2, %g3, 17)
+ stx %o4, [%o1 + 0xb0] ! k[44, 45]
+ stx %o5, [%o1 + 0xb8] ! k[46, 47]
+
+ ba,pt %xcc, 2f
+ mov (3 * 16 * 4), %o0
+
+1:
+ /* 192-bit or 256-bit key */
+ std %f0, [%o1 + 0x30] ! k[12, 13]
+ std %f2, [%o1 + 0x38] ! k[14, 15]
+ ldd [%o1 + 0x20], %f4 ! k[ 8, 9]
+ ldd [%o1 + 0x28], %f6 ! k[10, 11]
+ fxor %f0, %f4, %f0
+ fxor %f2, %f6, %f2
+ CAMELLIA_F(24, 2, 0, 2)
+ CAMELLIA_F(26, 0, 2, 0)
+ std %f0, [%o1 + 0x10] ! k[ 4, 5]
+ std %f2, [%o1 + 0x18] ! k[ 6, 7]
+ MOVDTOX_F0_O4
+ MOVDTOX_F2_O5
+ ROTL128(%o4, %o5, %g2, %g3, 30)
+ stx %o4, [%o1 + 0x50] ! k[20, 21]
+ stx %o5, [%o1 + 0x58] ! k[22, 23]
+ ROTL128(%o4, %o5, %g2, %g3, 30)
+ stx %o4, [%o1 + 0xa0] ! k[40, 41]
+ stx %o5, [%o1 + 0xa8] ! k[42, 43]
+ ROTL128(%o4, %o5, %g2, %g3, 51)
+ stx %o4, [%o1 + 0x100] ! k[64, 65]
+ stx %o5, [%o1 + 0x108] ! k[66, 67]
+ ldx [%o1 + 0x20], %o4 ! k[ 8, 9]
+ ldx [%o1 + 0x28], %o5 ! k[10, 11]
+ ROTL128(%o4, %o5, %g2, %g3, 15)
+ stx %o4, [%o1 + 0x20] ! k[ 8, 9]
+ stx %o5, [%o1 + 0x28] ! k[10, 11]
+ ROTL128(%o4, %o5, %g2, %g3, 15)
+ stx %o4, [%o1 + 0x40] ! k[16, 17]
+ stx %o5, [%o1 + 0x48] ! k[18, 19]
+ ROTL128(%o4, %o5, %g2, %g3, 30)
+ stx %o4, [%o1 + 0x90] ! k[36, 37]
+ stx %o5, [%o1 + 0x98] ! k[38, 39]
+ ROTL128(%o4, %o5, %g2, %g3, 34)
+ stx %o4, [%o1 + 0xd0] ! k[52, 53]
+ stx %o5, [%o1 + 0xd8] ! k[54, 55]
+ ldx [%o1 + 0x30], %o4 ! k[12, 13]
+ ldx [%o1 + 0x38], %o5 ! k[14, 15]
+ ROTL128(%o4, %o5, %g2, %g3, 15)
+ stx %o4, [%o1 + 0x30] ! k[12, 13]
+ stx %o5, [%o1 + 0x38] ! k[14, 15]
+ ROTL128(%o4, %o5, %g2, %g3, 30)
+ stx %o4, [%o1 + 0x70] ! k[28, 29]
+ stx %o5, [%o1 + 0x78] ! k[30, 31]
+ srlx %o4, 32, %g2
+ srlx %o5, 32, %g3
+ stw %o4, [%o1 + 0xc0] ! k[48]
+ stw %g3, [%o1 + 0xc4] ! k[49]
+ stw %o5, [%o1 + 0xc8] ! k[50]
+ stw %g2, [%o1 + 0xcc] ! k[51]
+ ROTL128(%o4, %o5, %g2, %g3, 49)
+ stx %o4, [%o1 + 0xe0] ! k[56, 57]
+ stx %o5, [%o1 + 0xe8] ! k[58, 59]
+ ldx [%o1 + 0x00], %o4 ! k[ 0, 1]
+ ldx [%o1 + 0x08], %o5 ! k[ 2, 3]
+ ROTL128(%o4, %o5, %g2, %g3, 45)
+ stx %o4, [%o1 + 0x60] ! k[24, 25]
+ stx %o5, [%o1 + 0x68] ! k[26, 27]
+ ROTL128(%o4, %o5, %g2, %g3, 15)
+ stx %o4, [%o1 + 0x80] ! k[32, 33]
+ stx %o5, [%o1 + 0x88] ! k[34, 35]
+ ROTL128(%o4, %o5, %g2, %g3, 17)
+ stx %o4, [%o1 + 0xb0] ! k[44, 45]
+ stx %o5, [%o1 + 0xb8] ! k[46, 47]
+ ROTL128(%o4, %o5, %g2, %g3, 34)
+ stx %o4, [%o1 + 0xf0] ! k[60, 61]
+ stx %o5, [%o1 + 0xf8] ! k[62, 63]
+ mov (4 * 16 * 4), %o0
+2:
+ add %o1, %o0, %o1
+ ldd [%o1 + 0x00], %f0
+ ldd [%o1 + 0x08], %f2
+ std %f0, [%o3 + 0x00]
+ std %f2, [%o3 + 0x08]
+ add %o3, 0x10, %o3
+1:
+ sub %o1, (16 * 4), %o1
+ ldd [%o1 + 0x38], %f0
+ ldd [%o1 + 0x30], %f2
+ ldd [%o1 + 0x28], %f4
+ ldd [%o1 + 0x20], %f6
+ ldd [%o1 + 0x18], %f8
+ ldd [%o1 + 0x10], %f10
+ std %f0, [%o3 + 0x00]
+ std %f2, [%o3 + 0x08]
+ std %f4, [%o3 + 0x10]
+ std %f6, [%o3 + 0x18]
+ std %f8, [%o3 + 0x20]
+ std %f10, [%o3 + 0x28]
+
+ ldd [%o1 + 0x08], %f0
+ ldd [%o1 + 0x00], %f2
+ std %f0, [%o3 + 0x30]
+ std %f2, [%o3 + 0x38]
+ subcc %o0, (16 * 4), %o0
+ bne,pt %icc, 1b
+ add %o3, (16 * 4), %o3
+
+ std %f2, [%o3 - 0x10]
+ std %f0, [%o3 - 0x08]
+
+ retl
+ VISExit
+ENDPROC(camellia_sparc64_key_expand)
+
+ .align 32
+ENTRY(camellia_sparc64_crypt)
+ /* %o0=key, %o1=input, %o2=output, %o3=key_len */
+ VISEntry
+
+ ld [%o1 + 0x00], %f0
+ ld [%o1 + 0x04], %f1
+ ld [%o1 + 0x08], %f2
+ ld [%o1 + 0x0c], %f3
+
+ ldd [%o0 + 0x00], %f4
+ ldd [%o0 + 0x08], %f6
+
+ cmp %o3, 16
+ fxor %f4, %f0, %f0
+ be 1f
+ fxor %f6, %f2, %f2
+
+ ldd [%o0 + 0x10], %f8
+ ldd [%o0 + 0x18], %f10
+ ldd [%o0 + 0x20], %f12
+ ldd [%o0 + 0x28], %f14
+ ldd [%o0 + 0x30], %f16
+ ldd [%o0 + 0x38], %f18
+ ldd [%o0 + 0x40], %f20
+ ldd [%o0 + 0x48], %f22
+ add %o0, 0x40, %o0
+
+ CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+
+1:
+ ldd [%o0 + 0x10], %f8
+ ldd [%o0 + 0x18], %f10
+ ldd [%o0 + 0x20], %f12
+ ldd [%o0 + 0x28], %f14
+ ldd [%o0 + 0x30], %f16
+ ldd [%o0 + 0x38], %f18
+ ldd [%o0 + 0x40], %f20
+ ldd [%o0 + 0x48], %f22
+ ldd [%o0 + 0x50], %f24
+ ldd [%o0 + 0x58], %f26
+ ldd [%o0 + 0x60], %f28
+ ldd [%o0 + 0x68], %f30
+ ldd [%o0 + 0x70], %f32
+ ldd [%o0 + 0x78], %f34
+ ldd [%o0 + 0x80], %f36
+ ldd [%o0 + 0x88], %f38
+ ldd [%o0 + 0x90], %f40
+ ldd [%o0 + 0x98], %f42
+ ldd [%o0 + 0xa0], %f44
+ ldd [%o0 + 0xa8], %f46
+ ldd [%o0 + 0xb0], %f48
+ ldd [%o0 + 0xb8], %f50
+ ldd [%o0 + 0xc0], %f52
+ ldd [%o0 + 0xc8], %f54
+
+ CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+ CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+ CAMELLIA_6ROUNDS(40, 0, 2)
+ fxor %f52, %f2, %f2
+ fxor %f54, %f0, %f0
+
+ st %f2, [%o2 + 0x00]
+ st %f3, [%o2 + 0x04]
+ st %f0, [%o2 + 0x08]
+ st %f1, [%o2 + 0x0c]
+
+ retl
+ VISExit
+ENDPROC(camellia_sparc64_crypt)
+
+ .align 32
+ENTRY(camellia_sparc64_load_keys)
+ /* %o0=key, %o1=key_len */
+ VISEntry
+ ldd [%o0 + 0x00], %f4
+ ldd [%o0 + 0x08], %f6
+ ldd [%o0 + 0x10], %f8
+ ldd [%o0 + 0x18], %f10
+ ldd [%o0 + 0x20], %f12
+ ldd [%o0 + 0x28], %f14
+ ldd [%o0 + 0x30], %f16
+ ldd [%o0 + 0x38], %f18
+ ldd [%o0 + 0x40], %f20
+ ldd [%o0 + 0x48], %f22
+ ldd [%o0 + 0x50], %f24
+ ldd [%o0 + 0x58], %f26
+ ldd [%o0 + 0x60], %f28
+ ldd [%o0 + 0x68], %f30
+ ldd [%o0 + 0x70], %f32
+ ldd [%o0 + 0x78], %f34
+ ldd [%o0 + 0x80], %f36
+ ldd [%o0 + 0x88], %f38
+ ldd [%o0 + 0x90], %f40
+ ldd [%o0 + 0x98], %f42
+ ldd [%o0 + 0xa0], %f44
+ ldd [%o0 + 0xa8], %f46
+ ldd [%o0 + 0xb0], %f48
+ ldd [%o0 + 0xb8], %f50
+ ldd [%o0 + 0xc0], %f52
+ retl
+ ldd [%o0 + 0xc8], %f54
+ENDPROC(camellia_sparc64_load_keys)
+
+ .align 32
+ENTRY(camellia_sparc64_ecb_crypt_3_grand_rounds)
+ /* %o0=input, %o1=output, %o2=len, %o3=key */
+1: ldd [%o0 + 0x00], %f0
+ ldd [%o0 + 0x08], %f2
+ add %o0, 0x10, %o0
+ fxor %f4, %f0, %f0
+ fxor %f6, %f2, %f2
+ CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+ CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+ CAMELLIA_6ROUNDS(40, 0, 2)
+ fxor %f52, %f2, %f2
+ fxor %f54, %f0, %f0
+ std %f2, [%o1 + 0x00]
+ std %f0, [%o1 + 0x08]
+ subcc %o2, 0x10, %o2
+ bne,pt %icc, 1b
+ add %o1, 0x10, %o1
+ retl
+ nop
+ENDPROC(camellia_sparc64_ecb_crypt_3_grand_rounds)
+
+ .align 32
+ENTRY(camellia_sparc64_ecb_crypt_4_grand_rounds)
+ /* %o0=input, %o1=output, %o2=len, %o3=key */
+1: ldd [%o0 + 0x00], %f0
+ ldd [%o0 + 0x08], %f2
+ add %o0, 0x10, %o0
+ fxor %f4, %f0, %f0
+ fxor %f6, %f2, %f2
+ CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+ ldd [%o3 + 0xd0], %f8
+ ldd [%o3 + 0xd8], %f10
+ ldd [%o3 + 0xe0], %f12
+ ldd [%o3 + 0xe8], %f14
+ ldd [%o3 + 0xf0], %f16
+ ldd [%o3 + 0xf8], %f18
+ ldd [%o3 + 0x100], %f20
+ ldd [%o3 + 0x108], %f22
+ CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+ CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
+ CAMELLIA_F(8, 2, 0, 2)
+ CAMELLIA_F(10, 0, 2, 0)
+ ldd [%o3 + 0x10], %f8
+ ldd [%o3 + 0x18], %f10
+ CAMELLIA_F(12, 2, 0, 2)
+ CAMELLIA_F(14, 0, 2, 0)
+ ldd [%o3 + 0x20], %f12
+ ldd [%o3 + 0x28], %f14
+ CAMELLIA_F(16, 2, 0, 2)
+ CAMELLIA_F(18, 0, 2, 0)
+ ldd [%o3 + 0x30], %f16
+ ldd [%o3 + 0x38], %f18
+ fxor %f20, %f2, %f2
+ fxor %f22, %f0, %f0
+ ldd [%o3 + 0x40], %f20
+ ldd [%o3 + 0x48], %f22
+ std %f2, [%o1 + 0x00]
+ std %f0, [%o1 + 0x08]
+ subcc %o2, 0x10, %o2
+ bne,pt %icc, 1b
+ add %o1, 0x10, %o1
+ retl
+ nop
+ENDPROC(camellia_sparc64_ecb_crypt_4_grand_rounds)
+
+ .align 32
+ENTRY(camellia_sparc64_cbc_encrypt_3_grand_rounds)
+ /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
+ ldd [%o4 + 0x00], %f60
+ ldd [%o4 + 0x08], %f62
+1: ldd [%o0 + 0x00], %f0
+ ldd [%o0 + 0x08], %f2
+ add %o0, 0x10, %o0
+ fxor %f60, %f0, %f0
+ fxor %f62, %f2, %f2
+ fxor %f4, %f0, %f0
+ fxor %f6, %f2, %f2
+ CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+ CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+ CAMELLIA_6ROUNDS(40, 0, 2)
+ fxor %f52, %f2, %f60
+ fxor %f54, %f0, %f62
+ std %f60, [%o1 + 0x00]
+ std %f62, [%o1 + 0x08]
+ subcc %o2, 0x10, %o2
+ bne,pt %icc, 1b
+ add %o1, 0x10, %o1
+ std %f60, [%o4 + 0x00]
+ retl
+ std %f62, [%o4 + 0x08]
+ENDPROC(camellia_sparc64_cbc_encrypt_3_grand_rounds)
+
+ .align 32
+ENTRY(camellia_sparc64_cbc_encrypt_4_grand_rounds)
+ /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
+ ldd [%o4 + 0x00], %f60
+ ldd [%o4 + 0x08], %f62
+1: ldd [%o0 + 0x00], %f0
+ ldd [%o0 + 0x08], %f2
+ add %o0, 0x10, %o0
+ fxor %f60, %f0, %f0
+ fxor %f62, %f2, %f2
+ fxor %f4, %f0, %f0
+ fxor %f6, %f2, %f2
+ CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+ ldd [%o3 + 0xd0], %f8
+ ldd [%o3 + 0xd8], %f10
+ ldd [%o3 + 0xe0], %f12
+ ldd [%o3 + 0xe8], %f14
+ ldd [%o3 + 0xf0], %f16
+ ldd [%o3 + 0xf8], %f18
+ ldd [%o3 + 0x100], %f20
+ ldd [%o3 + 0x108], %f22
+ CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+ CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
+ CAMELLIA_F(8, 2, 0, 2)
+ CAMELLIA_F(10, 0, 2, 0)
+ ldd [%o3 + 0x10], %f8
+ ldd [%o3 + 0x18], %f10
+ CAMELLIA_F(12, 2, 0, 2)
+ CAMELLIA_F(14, 0, 2, 0)
+ ldd [%o3 + 0x20], %f12
+ ldd [%o3 + 0x28], %f14
+ CAMELLIA_F(16, 2, 0, 2)
+ CAMELLIA_F(18, 0, 2, 0)
+ ldd [%o3 + 0x30], %f16
+ ldd [%o3 + 0x38], %f18
+ fxor %f20, %f2, %f60
+ fxor %f22, %f0, %f62
+ ldd [%o3 + 0x40], %f20
+ ldd [%o3 + 0x48], %f22
+ std %f60, [%o1 + 0x00]
+ std %f62, [%o1 + 0x08]
+ subcc %o2, 0x10, %o2
+ bne,pt %icc, 1b
+ add %o1, 0x10, %o1
+ std %f60, [%o4 + 0x00]
+ retl
+ std %f62, [%o4 + 0x08]
+ENDPROC(camellia_sparc64_cbc_encrypt_4_grand_rounds)
+
+ .align 32
+ENTRY(camellia_sparc64_cbc_decrypt_3_grand_rounds)
+ /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
+ ldd [%o4 + 0x00], %f60
+ ldd [%o4 + 0x08], %f62
+1: ldd [%o0 + 0x00], %f56
+ ldd [%o0 + 0x08], %f58
+ add %o0, 0x10, %o0
+ fxor %f4, %f56, %f0
+ fxor %f6, %f58, %f2
+ CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+ CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+ CAMELLIA_6ROUNDS(40, 0, 2)
+ fxor %f52, %f2, %f2
+ fxor %f54, %f0, %f0
+ fxor %f60, %f2, %f2
+ fxor %f62, %f0, %f0
+ fsrc2 %f56, %f60
+ fsrc2 %f58, %f62
+ std %f2, [%o1 + 0x00]
+ std %f0, [%o1 + 0x08]
+ subcc %o2, 0x10, %o2
+ bne,pt %icc, 1b
+ add %o1, 0x10, %o1
+ std %f60, [%o4 + 0x00]
+ retl
+ std %f62, [%o4 + 0x08]
+ENDPROC(camellia_sparc64_cbc_decrypt_3_grand_rounds)
+
+ .align 32
+ENTRY(camellia_sparc64_cbc_decrypt_4_grand_rounds)
+ /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
+ ldd [%o4 + 0x00], %f60
+ ldd [%o4 + 0x08], %f62
+1: ldd [%o0 + 0x00], %f56
+ ldd [%o0 + 0x08], %f58
+ add %o0, 0x10, %o0
+ fxor %f4, %f56, %f0
+ fxor %f6, %f58, %f2
+ CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+ ldd [%o3 + 0xd0], %f8
+ ldd [%o3 + 0xd8], %f10
+ ldd [%o3 + 0xe0], %f12
+ ldd [%o3 + 0xe8], %f14
+ ldd [%o3 + 0xf0], %f16
+ ldd [%o3 + 0xf8], %f18
+ ldd [%o3 + 0x100], %f20
+ ldd [%o3 + 0x108], %f22
+ CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+ CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
+ CAMELLIA_F(8, 2, 0, 2)
+ CAMELLIA_F(10, 0, 2, 0)
+ ldd [%o3 + 0x10], %f8
+ ldd [%o3 + 0x18], %f10
+ CAMELLIA_F(12, 2, 0, 2)
+ CAMELLIA_F(14, 0, 2, 0)
+ ldd [%o3 + 0x20], %f12
+ ldd [%o3 + 0x28], %f14
+ CAMELLIA_F(16, 2, 0, 2)
+ CAMELLIA_F(18, 0, 2, 0)
+ ldd [%o3 + 0x30], %f16
+ ldd [%o3 + 0x38], %f18
+ fxor %f20, %f2, %f2
+ fxor %f22, %f0, %f0
+ ldd [%o3 + 0x40], %f20
+ ldd [%o3 + 0x48], %f22
+ fxor %f60, %f2, %f2
+ fxor %f62, %f0, %f0
+ fsrc2 %f56, %f60
+ fsrc2 %f58, %f62
+ std %f2, [%o1 + 0x00]
+ std %f0, [%o1 + 0x08]
+ subcc %o2, 0x10, %o2
+ bne,pt %icc, 1b
+ add %o1, 0x10, %o1
+ std %f60, [%o4 + 0x00]
+ retl
+ std %f62, [%o4 + 0x08]
+ENDPROC(camellia_sparc64_cbc_decrypt_4_grand_rounds)
diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c
new file mode 100644
index 00000000000..42905c08429
--- /dev/null
+++ b/arch/sparc/crypto/camellia_glue.c
@@ -0,0 +1,322 @@
+/* Glue code for CAMELLIA encryption optimized for sparc64 crypto opcodes.
+ *
+ * Copyright (C) 2012 David S. Miller <davem@davemloft.net>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <crypto/algapi.h>
+
+#include <asm/fpumacro.h>
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+#include "opcodes.h"
+
+#define CAMELLIA_MIN_KEY_SIZE 16
+#define CAMELLIA_MAX_KEY_SIZE 32
+#define CAMELLIA_BLOCK_SIZE 16
+#define CAMELLIA_TABLE_BYTE_LEN 272
+
+struct camellia_sparc64_ctx {
+ u64 encrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
+ u64 decrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
+ int key_len;
+};
+
+extern void camellia_sparc64_key_expand(const u32 *in_key, u64 *encrypt_key,
+ unsigned int key_len, u64 *decrypt_key);
+
+static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key,
+ unsigned int key_len)
+{
+ struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
+ const u32 *in_key = (const u32 *) _in_key;
+ u32 *flags = &tfm->crt_flags;
+
+ if (key_len != 16 && key_len != 24 && key_len != 32) {
+ *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+
+ ctx->key_len = key_len;
+
+ camellia_sparc64_key_expand(in_key, &ctx->encrypt_key[0],
+ key_len, &ctx->decrypt_key[0]);
+ return 0;
+}
+
+extern void camellia_sparc64_crypt(const u64 *key, const u32 *input,
+ u32 *output, unsigned int key_len);
+
+static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ camellia_sparc64_crypt(&ctx->encrypt_key[0],
+ (const u32 *) src,
+ (u32 *) dst, ctx->key_len);
+}
+
+static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ camellia_sparc64_crypt(&ctx->decrypt_key[0],
+ (const u32 *) src,
+ (u32 *) dst, ctx->key_len);
+}
+
+extern void camellia_sparc64_load_keys(const u64 *key, unsigned int key_len);
+
+typedef void ecb_crypt_op(const u64 *input, u64 *output, unsigned int len,
+ const u64 *key);
+
+extern ecb_crypt_op camellia_sparc64_ecb_crypt_3_grand_rounds;
+extern ecb_crypt_op camellia_sparc64_ecb_crypt_4_grand_rounds;
+
+#define CAMELLIA_BLOCK_MASK (~(CAMELLIA_BLOCK_SIZE - 1))
+
+static int __ecb_crypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes, bool encrypt)
+{
+ struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ ecb_crypt_op *op;
+ const u64 *key;
+ int err;
+
+ op = camellia_sparc64_ecb_crypt_3_grand_rounds;
+ if (ctx->key_len != 16)
+ op = camellia_sparc64_ecb_crypt_4_grand_rounds;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ if (encrypt)
+ key = &ctx->encrypt_key[0];
+ else
+ key = &ctx->decrypt_key[0];
+ camellia_sparc64_load_keys(key, ctx->key_len);
+ while ((nbytes = walk.nbytes)) {
+ unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
+
+ if (likely(block_len)) {
+ const u64 *src64;
+ u64 *dst64;
+
+ src64 = (const u64 *)walk.src.virt.addr;
+ dst64 = (u64 *) walk.dst.virt.addr;
+ op(src64, dst64, block_len, key);
+ }
+ nbytes &= CAMELLIA_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ fprs_write(0);
+ return err;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ return __ecb_crypt(desc, dst, src, nbytes, true);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ return __ecb_crypt(desc, dst, src, nbytes, false);
+}
+
+typedef void cbc_crypt_op(const u64 *input, u64 *output, unsigned int len,
+ const u64 *key, u64 *iv);
+
+extern cbc_crypt_op camellia_sparc64_cbc_encrypt_3_grand_rounds;
+extern cbc_crypt_op camellia_sparc64_cbc_encrypt_4_grand_rounds;
+extern cbc_crypt_op camellia_sparc64_cbc_decrypt_3_grand_rounds;
+extern cbc_crypt_op camellia_sparc64_cbc_decrypt_4_grand_rounds;
+
+static int cbc_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ cbc_crypt_op *op;
+ const u64 *key;
+ int err;
+
+ op = camellia_sparc64_cbc_encrypt_3_grand_rounds;
+ if (ctx->key_len != 16)
+ op = camellia_sparc64_cbc_encrypt_4_grand_rounds;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ key = &ctx->encrypt_key[0];
+ camellia_sparc64_load_keys(key, ctx->key_len);
+ while ((nbytes = walk.nbytes)) {
+ unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
+
+ if (likely(block_len)) {
+ const u64 *src64;
+ u64 *dst64;
+
+ src64 = (const u64 *)walk.src.virt.addr;
+ dst64 = (u64 *) walk.dst.virt.addr;
+ op(src64, dst64, block_len, key,
+ (u64 *) walk.iv);
+ }
+ nbytes &= CAMELLIA_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ fprs_write(0);
+ return err;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ cbc_crypt_op *op;
+ const u64 *key;
+ int err;
+
+ op = camellia_sparc64_cbc_decrypt_3_grand_rounds;
+ if (ctx->key_len != 16)
+ op = camellia_sparc64_cbc_decrypt_4_grand_rounds;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ key = &ctx->decrypt_key[0];
+ camellia_sparc64_load_keys(key, ctx->key_len);
+ while ((nbytes = walk.nbytes)) {
+ unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
+
+ if (likely(block_len)) {
+ const u64 *src64;
+ u64 *dst64;
+
+ src64 = (const u64 *)walk.src.virt.addr;
+ dst64 = (u64 *) walk.dst.virt.addr;
+ op(src64, dst64, block_len, key,
+ (u64 *) walk.iv);
+ }
+ nbytes &= CAMELLIA_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ fprs_write(0);
+ return err;
+}
+
+static struct crypto_alg algs[] = { {
+ .cra_name = "camellia",
+ .cra_driver_name = "camellia-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
+ .cra_alignmask = 3,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .cia_max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .cia_setkey = camellia_set_key,
+ .cia_encrypt = camellia_encrypt,
+ .cia_decrypt = camellia_decrypt
+ }
+ }
+}, {
+ .cra_name = "ecb(camellia)",
+ .cra_driver_name = "ecb-camellia-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .setkey = camellia_set_key,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+}, {
+ .cra_name = "cbc(camellia)",
+ .cra_driver_name = "cbc-camellia-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .setkey = camellia_set_key,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+}
+};
+
+static bool __init sparc64_has_camellia_opcode(void)
+{
+ unsigned long cfr;
+
+ if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+ return false;
+
+ __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+ if (!(cfr & CFR_CAMELLIA))
+ return false;
+
+ return true;
+}
+
+static int __init camellia_sparc64_mod_init(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(algs); i++)
+ INIT_LIST_HEAD(&algs[i].cra_list);
+
+ if (sparc64_has_camellia_opcode()) {
+ pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n");
+ return crypto_register_algs(algs, ARRAY_SIZE(algs));
+ }
+ pr_info("sparc64 camellia opcodes not available.\n");
+ return -ENODEV;
+}
+
+static void __exit camellia_sparc64_mod_fini(void)
+{
+ crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+}
+
+module_init(camellia_sparc64_mod_init);
+module_exit(camellia_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated");
+
+MODULE_ALIAS("aes");
diff --git a/arch/sparc/crypto/crc32c_asm.S b/arch/sparc/crypto/crc32c_asm.S
new file mode 100644
index 00000000000..2b1976e765b
--- /dev/null
+++ b/arch/sparc/crypto/crc32c_asm.S
@@ -0,0 +1,20 @@
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+#include <asm/asi.h>
+
+#include "opcodes.h"
+
+ENTRY(crc32c_sparc64)
+ /* %o0=crc32p, %o1=data_ptr, %o2=len */
+ VISEntryHalf
+ lda [%o0] ASI_PL, %f1
+1: ldd [%o1], %f2
+ CRC32C(0,2,0)
+ subcc %o2, 8, %o2
+ bne,pt %icc, 1b
+ add %o1, 0x8, %o1
+ sta %f1, [%o0] ASI_PL
+ VISExitHalf
+2: retl
+ nop
+ENDPROC(crc32c_sparc64)
diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c
new file mode 100644
index 00000000000..0bd89cea8d8
--- /dev/null
+++ b/arch/sparc/crypto/crc32c_glue.c
@@ -0,0 +1,179 @@
+/* Glue code for CRC32C optimized for sparc64 crypto opcodes.
+ *
+ * This is based largely upon arch/x86/crypto/crc32c-intel.c
+ *
+ * Copyright (C) 2008 Intel Corporation
+ * Authors: Austin Zhang <austin_zhang@linux.intel.com>
+ * Kent Liu <kent.liu@intel.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/crc32.h>
+
+#include <crypto/internal/hash.h>
+
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+#include "opcodes.h"
+
+/*
+ * Setting the seed allows arbitrary accumulators and flexible XOR policy
+ * If your algorithm starts with ~0, then XOR with ~0 before you set
+ * the seed.
+ */
+static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key,
+ unsigned int keylen)
+{
+ u32 *mctx = crypto_shash_ctx(hash);
+
+ if (keylen != sizeof(u32)) {
+ crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+ *(__le32 *)mctx = le32_to_cpup((__le32 *)key);
+ return 0;
+}
+
+static int crc32c_sparc64_init(struct shash_desc *desc)
+{
+ u32 *mctx = crypto_shash_ctx(desc->tfm);
+ u32 *crcp = shash_desc_ctx(desc);
+
+ *crcp = *mctx;
+
+ return 0;
+}
+
+extern void crc32c_sparc64(u32 *crcp, const u64 *data, unsigned int len);
+
+static void crc32c_compute(u32 *crcp, const u64 *data, unsigned int len)
+{
+ unsigned int asm_len;
+
+ asm_len = len & ~7U;
+ if (asm_len) {
+ crc32c_sparc64(crcp, data, asm_len);
+ data += asm_len / 8;
+ len -= asm_len;
+ }
+ if (len)
+ *crcp = __crc32c_le(*crcp, (const unsigned char *) data, len);
+}
+
+static int crc32c_sparc64_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ u32 *crcp = shash_desc_ctx(desc);
+
+ crc32c_compute(crcp, (const u64 *) data, len);
+
+ return 0;
+}
+
+static int __crc32c_sparc64_finup(u32 *crcp, const u8 *data, unsigned int len,
+ u8 *out)
+{
+ u32 tmp = *crcp;
+
+ crc32c_compute(&tmp, (const u64 *) data, len);
+
+ *(__le32 *) out = ~cpu_to_le32(tmp);
+ return 0;
+}
+
+static int crc32c_sparc64_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ return __crc32c_sparc64_finup(shash_desc_ctx(desc), data, len, out);
+}
+
+static int crc32c_sparc64_final(struct shash_desc *desc, u8 *out)
+{
+ u32 *crcp = shash_desc_ctx(desc);
+
+ *(__le32 *) out = ~cpu_to_le32p(crcp);
+ return 0;
+}
+
+static int crc32c_sparc64_digest(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ return __crc32c_sparc64_finup(crypto_shash_ctx(desc->tfm), data, len,
+ out);
+}
+
+static int crc32c_sparc64_cra_init(struct crypto_tfm *tfm)
+{
+ u32 *key = crypto_tfm_ctx(tfm);
+
+ *key = ~0;
+
+ return 0;
+}
+
+#define CHKSUM_BLOCK_SIZE 1
+#define CHKSUM_DIGEST_SIZE 4
+
+static struct shash_alg alg = {
+ .setkey = crc32c_sparc64_setkey,
+ .init = crc32c_sparc64_init,
+ .update = crc32c_sparc64_update,
+ .final = crc32c_sparc64_final,
+ .finup = crc32c_sparc64_finup,
+ .digest = crc32c_sparc64_digest,
+ .descsize = sizeof(u32),
+ .digestsize = CHKSUM_DIGEST_SIZE,
+ .base = {
+ .cra_name = "crc32c",
+ .cra_driver_name = "crc32c-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_blocksize = CHKSUM_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(u32),
+ .cra_alignmask = 7,
+ .cra_module = THIS_MODULE,
+ .cra_init = crc32c_sparc64_cra_init,
+ }
+};
+
+static bool __init sparc64_has_crc32c_opcode(void)
+{
+ unsigned long cfr;
+
+ if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+ return false;
+
+ __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+ if (!(cfr & CFR_CRC32C))
+ return false;
+
+ return true;
+}
+
+static int __init crc32c_sparc64_mod_init(void)
+{
+ if (sparc64_has_crc32c_opcode()) {
+ pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n");
+ return crypto_register_shash(&alg);
+ }
+ pr_info("sparc64 crc32c opcode not available.\n");
+ return -ENODEV;
+}
+
+static void __exit crc32c_sparc64_mod_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+module_init(crc32c_sparc64_mod_init);
+module_exit(crc32c_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated");
+
+MODULE_ALIAS("crc32c");
diff --git a/arch/sparc/crypto/crop_devid.c b/arch/sparc/crypto/crop_devid.c
new file mode 100644
index 00000000000..5f5724a0ae2
--- /dev/null
+++ b/arch/sparc/crypto/crop_devid.c
@@ -0,0 +1,14 @@
+#include <linux/module.h>
+#include <linux/of_device.h>
+
+/* This is a dummy device table linked into all of the crypto
+ * opcode drivers. It serves to trigger the module autoloading
+ * mechanisms in userspace which scan the OF device tree and
+ * load any modules which have device table entries that
+ * match OF device nodes.
+ */
+static const struct of_device_id crypto_opcode_match[] = {
+ { .name = "cpu", .compatible = "sun4v", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, crypto_opcode_match);
diff --git a/arch/sparc/crypto/des_asm.S b/arch/sparc/crypto/des_asm.S
new file mode 100644
index 00000000000..30b6e90b28b
--- /dev/null
+++ b/arch/sparc/crypto/des_asm.S
@@ -0,0 +1,418 @@
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+#include "opcodes.h"
+
+ .align 32
+ENTRY(des_sparc64_key_expand)
+ /* %o0=input_key, %o1=output_key */
+ VISEntryHalf
+ ld [%o0 + 0x00], %f0
+ ld [%o0 + 0x04], %f1
+ DES_KEXPAND(0, 0, 0)
+ DES_KEXPAND(0, 1, 2)
+ DES_KEXPAND(2, 3, 6)
+ DES_KEXPAND(2, 2, 4)
+ DES_KEXPAND(6, 3, 10)
+ DES_KEXPAND(6, 2, 8)
+ DES_KEXPAND(10, 3, 14)
+ DES_KEXPAND(10, 2, 12)
+ DES_KEXPAND(14, 1, 16)
+ DES_KEXPAND(16, 3, 20)
+ DES_KEXPAND(16, 2, 18)
+ DES_KEXPAND(20, 3, 24)
+ DES_KEXPAND(20, 2, 22)
+ DES_KEXPAND(24, 3, 28)
+ DES_KEXPAND(24, 2, 26)
+ DES_KEXPAND(28, 1, 30)
+ std %f0, [%o1 + 0x00]
+ std %f2, [%o1 + 0x08]
+ std %f4, [%o1 + 0x10]
+ std %f6, [%o1 + 0x18]
+ std %f8, [%o1 + 0x20]
+ std %f10, [%o1 + 0x28]
+ std %f12, [%o1 + 0x30]
+ std %f14, [%o1 + 0x38]
+ std %f16, [%o1 + 0x40]
+ std %f18, [%o1 + 0x48]
+ std %f20, [%o1 + 0x50]
+ std %f22, [%o1 + 0x58]
+ std %f24, [%o1 + 0x60]
+ std %f26, [%o1 + 0x68]
+ std %f28, [%o1 + 0x70]
+ std %f30, [%o1 + 0x78]
+ retl
+ VISExitHalf
+ENDPROC(des_sparc64_key_expand)
+
+ .align 32
+ENTRY(des_sparc64_crypt)
+ /* %o0=key, %o1=input, %o2=output */
+ VISEntry
+ ldd [%o1 + 0x00], %f32
+ ldd [%o0 + 0x00], %f0
+ ldd [%o0 + 0x08], %f2
+ ldd [%o0 + 0x10], %f4
+ ldd [%o0 + 0x18], %f6
+ ldd [%o0 + 0x20], %f8
+ ldd [%o0 + 0x28], %f10
+ ldd [%o0 + 0x30], %f12
+ ldd [%o0 + 0x38], %f14
+ ldd [%o0 + 0x40], %f16
+ ldd [%o0 + 0x48], %f18
+ ldd [%o0 + 0x50], %f20
+ ldd [%o0 + 0x58], %f22
+ ldd [%o0 + 0x60], %f24
+ ldd [%o0 + 0x68], %f26
+ ldd [%o0 + 0x70], %f28
+ ldd [%o0 + 0x78], %f30
+ DES_IP(32, 32)
+ DES_ROUND(0, 2, 32, 32)
+ DES_ROUND(4, 6, 32, 32)
+ DES_ROUND(8, 10, 32, 32)
+ DES_ROUND(12, 14, 32, 32)
+ DES_ROUND(16, 18, 32, 32)
+ DES_ROUND(20, 22, 32, 32)
+ DES_ROUND(24, 26, 32, 32)
+ DES_ROUND(28, 30, 32, 32)
+ DES_IIP(32, 32)
+ std %f32, [%o2 + 0x00]
+ retl
+ VISExit
+ENDPROC(des_sparc64_crypt)
+
+ .align 32
+ENTRY(des_sparc64_load_keys)
+ /* %o0=key */
+ VISEntry
+ ldd [%o0 + 0x00], %f0
+ ldd [%o0 + 0x08], %f2
+ ldd [%o0 + 0x10], %f4
+ ldd [%o0 + 0x18], %f6
+ ldd [%o0 + 0x20], %f8
+ ldd [%o0 + 0x28], %f10
+ ldd [%o0 + 0x30], %f12
+ ldd [%o0 + 0x38], %f14
+ ldd [%o0 + 0x40], %f16
+ ldd [%o0 + 0x48], %f18
+ ldd [%o0 + 0x50], %f20
+ ldd [%o0 + 0x58], %f22
+ ldd [%o0 + 0x60], %f24
+ ldd [%o0 + 0x68], %f26
+ ldd [%o0 + 0x70], %f28
+ retl
+ ldd [%o0 + 0x78], %f30
+ENDPROC(des_sparc64_load_keys)
+
+ .align 32
+ENTRY(des_sparc64_ecb_crypt)
+ /* %o0=input, %o1=output, %o2=len */
+1: ldd [%o0 + 0x00], %f32
+ add %o0, 0x08, %o0
+ DES_IP(32, 32)
+ DES_ROUND(0, 2, 32, 32)
+ DES_ROUND(4, 6, 32, 32)
+ DES_ROUND(8, 10, 32, 32)
+ DES_ROUND(12, 14, 32, 32)
+ DES_ROUND(16, 18, 32, 32)
+ DES_ROUND(20, 22, 32, 32)
+ DES_ROUND(24, 26, 32, 32)
+ DES_ROUND(28, 30, 32, 32)
+ DES_IIP(32, 32)
+ std %f32, [%o1 + 0x00]
+ subcc %o2, 0x08, %o2
+ bne,pt %icc, 1b
+ add %o1, 0x08, %o1
+ retl
+ nop
+ENDPROC(des_sparc64_ecb_crypt)
+
+ .align 32
+ENTRY(des_sparc64_cbc_encrypt)
+ /* %o0=input, %o1=output, %o2=len, %o3=IV */
+ ldd [%o3 + 0x00], %f32
+1: ldd [%o0 + 0x00], %f34
+ fxor %f32, %f34, %f32
+ DES_IP(32, 32)
+ DES_ROUND(0, 2, 32, 32)
+ DES_ROUND(4, 6, 32, 32)
+ DES_ROUND(8, 10, 32, 32)
+ DES_ROUND(12, 14, 32, 32)
+ DES_ROUND(16, 18, 32, 32)
+ DES_ROUND(20, 22, 32, 32)
+ DES_ROUND(24, 26, 32, 32)
+ DES_ROUND(28, 30, 32, 32)
+ DES_IIP(32, 32)
+ std %f32, [%o1 + 0x00]
+ add %o0, 0x08, %o0
+ subcc %o2, 0x08, %o2
+ bne,pt %icc, 1b
+ add %o1, 0x08, %o1
+ retl
+ std %f32, [%o3 + 0x00]
+ENDPROC(des_sparc64_cbc_encrypt)
+
+ .align 32
+ENTRY(des_sparc64_cbc_decrypt)
+ /* %o0=input, %o1=output, %o2=len, %o3=IV */
+ ldd [%o3 + 0x00], %f34
+1: ldd [%o0 + 0x00], %f36
+ DES_IP(36, 32)
+ DES_ROUND(0, 2, 32, 32)
+ DES_ROUND(4, 6, 32, 32)
+ DES_ROUND(8, 10, 32, 32)
+ DES_ROUND(12, 14, 32, 32)
+ DES_ROUND(16, 18, 32, 32)
+ DES_ROUND(20, 22, 32, 32)
+ DES_ROUND(24, 26, 32, 32)
+ DES_ROUND(28, 30, 32, 32)
+ DES_IIP(32, 32)
+ fxor %f32, %f34, %f32
+ fsrc2 %f36, %f34
+ std %f32, [%o1 + 0x00]
+ add %o0, 0x08, %o0
+ subcc %o2, 0x08, %o2
+ bne,pt %icc, 1b
+ add %o1, 0x08, %o1
+ retl
+ std %f36, [%o3 + 0x00]
+ENDPROC(des_sparc64_cbc_decrypt)
+
+ .align 32
+ENTRY(des3_ede_sparc64_crypt)
+ /* %o0=key, %o1=input, %o2=output */
+ VISEntry
+ ldd [%o1 + 0x00], %f32
+ ldd [%o0 + 0x00], %f0
+ ldd [%o0 + 0x08], %f2
+ ldd [%o0 + 0x10], %f4
+ ldd [%o0 + 0x18], %f6
+ ldd [%o0 + 0x20], %f8
+ ldd [%o0 + 0x28], %f10
+ ldd [%o0 + 0x30], %f12
+ ldd [%o0 + 0x38], %f14
+ ldd [%o0 + 0x40], %f16
+ ldd [%o0 + 0x48], %f18
+ ldd [%o0 + 0x50], %f20
+ ldd [%o0 + 0x58], %f22
+ ldd [%o0 + 0x60], %f24
+ ldd [%o0 + 0x68], %f26
+ ldd [%o0 + 0x70], %f28
+ ldd [%o0 + 0x78], %f30
+ DES_IP(32, 32)
+ DES_ROUND(0, 2, 32, 32)
+ ldd [%o0 + 0x80], %f0
+ ldd [%o0 + 0x88], %f2
+ DES_ROUND(4, 6, 32, 32)
+ ldd [%o0 + 0x90], %f4
+ ldd [%o0 + 0x98], %f6
+ DES_ROUND(8, 10, 32, 32)
+ ldd [%o0 + 0xa0], %f8
+ ldd [%o0 + 0xa8], %f10
+ DES_ROUND(12, 14, 32, 32)
+ ldd [%o0 + 0xb0], %f12
+ ldd [%o0 + 0xb8], %f14
+ DES_ROUND(16, 18, 32, 32)
+ ldd [%o0 + 0xc0], %f16
+ ldd [%o0 + 0xc8], %f18
+ DES_ROUND(20, 22, 32, 32)
+ ldd [%o0 + 0xd0], %f20
+ ldd [%o0 + 0xd8], %f22
+ DES_ROUND(24, 26, 32, 32)
+ ldd [%o0 + 0xe0], %f24
+ ldd [%o0 + 0xe8], %f26
+ DES_ROUND(28, 30, 32, 32)
+ ldd [%o0 + 0xf0], %f28
+ ldd [%o0 + 0xf8], %f30
+ DES_IIP(32, 32)
+ DES_IP(32, 32)
+ DES_ROUND(0, 2, 32, 32)
+ ldd [%o0 + 0x100], %f0
+ ldd [%o0 + 0x108], %f2
+ DES_ROUND(4, 6, 32, 32)
+ ldd [%o0 + 0x110], %f4
+ ldd [%o0 + 0x118], %f6
+ DES_ROUND(8, 10, 32, 32)
+ ldd [%o0 + 0x120], %f8
+ ldd [%o0 + 0x128], %f10
+ DES_ROUND(12, 14, 32, 32)
+ ldd [%o0 + 0x130], %f12
+ ldd [%o0 + 0x138], %f14
+ DES_ROUND(16, 18, 32, 32)
+ ldd [%o0 + 0x140], %f16
+ ldd [%o0 + 0x148], %f18
+ DES_ROUND(20, 22, 32, 32)
+ ldd [%o0 + 0x150], %f20
+ ldd [%o0 + 0x158], %f22
+ DES_ROUND(24, 26, 32, 32)
+ ldd [%o0 + 0x160], %f24
+ ldd [%o0 + 0x168], %f26
+ DES_ROUND(28, 30, 32, 32)
+ ldd [%o0 + 0x170], %f28
+ ldd [%o0 + 0x178], %f30
+ DES_IIP(32, 32)
+ DES_IP(32, 32)
+ DES_ROUND(0, 2, 32, 32)
+ DES_ROUND(4, 6, 32, 32)
+ DES_ROUND(8, 10, 32, 32)
+ DES_ROUND(12, 14, 32, 32)
+ DES_ROUND(16, 18, 32, 32)
+ DES_ROUND(20, 22, 32, 32)
+ DES_ROUND(24, 26, 32, 32)
+ DES_ROUND(28, 30, 32, 32)
+ DES_IIP(32, 32)
+
+ std %f32, [%o2 + 0x00]
+ retl
+ VISExit
+ENDPROC(des3_ede_sparc64_crypt)
+
+ .align 32
+ENTRY(des3_ede_sparc64_load_keys)
+ /* %o0=key */
+ VISEntry
+ ldd [%o0 + 0x00], %f0
+ ldd [%o0 + 0x08], %f2
+ ldd [%o0 + 0x10], %f4
+ ldd [%o0 + 0x18], %f6
+ ldd [%o0 + 0x20], %f8
+ ldd [%o0 + 0x28], %f10
+ ldd [%o0 + 0x30], %f12
+ ldd [%o0 + 0x38], %f14
+ ldd [%o0 + 0x40], %f16
+ ldd [%o0 + 0x48], %f18
+ ldd [%o0 + 0x50], %f20
+ ldd [%o0 + 0x58], %f22
+ ldd [%o0 + 0x60], %f24
+ ldd [%o0 + 0x68], %f26
+ ldd [%o0 + 0x70], %f28
+ ldd [%o0 + 0x78], %f30
+ ldd [%o0 + 0x80], %f32
+ ldd [%o0 + 0x88], %f34
+ ldd [%o0 + 0x90], %f36
+ ldd [%o0 + 0x98], %f38
+ ldd [%o0 + 0xa0], %f40
+ ldd [%o0 + 0xa8], %f42
+ ldd [%o0 + 0xb0], %f44
+ ldd [%o0 + 0xb8], %f46
+ ldd [%o0 + 0xc0], %f48
+ ldd [%o0 + 0xc8], %f50
+ ldd [%o0 + 0xd0], %f52
+ ldd [%o0 + 0xd8], %f54
+ ldd [%o0 + 0xe0], %f56
+ retl
+ ldd [%o0 + 0xe8], %f58
+ENDPROC(des3_ede_sparc64_load_keys)
+
+#define DES3_LOOP_BODY(X) \
+ DES_IP(X, X) \
+ DES_ROUND(0, 2, X, X) \
+ DES_ROUND(4, 6, X, X) \
+ DES_ROUND(8, 10, X, X) \
+ DES_ROUND(12, 14, X, X) \
+ DES_ROUND(16, 18, X, X) \
+ ldd [%o0 + 0xf0], %f16; \
+ ldd [%o0 + 0xf8], %f18; \
+ DES_ROUND(20, 22, X, X) \
+ ldd [%o0 + 0x100], %f20; \
+ ldd [%o0 + 0x108], %f22; \
+ DES_ROUND(24, 26, X, X) \
+ ldd [%o0 + 0x110], %f24; \
+ ldd [%o0 + 0x118], %f26; \
+ DES_ROUND(28, 30, X, X) \
+ ldd [%o0 + 0x120], %f28; \
+ ldd [%o0 + 0x128], %f30; \
+ DES_IIP(X, X) \
+ DES_IP(X, X) \
+ DES_ROUND(32, 34, X, X) \
+ ldd [%o0 + 0x130], %f0; \
+ ldd [%o0 + 0x138], %f2; \
+ DES_ROUND(36, 38, X, X) \
+ ldd [%o0 + 0x140], %f4; \
+ ldd [%o0 + 0x148], %f6; \
+ DES_ROUND(40, 42, X, X) \
+ ldd [%o0 + 0x150], %f8; \
+ ldd [%o0 + 0x158], %f10; \
+ DES_ROUND(44, 46, X, X) \
+ ldd [%o0 + 0x160], %f12; \
+ ldd [%o0 + 0x168], %f14; \
+ DES_ROUND(48, 50, X, X) \
+ DES_ROUND(52, 54, X, X) \
+ DES_ROUND(56, 58, X, X) \
+ DES_ROUND(16, 18, X, X) \
+ ldd [%o0 + 0x170], %f16; \
+ ldd [%o0 + 0x178], %f18; \
+ DES_IIP(X, X) \
+ DES_IP(X, X) \
+ DES_ROUND(20, 22, X, X) \
+ ldd [%o0 + 0x50], %f20; \
+ ldd [%o0 + 0x58], %f22; \
+ DES_ROUND(24, 26, X, X) \
+ ldd [%o0 + 0x60], %f24; \
+ ldd [%o0 + 0x68], %f26; \
+ DES_ROUND(28, 30, X, X) \
+ ldd [%o0 + 0x70], %f28; \
+ ldd [%o0 + 0x78], %f30; \
+ DES_ROUND(0, 2, X, X) \
+ ldd [%o0 + 0x00], %f0; \
+ ldd [%o0 + 0x08], %f2; \
+ DES_ROUND(4, 6, X, X) \
+ ldd [%o0 + 0x10], %f4; \
+ ldd [%o0 + 0x18], %f6; \
+ DES_ROUND(8, 10, X, X) \
+ ldd [%o0 + 0x20], %f8; \
+ ldd [%o0 + 0x28], %f10; \
+ DES_ROUND(12, 14, X, X) \
+ ldd [%o0 + 0x30], %f12; \
+ ldd [%o0 + 0x38], %f14; \
+ DES_ROUND(16, 18, X, X) \
+ ldd [%o0 + 0x40], %f16; \
+ ldd [%o0 + 0x48], %f18; \
+ DES_IIP(X, X)
+
+ .align 32
+ENTRY(des3_ede_sparc64_ecb_crypt)
+ /* %o0=key, %o1=input, %o2=output, %o3=len */
+1: ldd [%o1 + 0x00], %f60
+ DES3_LOOP_BODY(60)
+ std %f60, [%o2 + 0x00]
+ subcc %o3, 0x08, %o3
+ bne,pt %icc, 1b
+ add %o2, 0x08, %o2
+ retl
+ nop
+ENDPROC(des3_ede_sparc64_ecb_crypt)
+
+ .align 32
+ENTRY(des3_ede_sparc64_cbc_encrypt)
+ /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+ ldd [%o4 + 0x00], %f60
+1: ldd [%o1 + 0x00], %f62
+ fxor %f60, %f62, %f60
+ DES3_LOOP_BODY(60)
+ std %f60, [%o2 + 0x00]
+ add %o1, 0x08, %o1
+ subcc %o3, 0x08, %o3
+ bne,pt %icc, 1b
+ add %o2, 0x08, %o2
+ retl
+ std %f60, [%o4 + 0x00]
+ENDPROC(des3_ede_sparc64_cbc_encrypt)
+
+ .align 32
+ENTRY(des3_ede_sparc64_cbc_decrypt)
+ /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+ ldd [%o4 + 0x00], %f62
+1: ldx [%o1 + 0x00], %g1
+ MOVXTOD_G1_F60
+ DES3_LOOP_BODY(60)
+ fxor %f62, %f60, %f60
+ MOVXTOD_G1_F62
+ std %f60, [%o2 + 0x00]
+ add %o1, 0x08, %o1
+ subcc %o3, 0x08, %o3
+ bne,pt %icc, 1b
+ add %o2, 0x08, %o2
+ retl
+ stx %g1, [%o4 + 0x00]
+ENDPROC(des3_ede_sparc64_cbc_decrypt)
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c
new file mode 100644
index 00000000000..c4940c2d307
--- /dev/null
+++ b/arch/sparc/crypto/des_glue.c
@@ -0,0 +1,529 @@
+/* Glue code for DES encryption optimized for sparc64 crypto opcodes.
+ *
+ * Copyright (C) 2012 David S. Miller <davem@davemloft.net>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <crypto/algapi.h>
+#include <crypto/des.h>
+
+#include <asm/fpumacro.h>
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+#include "opcodes.h"
+
+struct des_sparc64_ctx {
+ u64 encrypt_expkey[DES_EXPKEY_WORDS / 2];
+ u64 decrypt_expkey[DES_EXPKEY_WORDS / 2];
+};
+
+struct des3_ede_sparc64_ctx {
+ u64 encrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2];
+ u64 decrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2];
+};
+
+static void encrypt_to_decrypt(u64 *d, const u64 *e)
+{
+ const u64 *s = e + (DES_EXPKEY_WORDS / 2) - 1;
+ int i;
+
+ for (i = 0; i < DES_EXPKEY_WORDS / 2; i++)
+ *d++ = *s--;
+}
+
+extern void des_sparc64_key_expand(const u32 *input_key, u64 *key);
+
+static int des_set_key(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct des_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
+ u32 *flags = &tfm->crt_flags;
+ u32 tmp[DES_EXPKEY_WORDS];
+ int ret;
+
+ /* Even though we have special instructions for key expansion,
+ * we call des_ekey() so that we don't have to write our own
+ * weak key detection code.
+ */
+ ret = des_ekey(tmp, key);
+ if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+ *flags |= CRYPTO_TFM_RES_WEAK_KEY;
+ return -EINVAL;
+ }
+
+ des_sparc64_key_expand((const u32 *) key, &dctx->encrypt_expkey[0]);
+ encrypt_to_decrypt(&dctx->decrypt_expkey[0], &dctx->encrypt_expkey[0]);
+
+ return 0;
+}
+
+extern void des_sparc64_crypt(const u64 *key, const u64 *input,
+ u64 *output);
+
+static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
+ const u64 *K = ctx->encrypt_expkey;
+
+ des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
+}
+
+static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
+ const u64 *K = ctx->decrypt_expkey;
+
+ des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
+}
+
+extern void des_sparc64_load_keys(const u64 *key);
+
+extern void des_sparc64_ecb_crypt(const u64 *input, u64 *output,
+ unsigned int len);
+
+#define DES_BLOCK_MASK (~(DES_BLOCK_SIZE - 1))
+
+static int __ecb_crypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes, bool encrypt)
+{
+ struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ if (encrypt)
+ des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
+ else
+ des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
+ while ((nbytes = walk.nbytes)) {
+ unsigned int block_len = nbytes & DES_BLOCK_MASK;
+
+ if (likely(block_len)) {
+ des_sparc64_ecb_crypt((const u64 *)walk.src.virt.addr,
+ (u64 *) walk.dst.virt.addr,
+ block_len);
+ }
+ nbytes &= DES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ fprs_write(0);
+ return err;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ return __ecb_crypt(desc, dst, src, nbytes, true);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ return __ecb_crypt(desc, dst, src, nbytes, false);
+}
+
+extern void des_sparc64_cbc_encrypt(const u64 *input, u64 *output,
+ unsigned int len, u64 *iv);
+
+static int cbc_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
+ while ((nbytes = walk.nbytes)) {
+ unsigned int block_len = nbytes & DES_BLOCK_MASK;
+
+ if (likely(block_len)) {
+ des_sparc64_cbc_encrypt((const u64 *)walk.src.virt.addr,
+ (u64 *) walk.dst.virt.addr,
+ block_len, (u64 *) walk.iv);
+ }
+ nbytes &= DES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ fprs_write(0);
+ return err;
+}
+
+extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output,
+ unsigned int len, u64 *iv);
+
+static int cbc_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
+ while ((nbytes = walk.nbytes)) {
+ unsigned int block_len = nbytes & DES_BLOCK_MASK;
+
+ if (likely(block_len)) {
+ des_sparc64_cbc_decrypt((const u64 *)walk.src.virt.addr,
+ (u64 *) walk.dst.virt.addr,
+ block_len, (u64 *) walk.iv);
+ }
+ nbytes &= DES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ fprs_write(0);
+ return err;
+}
+
+static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct des3_ede_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
+ const u32 *K = (const u32 *)key;
+ u32 *flags = &tfm->crt_flags;
+ u64 k1[DES_EXPKEY_WORDS / 2];
+ u64 k2[DES_EXPKEY_WORDS / 2];
+ u64 k3[DES_EXPKEY_WORDS / 2];
+
+ if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
+ !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
+ (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+ *flags |= CRYPTO_TFM_RES_WEAK_KEY;
+ return -EINVAL;
+ }
+
+ des_sparc64_key_expand((const u32 *)key, k1);
+ key += DES_KEY_SIZE;
+ des_sparc64_key_expand((const u32 *)key, k2);
+ key += DES_KEY_SIZE;
+ des_sparc64_key_expand((const u32 *)key, k3);
+
+ memcpy(&dctx->encrypt_expkey[0], &k1[0], sizeof(k1));
+ encrypt_to_decrypt(&dctx->encrypt_expkey[DES_EXPKEY_WORDS / 2], &k2[0]);
+ memcpy(&dctx->encrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2],
+ &k3[0], sizeof(k3));
+
+ encrypt_to_decrypt(&dctx->decrypt_expkey[0], &k3[0]);
+ memcpy(&dctx->decrypt_expkey[DES_EXPKEY_WORDS / 2],
+ &k2[0], sizeof(k2));
+ encrypt_to_decrypt(&dctx->decrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2],
+ &k1[0]);
+
+ return 0;
+}
+
+extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input,
+ u64 *output);
+
+static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
+ const u64 *K = ctx->encrypt_expkey;
+
+ des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
+}
+
+static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
+ const u64 *K = ctx->decrypt_expkey;
+
+ des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
+}
+
+extern void des3_ede_sparc64_load_keys(const u64 *key);
+
+extern void des3_ede_sparc64_ecb_crypt(const u64 *expkey, const u64 *input,
+ u64 *output, unsigned int len);
+
+static int __ecb3_crypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes, bool encrypt)
+{
+ struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ const u64 *K;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ if (encrypt)
+ K = &ctx->encrypt_expkey[0];
+ else
+ K = &ctx->decrypt_expkey[0];
+ des3_ede_sparc64_load_keys(K);
+ while ((nbytes = walk.nbytes)) {
+ unsigned int block_len = nbytes & DES_BLOCK_MASK;
+
+ if (likely(block_len)) {
+ const u64 *src64 = (const u64 *)walk.src.virt.addr;
+ des3_ede_sparc64_ecb_crypt(K, src64,
+ (u64 *) walk.dst.virt.addr,
+ block_len);
+ }
+ nbytes &= DES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ fprs_write(0);
+ return err;
+}
+
+static int ecb3_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ return __ecb3_crypt(desc, dst, src, nbytes, true);
+}
+
+static int ecb3_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ return __ecb3_crypt(desc, dst, src, nbytes, false);
+}
+
+extern void des3_ede_sparc64_cbc_encrypt(const u64 *expkey, const u64 *input,
+ u64 *output, unsigned int len,
+ u64 *iv);
+
+static int cbc3_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ const u64 *K;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ K = &ctx->encrypt_expkey[0];
+ des3_ede_sparc64_load_keys(K);
+ while ((nbytes = walk.nbytes)) {
+ unsigned int block_len = nbytes & DES_BLOCK_MASK;
+
+ if (likely(block_len)) {
+ const u64 *src64 = (const u64 *)walk.src.virt.addr;
+ des3_ede_sparc64_cbc_encrypt(K, src64,
+ (u64 *) walk.dst.virt.addr,
+ block_len,
+ (u64 *) walk.iv);
+ }
+ nbytes &= DES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ fprs_write(0);
+ return err;
+}
+
+extern void des3_ede_sparc64_cbc_decrypt(const u64 *expkey, const u64 *input,
+ u64 *output, unsigned int len,
+ u64 *iv);
+
+static int cbc3_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ const u64 *K;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ K = &ctx->decrypt_expkey[0];
+ des3_ede_sparc64_load_keys(K);
+ while ((nbytes = walk.nbytes)) {
+ unsigned int block_len = nbytes & DES_BLOCK_MASK;
+
+ if (likely(block_len)) {
+ const u64 *src64 = (const u64 *)walk.src.virt.addr;
+ des3_ede_sparc64_cbc_decrypt(K, src64,
+ (u64 *) walk.dst.virt.addr,
+ block_len,
+ (u64 *) walk.iv);
+ }
+ nbytes &= DES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ fprs_write(0);
+ return err;
+}
+
+static struct crypto_alg algs[] = { {
+ .cra_name = "des",
+ .cra_driver_name = "des-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = DES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct des_sparc64_ctx),
+ .cra_alignmask = 7,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = DES_KEY_SIZE,
+ .cia_max_keysize = DES_KEY_SIZE,
+ .cia_setkey = des_set_key,
+ .cia_encrypt = des_encrypt,
+ .cia_decrypt = des_decrypt
+ }
+ }
+}, {
+ .cra_name = "ecb(des)",
+ .cra_driver_name = "ecb-des-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = DES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct des_sparc64_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .setkey = des_set_key,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+}, {
+ .cra_name = "cbc(des)",
+ .cra_driver_name = "cbc-des-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = DES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct des_sparc64_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .setkey = des_set_key,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+}, {
+ .cra_name = "des3_ede",
+ .cra_driver_name = "des3_ede-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
+ .cra_alignmask = 7,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = DES3_EDE_KEY_SIZE,
+ .cia_max_keysize = DES3_EDE_KEY_SIZE,
+ .cia_setkey = des3_ede_set_key,
+ .cia_encrypt = des3_ede_encrypt,
+ .cia_decrypt = des3_ede_decrypt
+ }
+ }
+}, {
+ .cra_name = "ecb(des3_ede)",
+ .cra_driver_name = "ecb-des3_ede-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = DES3_EDE_KEY_SIZE,
+ .max_keysize = DES3_EDE_KEY_SIZE,
+ .setkey = des3_ede_set_key,
+ .encrypt = ecb3_encrypt,
+ .decrypt = ecb3_decrypt,
+ },
+ },
+}, {
+ .cra_name = "cbc(des3_ede)",
+ .cra_driver_name = "cbc-des3_ede-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = DES3_EDE_KEY_SIZE,
+ .max_keysize = DES3_EDE_KEY_SIZE,
+ .setkey = des3_ede_set_key,
+ .encrypt = cbc3_encrypt,
+ .decrypt = cbc3_decrypt,
+ },
+ },
+} };
+
+static bool __init sparc64_has_des_opcode(void)
+{
+ unsigned long cfr;
+
+ if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+ return false;
+
+ __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+ if (!(cfr & CFR_DES))
+ return false;
+
+ return true;
+}
+
+static int __init des_sparc64_mod_init(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(algs); i++)
+ INIT_LIST_HEAD(&algs[i].cra_list);
+
+ if (sparc64_has_des_opcode()) {
+ pr_info("Using sparc64 des opcodes optimized DES implementation\n");
+ return crypto_register_algs(algs, ARRAY_SIZE(algs));
+ }
+ pr_info("sparc64 des opcodes not available.\n");
+ return -ENODEV;
+}
+
+static void __exit des_sparc64_mod_fini(void)
+{
+ crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+}
+
+module_init(des_sparc64_mod_init);
+module_exit(des_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated");
+
+MODULE_ALIAS("des");
diff --git a/arch/sparc/crypto/md5_asm.S b/arch/sparc/crypto/md5_asm.S
new file mode 100644
index 00000000000..3150404e602
--- /dev/null
+++ b/arch/sparc/crypto/md5_asm.S
@@ -0,0 +1,70 @@
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+#include "opcodes.h"
+
+ENTRY(md5_sparc64_transform)
+ /* %o0 = digest, %o1 = data, %o2 = rounds */
+ VISEntryHalf
+ ld [%o0 + 0x00], %f0
+ ld [%o0 + 0x04], %f1
+ andcc %o1, 0x7, %g0
+ ld [%o0 + 0x08], %f2
+ bne,pn %xcc, 10f
+ ld [%o0 + 0x0c], %f3
+
+1:
+ ldd [%o1 + 0x00], %f8
+ ldd [%o1 + 0x08], %f10
+ ldd [%o1 + 0x10], %f12
+ ldd [%o1 + 0x18], %f14
+ ldd [%o1 + 0x20], %f16
+ ldd [%o1 + 0x28], %f18
+ ldd [%o1 + 0x30], %f20
+ ldd [%o1 + 0x38], %f22
+
+ MD5
+
+ subcc %o2, 1, %o2
+ bne,pt %xcc, 1b
+ add %o1, 0x40, %o1
+
+5:
+ st %f0, [%o0 + 0x00]
+ st %f1, [%o0 + 0x04]
+ st %f2, [%o0 + 0x08]
+ st %f3, [%o0 + 0x0c]
+ retl
+ VISExitHalf
+10:
+ alignaddr %o1, %g0, %o1
+
+ ldd [%o1 + 0x00], %f10
+1:
+ ldd [%o1 + 0x08], %f12
+ ldd [%o1 + 0x10], %f14
+ ldd [%o1 + 0x18], %f16
+ ldd [%o1 + 0x20], %f18
+ ldd [%o1 + 0x28], %f20
+ ldd [%o1 + 0x30], %f22
+ ldd [%o1 + 0x38], %f24
+ ldd [%o1 + 0x40], %f26
+
+ faligndata %f10, %f12, %f8
+ faligndata %f12, %f14, %f10
+ faligndata %f14, %f16, %f12
+ faligndata %f16, %f18, %f14
+ faligndata %f18, %f20, %f16
+ faligndata %f20, %f22, %f18
+ faligndata %f22, %f24, %f20
+ faligndata %f24, %f26, %f22
+
+ MD5
+
+ subcc %o2, 1, %o2
+ fsrc2 %f26, %f10
+ bne,pt %xcc, 1b
+ add %o1, 0x40, %o1
+
+ ba,a,pt %xcc, 5b
+ENDPROC(md5_sparc64_transform)
diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c
new file mode 100644
index 00000000000..603d723038c
--- /dev/null
+++ b/arch/sparc/crypto/md5_glue.c
@@ -0,0 +1,188 @@
+/* Glue code for MD5 hashing optimized for sparc64 crypto opcodes.
+ *
+ * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c
+ * and crypto/md5.c which are:
+ *
+ * Copyright (c) Alan Smithee.
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ * Copyright (c) Mathias Krause <minipli@googlemail.com>
+ * Copyright (c) Cryptoapi developers.
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/md5.h>
+
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+#include "opcodes.h"
+
+asmlinkage void md5_sparc64_transform(u32 *digest, const char *data,
+ unsigned int rounds);
+
+static int md5_sparc64_init(struct shash_desc *desc)
+{
+ struct md5_state *mctx = shash_desc_ctx(desc);
+
+ mctx->hash[0] = cpu_to_le32(0x67452301);
+ mctx->hash[1] = cpu_to_le32(0xefcdab89);
+ mctx->hash[2] = cpu_to_le32(0x98badcfe);
+ mctx->hash[3] = cpu_to_le32(0x10325476);
+ mctx->byte_count = 0;
+
+ return 0;
+}
+
+static void __md5_sparc64_update(struct md5_state *sctx, const u8 *data,
+ unsigned int len, unsigned int partial)
+{
+ unsigned int done = 0;
+
+ sctx->byte_count += len;
+ if (partial) {
+ done = MD5_HMAC_BLOCK_SIZE - partial;
+ memcpy((u8 *)sctx->block + partial, data, done);
+ md5_sparc64_transform(sctx->hash, (u8 *)sctx->block, 1);
+ }
+ if (len - done >= MD5_HMAC_BLOCK_SIZE) {
+ const unsigned int rounds = (len - done) / MD5_HMAC_BLOCK_SIZE;
+
+ md5_sparc64_transform(sctx->hash, data + done, rounds);
+ done += rounds * MD5_HMAC_BLOCK_SIZE;
+ }
+
+ memcpy(sctx->block, data + done, len - done);
+}
+
+static int md5_sparc64_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct md5_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->byte_count % MD5_HMAC_BLOCK_SIZE;
+
+ /* Handle the fast case right here */
+ if (partial + len < MD5_HMAC_BLOCK_SIZE) {
+ sctx->byte_count += len;
+ memcpy((u8 *)sctx->block + partial, data, len);
+ } else
+ __md5_sparc64_update(sctx, data, len, partial);
+
+ return 0;
+}
+
+/* Add padding and return the message digest. */
+static int md5_sparc64_final(struct shash_desc *desc, u8 *out)
+{
+ struct md5_state *sctx = shash_desc_ctx(desc);
+ unsigned int i, index, padlen;
+ u32 *dst = (u32 *)out;
+ __le64 bits;
+ static const u8 padding[MD5_HMAC_BLOCK_SIZE] = { 0x80, };
+
+ bits = cpu_to_le64(sctx->byte_count << 3);
+
+ /* Pad out to 56 mod 64 and append length */
+ index = sctx->byte_count % MD5_HMAC_BLOCK_SIZE;
+ padlen = (index < 56) ? (56 - index) : ((MD5_HMAC_BLOCK_SIZE+56) - index);
+
+ /* We need to fill a whole block for __md5_sparc64_update() */
+ if (padlen <= 56) {
+ sctx->byte_count += padlen;
+ memcpy((u8 *)sctx->block + index, padding, padlen);
+ } else {
+ __md5_sparc64_update(sctx, padding, padlen, index);
+ }
+ __md5_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
+
+ /* Store state in digest */
+ for (i = 0; i < MD5_HASH_WORDS; i++)
+ dst[i] = sctx->hash[i];
+
+ /* Wipe context */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
+
+static int md5_sparc64_export(struct shash_desc *desc, void *out)
+{
+ struct md5_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+
+ return 0;
+}
+
+static int md5_sparc64_import(struct shash_desc *desc, const void *in)
+{
+ struct md5_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+
+ return 0;
+}
+
+static struct shash_alg alg = {
+ .digestsize = MD5_DIGEST_SIZE,
+ .init = md5_sparc64_init,
+ .update = md5_sparc64_update,
+ .final = md5_sparc64_final,
+ .export = md5_sparc64_export,
+ .import = md5_sparc64_import,
+ .descsize = sizeof(struct md5_state),
+ .statesize = sizeof(struct md5_state),
+ .base = {
+ .cra_name = "md5",
+ .cra_driver_name= "md5-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static bool __init sparc64_has_md5_opcode(void)
+{
+ unsigned long cfr;
+
+ if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+ return false;
+
+ __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+ if (!(cfr & CFR_MD5))
+ return false;
+
+ return true;
+}
+
+static int __init md5_sparc64_mod_init(void)
+{
+ if (sparc64_has_md5_opcode()) {
+ pr_info("Using sparc64 md5 opcode optimized MD5 implementation\n");
+ return crypto_register_shash(&alg);
+ }
+ pr_info("sparc64 md5 opcode not available.\n");
+ return -ENODEV;
+}
+
+static void __exit md5_sparc64_mod_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+module_init(md5_sparc64_mod_init);
+module_exit(md5_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated");
+
+MODULE_ALIAS("md5");
diff --git a/arch/sparc/crypto/opcodes.h b/arch/sparc/crypto/opcodes.h
new file mode 100644
index 00000000000..19cbaea6976
--- /dev/null
+++ b/arch/sparc/crypto/opcodes.h
@@ -0,0 +1,99 @@
+#ifndef _OPCODES_H
+#define _OPCODES_H
+
+#define SPARC_CR_OPCODE_PRIORITY 300
+
+#define F3F(x,y,z) (((x)<<30)|((y)<<19)|((z)<<5))
+
+#define FPD_ENCODE(x) (((x) >> 5) | ((x) & ~(0x20)))
+
+#define RS1(x) (FPD_ENCODE(x) << 14)
+#define RS2(x) (FPD_ENCODE(x) << 0)
+#define RS3(x) (FPD_ENCODE(x) << 9)
+#define RD(x) (FPD_ENCODE(x) << 25)
+#define IMM5_0(x) ((x) << 0)
+#define IMM5_9(x) ((x) << 9)
+
+#define CRC32C(a,b,c) \
+ .word (F3F(2,0x36,0x147)|RS1(a)|RS2(b)|RD(c));
+
+#define MD5 \
+ .word 0x81b02800;
+#define SHA1 \
+ .word 0x81b02820;
+#define SHA256 \
+ .word 0x81b02840;
+#define SHA512 \
+ .word 0x81b02860;
+
+#define AES_EROUND01(a,b,c,d) \
+ .word (F3F(2, 0x19, 0)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define AES_EROUND23(a,b,c,d) \
+ .word (F3F(2, 0x19, 1)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define AES_DROUND01(a,b,c,d) \
+ .word (F3F(2, 0x19, 2)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define AES_DROUND23(a,b,c,d) \
+ .word (F3F(2, 0x19, 3)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define AES_EROUND01_L(a,b,c,d) \
+ .word (F3F(2, 0x19, 4)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define AES_EROUND23_L(a,b,c,d) \
+ .word (F3F(2, 0x19, 5)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define AES_DROUND01_L(a,b,c,d) \
+ .word (F3F(2, 0x19, 6)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define AES_DROUND23_L(a,b,c,d) \
+ .word (F3F(2, 0x19, 7)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define AES_KEXPAND1(a,b,c,d) \
+ .word (F3F(2, 0x19, 8)|RS1(a)|RS2(b)|IMM5_9(c)|RD(d));
+#define AES_KEXPAND0(a,b,c) \
+ .word (F3F(2, 0x36, 0x130)|RS1(a)|RS2(b)|RD(c));
+#define AES_KEXPAND2(a,b,c) \
+ .word (F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c));
+
+#define DES_IP(a,b) \
+ .word (F3F(2, 0x36, 0x134)|RS1(a)|RD(b));
+#define DES_IIP(a,b) \
+ .word (F3F(2, 0x36, 0x135)|RS1(a)|RD(b));
+#define DES_KEXPAND(a,b,c) \
+ .word (F3F(2, 0x36, 0x136)|RS1(a)|IMM5_0(b)|RD(c));
+#define DES_ROUND(a,b,c,d) \
+ .word (F3F(2, 0x19, 0x009)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+
+#define CAMELLIA_F(a,b,c,d) \
+ .word (F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define CAMELLIA_FL(a,b,c) \
+ .word (F3F(2, 0x36, 0x13c)|RS1(a)|RS2(b)|RD(c));
+#define CAMELLIA_FLI(a,b,c) \
+ .word (F3F(2, 0x36, 0x13d)|RS1(a)|RS2(b)|RD(c));
+
+#define MOVDTOX_F0_O4 \
+ .word 0x99b02200
+#define MOVDTOX_F2_O5 \
+ .word 0x9bb02202
+#define MOVXTOD_G1_F60 \
+ .word 0xbbb02301
+#define MOVXTOD_G1_F62 \
+ .word 0xbfb02301
+#define MOVXTOD_G3_F4 \
+ .word 0x89b02303;
+#define MOVXTOD_G7_F6 \
+ .word 0x8db02307;
+#define MOVXTOD_G3_F0 \
+ .word 0x81b02303;
+#define MOVXTOD_G7_F2 \
+ .word 0x85b02307;
+#define MOVXTOD_O0_F0 \
+ .word 0x81b02308;
+#define MOVXTOD_O5_F0 \
+ .word 0x81b0230d;
+#define MOVXTOD_O5_F2 \
+ .word 0x85b0230d;
+#define MOVXTOD_O5_F4 \
+ .word 0x89b0230d;
+#define MOVXTOD_O5_F6 \
+ .word 0x8db0230d;
+#define MOVXTOD_G3_F60 \
+ .word 0xbbb02303;
+#define MOVXTOD_G7_F62 \
+ .word 0xbfb02307;
+
+#endif /* _OPCODES_H */
diff --git a/arch/sparc/crypto/sha1_asm.S b/arch/sparc/crypto/sha1_asm.S
new file mode 100644
index 00000000000..219d10c5ae0
--- /dev/null
+++ b/arch/sparc/crypto/sha1_asm.S
@@ -0,0 +1,72 @@
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+#include "opcodes.h"
+
+ENTRY(sha1_sparc64_transform)
+ /* %o0 = digest, %o1 = data, %o2 = rounds */
+ VISEntryHalf
+ ld [%o0 + 0x00], %f0
+ ld [%o0 + 0x04], %f1
+ ld [%o0 + 0x08], %f2
+ andcc %o1, 0x7, %g0
+ ld [%o0 + 0x0c], %f3
+ bne,pn %xcc, 10f
+ ld [%o0 + 0x10], %f4
+
+1:
+ ldd [%o1 + 0x00], %f8
+ ldd [%o1 + 0x08], %f10
+ ldd [%o1 + 0x10], %f12
+ ldd [%o1 + 0x18], %f14
+ ldd [%o1 + 0x20], %f16
+ ldd [%o1 + 0x28], %f18
+ ldd [%o1 + 0x30], %f20
+ ldd [%o1 + 0x38], %f22
+
+ SHA1
+
+ subcc %o2, 1, %o2
+ bne,pt %xcc, 1b
+ add %o1, 0x40, %o1
+
+5:
+ st %f0, [%o0 + 0x00]
+ st %f1, [%o0 + 0x04]
+ st %f2, [%o0 + 0x08]
+ st %f3, [%o0 + 0x0c]
+ st %f4, [%o0 + 0x10]
+ retl
+ VISExitHalf
+10:
+ alignaddr %o1, %g0, %o1
+
+ ldd [%o1 + 0x00], %f10
+1:
+ ldd [%o1 + 0x08], %f12
+ ldd [%o1 + 0x10], %f14
+ ldd [%o1 + 0x18], %f16
+ ldd [%o1 + 0x20], %f18
+ ldd [%o1 + 0x28], %f20
+ ldd [%o1 + 0x30], %f22
+ ldd [%o1 + 0x38], %f24
+ ldd [%o1 + 0x40], %f26
+
+ faligndata %f10, %f12, %f8
+ faligndata %f12, %f14, %f10
+ faligndata %f14, %f16, %f12
+ faligndata %f16, %f18, %f14
+ faligndata %f18, %f20, %f16
+ faligndata %f20, %f22, %f18
+ faligndata %f22, %f24, %f20
+ faligndata %f24, %f26, %f22
+
+ SHA1
+
+ subcc %o2, 1, %o2
+ fsrc2 %f26, %f10
+ bne,pt %xcc, 1b
+ add %o1, 0x40, %o1
+
+ ba,a,pt %xcc, 5b
+ENDPROC(sha1_sparc64_transform)
diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c
new file mode 100644
index 00000000000..2bbb20bee9f
--- /dev/null
+++ b/arch/sparc/crypto/sha1_glue.c
@@ -0,0 +1,183 @@
+/* Glue code for SHA1 hashing optimized for sparc64 crypto opcodes.
+ *
+ * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c
+ *
+ * Copyright (c) Alan Smithee.
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ * Copyright (c) Mathias Krause <minipli@googlemail.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+#include "opcodes.h"
+
+asmlinkage void sha1_sparc64_transform(u32 *digest, const char *data,
+ unsigned int rounds);
+
+static int sha1_sparc64_init(struct shash_desc *desc)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha1_state){
+ .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+ };
+
+ return 0;
+}
+
+static void __sha1_sparc64_update(struct sha1_state *sctx, const u8 *data,
+ unsigned int len, unsigned int partial)
+{
+ unsigned int done = 0;
+
+ sctx->count += len;
+ if (partial) {
+ done = SHA1_BLOCK_SIZE - partial;
+ memcpy(sctx->buffer + partial, data, done);
+ sha1_sparc64_transform(sctx->state, sctx->buffer, 1);
+ }
+ if (len - done >= SHA1_BLOCK_SIZE) {
+ const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
+
+ sha1_sparc64_transform(sctx->state, data + done, rounds);
+ done += rounds * SHA1_BLOCK_SIZE;
+ }
+
+ memcpy(sctx->buffer, data + done, len - done);
+}
+
+static int sha1_sparc64_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+
+ /* Handle the fast case right here */
+ if (partial + len < SHA1_BLOCK_SIZE) {
+ sctx->count += len;
+ memcpy(sctx->buffer + partial, data, len);
+ } else
+ __sha1_sparc64_update(sctx, data, len, partial);
+
+ return 0;
+}
+
+/* Add padding and return the message digest. */
+static int sha1_sparc64_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ unsigned int i, index, padlen;
+ __be32 *dst = (__be32 *)out;
+ __be64 bits;
+ static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+
+ bits = cpu_to_be64(sctx->count << 3);
+
+ /* Pad out to 56 mod 64 and append length */
+ index = sctx->count % SHA1_BLOCK_SIZE;
+ padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
+
+ /* We need to fill a whole block for __sha1_sparc64_update() */
+ if (padlen <= 56) {
+ sctx->count += padlen;
+ memcpy(sctx->buffer + index, padding, padlen);
+ } else {
+ __sha1_sparc64_update(sctx, padding, padlen, index);
+ }
+ __sha1_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
+
+ /* Store state in digest */
+ for (i = 0; i < 5; i++)
+ dst[i] = cpu_to_be32(sctx->state[i]);
+
+ /* Wipe context */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha1_sparc64_export(struct shash_desc *desc, void *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha1_sparc64_import(struct shash_desc *desc, const void *in)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+
+ return 0;
+}
+
+static struct shash_alg alg = {
+ .digestsize = SHA1_DIGEST_SIZE,
+ .init = sha1_sparc64_init,
+ .update = sha1_sparc64_update,
+ .final = sha1_sparc64_final,
+ .export = sha1_sparc64_export,
+ .import = sha1_sparc64_import,
+ .descsize = sizeof(struct sha1_state),
+ .statesize = sizeof(struct sha1_state),
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name= "sha1-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static bool __init sparc64_has_sha1_opcode(void)
+{
+ unsigned long cfr;
+
+ if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+ return false;
+
+ __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+ if (!(cfr & CFR_SHA1))
+ return false;
+
+ return true;
+}
+
+static int __init sha1_sparc64_mod_init(void)
+{
+ if (sparc64_has_sha1_opcode()) {
+ pr_info("Using sparc64 sha1 opcode optimized SHA-1 implementation\n");
+ return crypto_register_shash(&alg);
+ }
+ pr_info("sparc64 sha1 opcode not available.\n");
+ return -ENODEV;
+}
+
+static void __exit sha1_sparc64_mod_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+module_init(sha1_sparc64_mod_init);
+module_exit(sha1_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, sparc64 sha1 opcode accelerated");
+
+MODULE_ALIAS("sha1");
diff --git a/arch/sparc/crypto/sha256_asm.S b/arch/sparc/crypto/sha256_asm.S
new file mode 100644
index 00000000000..b5f3d5826eb
--- /dev/null
+++ b/arch/sparc/crypto/sha256_asm.S
@@ -0,0 +1,78 @@
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+#include "opcodes.h"
+
+ENTRY(sha256_sparc64_transform)
+ /* %o0 = digest, %o1 = data, %o2 = rounds */
+ VISEntryHalf
+ ld [%o0 + 0x00], %f0
+ ld [%o0 + 0x04], %f1
+ ld [%o0 + 0x08], %f2
+ ld [%o0 + 0x0c], %f3
+ ld [%o0 + 0x10], %f4
+ ld [%o0 + 0x14], %f5
+ andcc %o1, 0x7, %g0
+ ld [%o0 + 0x18], %f6
+ bne,pn %xcc, 10f
+ ld [%o0 + 0x1c], %f7
+
+1:
+ ldd [%o1 + 0x00], %f8
+ ldd [%o1 + 0x08], %f10
+ ldd [%o1 + 0x10], %f12
+ ldd [%o1 + 0x18], %f14
+ ldd [%o1 + 0x20], %f16
+ ldd [%o1 + 0x28], %f18
+ ldd [%o1 + 0x30], %f20
+ ldd [%o1 + 0x38], %f22
+
+ SHA256
+
+ subcc %o2, 1, %o2
+ bne,pt %xcc, 1b
+ add %o1, 0x40, %o1
+
+5:
+ st %f0, [%o0 + 0x00]
+ st %f1, [%o0 + 0x04]
+ st %f2, [%o0 + 0x08]
+ st %f3, [%o0 + 0x0c]
+ st %f4, [%o0 + 0x10]
+ st %f5, [%o0 + 0x14]
+ st %f6, [%o0 + 0x18]
+ st %f7, [%o0 + 0x1c]
+ retl
+ VISExitHalf
+10:
+ alignaddr %o1, %g0, %o1
+
+ ldd [%o1 + 0x00], %f10
+1:
+ ldd [%o1 + 0x08], %f12
+ ldd [%o1 + 0x10], %f14
+ ldd [%o1 + 0x18], %f16
+ ldd [%o1 + 0x20], %f18
+ ldd [%o1 + 0x28], %f20
+ ldd [%o1 + 0x30], %f22
+ ldd [%o1 + 0x38], %f24
+ ldd [%o1 + 0x40], %f26
+
+ faligndata %f10, %f12, %f8
+ faligndata %f12, %f14, %f10
+ faligndata %f14, %f16, %f12
+ faligndata %f16, %f18, %f14
+ faligndata %f18, %f20, %f16
+ faligndata %f20, %f22, %f18
+ faligndata %f22, %f24, %f20
+ faligndata %f24, %f26, %f22
+
+ SHA256
+
+ subcc %o2, 1, %o2
+ fsrc2 %f26, %f10
+ bne,pt %xcc, 1b
+ add %o1, 0x40, %o1
+
+ ba,a,pt %xcc, 5b
+ENDPROC(sha256_sparc64_transform)
diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c
new file mode 100644
index 00000000000..591e656bd89
--- /dev/null
+++ b/arch/sparc/crypto/sha256_glue.c
@@ -0,0 +1,241 @@
+/* Glue code for SHA256 hashing optimized for sparc64 crypto opcodes.
+ *
+ * This is based largely upon crypto/sha256_generic.c
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+#include "opcodes.h"
+
+asmlinkage void sha256_sparc64_transform(u32 *digest, const char *data,
+ unsigned int rounds);
+
+static int sha224_sparc64_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ sctx->state[0] = SHA224_H0;
+ sctx->state[1] = SHA224_H1;
+ sctx->state[2] = SHA224_H2;
+ sctx->state[3] = SHA224_H3;
+ sctx->state[4] = SHA224_H4;
+ sctx->state[5] = SHA224_H5;
+ sctx->state[6] = SHA224_H6;
+ sctx->state[7] = SHA224_H7;
+ sctx->count = 0;
+
+ return 0;
+}
+
+static int sha256_sparc64_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ sctx->state[0] = SHA256_H0;
+ sctx->state[1] = SHA256_H1;
+ sctx->state[2] = SHA256_H2;
+ sctx->state[3] = SHA256_H3;
+ sctx->state[4] = SHA256_H4;
+ sctx->state[5] = SHA256_H5;
+ sctx->state[6] = SHA256_H6;
+ sctx->state[7] = SHA256_H7;
+ sctx->count = 0;
+
+ return 0;
+}
+
+static void __sha256_sparc64_update(struct sha256_state *sctx, const u8 *data,
+ unsigned int len, unsigned int partial)
+{
+ unsigned int done = 0;
+
+ sctx->count += len;
+ if (partial) {
+ done = SHA256_BLOCK_SIZE - partial;
+ memcpy(sctx->buf + partial, data, done);
+ sha256_sparc64_transform(sctx->state, sctx->buf, 1);
+ }
+ if (len - done >= SHA256_BLOCK_SIZE) {
+ const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
+
+ sha256_sparc64_transform(sctx->state, data + done, rounds);
+ done += rounds * SHA256_BLOCK_SIZE;
+ }
+
+ memcpy(sctx->buf, data + done, len - done);
+}
+
+static int sha256_sparc64_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
+
+ /* Handle the fast case right here */
+ if (partial + len < SHA256_BLOCK_SIZE) {
+ sctx->count += len;
+ memcpy(sctx->buf + partial, data, len);
+ } else
+ __sha256_sparc64_update(sctx, data, len, partial);
+
+ return 0;
+}
+
+static int sha256_sparc64_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int i, index, padlen;
+ __be32 *dst = (__be32 *)out;
+ __be64 bits;
+ static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
+
+ bits = cpu_to_be64(sctx->count << 3);
+
+ /* Pad out to 56 mod 64 and append length */
+ index = sctx->count % SHA256_BLOCK_SIZE;
+ padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56) - index);
+
+ /* We need to fill a whole block for __sha256_sparc64_update() */
+ if (padlen <= 56) {
+ sctx->count += padlen;
+ memcpy(sctx->buf + index, padding, padlen);
+ } else {
+ __sha256_sparc64_update(sctx, padding, padlen, index);
+ }
+ __sha256_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
+
+ /* Store state in digest */
+ for (i = 0; i < 8; i++)
+ dst[i] = cpu_to_be32(sctx->state[i]);
+
+ /* Wipe context */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha224_sparc64_final(struct shash_desc *desc, u8 *hash)
+{
+ u8 D[SHA256_DIGEST_SIZE];
+
+ sha256_sparc64_final(desc, D);
+
+ memcpy(hash, D, SHA224_DIGEST_SIZE);
+ memset(D, 0, SHA256_DIGEST_SIZE);
+
+ return 0;
+}
+
+static int sha256_sparc64_export(struct shash_desc *desc, void *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+ return 0;
+}
+
+static int sha256_sparc64_import(struct shash_desc *desc, const void *in)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+ return 0;
+}
+
+static struct shash_alg sha256 = {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .init = sha256_sparc64_init,
+ .update = sha256_sparc64_update,
+ .final = sha256_sparc64_final,
+ .export = sha256_sparc64_export,
+ .import = sha256_sparc64_import,
+ .descsize = sizeof(struct sha256_state),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name= "sha256-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static struct shash_alg sha224 = {
+ .digestsize = SHA224_DIGEST_SIZE,
+ .init = sha224_sparc64_init,
+ .update = sha256_sparc64_update,
+ .final = sha224_sparc64_final,
+ .descsize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name= "sha224-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static bool __init sparc64_has_sha256_opcode(void)
+{
+ unsigned long cfr;
+
+ if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+ return false;
+
+ __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+ if (!(cfr & CFR_SHA256))
+ return false;
+
+ return true;
+}
+
+static int __init sha256_sparc64_mod_init(void)
+{
+ if (sparc64_has_sha256_opcode()) {
+ int ret = crypto_register_shash(&sha224);
+ if (ret < 0)
+ return ret;
+
+ ret = crypto_register_shash(&sha256);
+ if (ret < 0) {
+ crypto_unregister_shash(&sha224);
+ return ret;
+ }
+
+ pr_info("Using sparc64 sha256 opcode optimized SHA-256/SHA-224 implementation\n");
+ return 0;
+ }
+ pr_info("sparc64 sha256 opcode not available.\n");
+ return -ENODEV;
+}
+
+static void __exit sha256_sparc64_mod_fini(void)
+{
+ crypto_unregister_shash(&sha224);
+ crypto_unregister_shash(&sha256);
+}
+
+module_init(sha256_sparc64_mod_init);
+module_exit(sha256_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, sparc64 sha256 opcode accelerated");
+
+MODULE_ALIAS("sha224");
+MODULE_ALIAS("sha256");
diff --git a/arch/sparc/crypto/sha512_asm.S b/arch/sparc/crypto/sha512_asm.S
new file mode 100644
index 00000000000..54bfba713c0
--- /dev/null
+++ b/arch/sparc/crypto/sha512_asm.S
@@ -0,0 +1,102 @@
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+#include "opcodes.h"
+
+ENTRY(sha512_sparc64_transform)
+ /* %o0 = digest, %o1 = data, %o2 = rounds */
+ VISEntry
+ ldd [%o0 + 0x00], %f0
+ ldd [%o0 + 0x08], %f2
+ ldd [%o0 + 0x10], %f4
+ ldd [%o0 + 0x18], %f6
+ ldd [%o0 + 0x20], %f8
+ ldd [%o0 + 0x28], %f10
+ andcc %o1, 0x7, %g0
+ ldd [%o0 + 0x30], %f12
+ bne,pn %xcc, 10f
+ ldd [%o0 + 0x38], %f14
+
+1:
+ ldd [%o1 + 0x00], %f16
+ ldd [%o1 + 0x08], %f18
+ ldd [%o1 + 0x10], %f20
+ ldd [%o1 + 0x18], %f22
+ ldd [%o1 + 0x20], %f24
+ ldd [%o1 + 0x28], %f26
+ ldd [%o1 + 0x30], %f28
+ ldd [%o1 + 0x38], %f30
+ ldd [%o1 + 0x40], %f32
+ ldd [%o1 + 0x48], %f34
+ ldd [%o1 + 0x50], %f36
+ ldd [%o1 + 0x58], %f38
+ ldd [%o1 + 0x60], %f40
+ ldd [%o1 + 0x68], %f42
+ ldd [%o1 + 0x70], %f44
+ ldd [%o1 + 0x78], %f46
+
+ SHA512
+
+ subcc %o2, 1, %o2
+ bne,pt %xcc, 1b
+ add %o1, 0x80, %o1
+
+5:
+ std %f0, [%o0 + 0x00]
+ std %f2, [%o0 + 0x08]
+ std %f4, [%o0 + 0x10]
+ std %f6, [%o0 + 0x18]
+ std %f8, [%o0 + 0x20]
+ std %f10, [%o0 + 0x28]
+ std %f12, [%o0 + 0x30]
+ std %f14, [%o0 + 0x38]
+ retl
+ VISExit
+10:
+ alignaddr %o1, %g0, %o1
+
+ ldd [%o1 + 0x00], %f18
+1:
+ ldd [%o1 + 0x08], %f20
+ ldd [%o1 + 0x10], %f22
+ ldd [%o1 + 0x18], %f24
+ ldd [%o1 + 0x20], %f26
+ ldd [%o1 + 0x28], %f28
+ ldd [%o1 + 0x30], %f30
+ ldd [%o1 + 0x38], %f32
+ ldd [%o1 + 0x40], %f34
+ ldd [%o1 + 0x48], %f36
+ ldd [%o1 + 0x50], %f38
+ ldd [%o1 + 0x58], %f40
+ ldd [%o1 + 0x60], %f42
+ ldd [%o1 + 0x68], %f44
+ ldd [%o1 + 0x70], %f46
+ ldd [%o1 + 0x78], %f48
+ ldd [%o1 + 0x80], %f50
+
+ faligndata %f18, %f20, %f16
+ faligndata %f20, %f22, %f18
+ faligndata %f22, %f24, %f20
+ faligndata %f24, %f26, %f22
+ faligndata %f26, %f28, %f24
+ faligndata %f28, %f30, %f26
+ faligndata %f30, %f32, %f28
+ faligndata %f32, %f34, %f30
+ faligndata %f34, %f36, %f32
+ faligndata %f36, %f38, %f34
+ faligndata %f38, %f40, %f36
+ faligndata %f40, %f42, %f38
+ faligndata %f42, %f44, %f40
+ faligndata %f44, %f46, %f42
+ faligndata %f46, %f48, %f44
+ faligndata %f48, %f50, %f46
+
+ SHA512
+
+ subcc %o2, 1, %o2
+ fsrc2 %f50, %f18
+ bne,pt %xcc, 1b
+ add %o1, 0x80, %o1
+
+ ba,a,pt %xcc, 5b
+ENDPROC(sha512_sparc64_transform)
diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c
new file mode 100644
index 00000000000..486f0a2b700
--- /dev/null
+++ b/arch/sparc/crypto/sha512_glue.c
@@ -0,0 +1,226 @@
+/* Glue code for SHA512 hashing optimized for sparc64 crypto opcodes.
+ *
+ * This is based largely upon crypto/sha512_generic.c
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2003 Kyle McMartin <kyle@debian.org>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+#include "opcodes.h"
+
+asmlinkage void sha512_sparc64_transform(u64 *digest, const char *data,
+ unsigned int rounds);
+
+static int sha512_sparc64_init(struct shash_desc *desc)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+ sctx->state[0] = SHA512_H0;
+ sctx->state[1] = SHA512_H1;
+ sctx->state[2] = SHA512_H2;
+ sctx->state[3] = SHA512_H3;
+ sctx->state[4] = SHA512_H4;
+ sctx->state[5] = SHA512_H5;
+ sctx->state[6] = SHA512_H6;
+ sctx->state[7] = SHA512_H7;
+ sctx->count[0] = sctx->count[1] = 0;
+
+ return 0;
+}
+
+static int sha384_sparc64_init(struct shash_desc *desc)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+ sctx->state[0] = SHA384_H0;
+ sctx->state[1] = SHA384_H1;
+ sctx->state[2] = SHA384_H2;
+ sctx->state[3] = SHA384_H3;
+ sctx->state[4] = SHA384_H4;
+ sctx->state[5] = SHA384_H5;
+ sctx->state[6] = SHA384_H6;
+ sctx->state[7] = SHA384_H7;
+ sctx->count[0] = sctx->count[1] = 0;
+
+ return 0;
+}
+
+static void __sha512_sparc64_update(struct sha512_state *sctx, const u8 *data,
+ unsigned int len, unsigned int partial)
+{
+ unsigned int done = 0;
+
+ if ((sctx->count[0] += len) < len)
+ sctx->count[1]++;
+ if (partial) {
+ done = SHA512_BLOCK_SIZE - partial;
+ memcpy(sctx->buf + partial, data, done);
+ sha512_sparc64_transform(sctx->state, sctx->buf, 1);
+ }
+ if (len - done >= SHA512_BLOCK_SIZE) {
+ const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
+
+ sha512_sparc64_transform(sctx->state, data + done, rounds);
+ done += rounds * SHA512_BLOCK_SIZE;
+ }
+
+ memcpy(sctx->buf, data + done, len - done);
+}
+
+static int sha512_sparc64_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
+
+ /* Handle the fast case right here */
+ if (partial + len < SHA512_BLOCK_SIZE) {
+ if ((sctx->count[0] += len) < len)
+ sctx->count[1]++;
+ memcpy(sctx->buf + partial, data, len);
+ } else
+ __sha512_sparc64_update(sctx, data, len, partial);
+
+ return 0;
+}
+
+static int sha512_sparc64_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+ unsigned int i, index, padlen;
+ __be64 *dst = (__be64 *)out;
+ __be64 bits[2];
+ static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
+
+ /* Save number of bits */
+ bits[1] = cpu_to_be64(sctx->count[0] << 3);
+ bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
+
+ /* Pad out to 112 mod 128 and append length */
+ index = sctx->count[0] % SHA512_BLOCK_SIZE;
+ padlen = (index < 112) ? (112 - index) : ((SHA512_BLOCK_SIZE+112) - index);
+
+ /* We need to fill a whole block for __sha512_sparc64_update() */
+ if (padlen <= 112) {
+ if ((sctx->count[0] += padlen) < padlen)
+ sctx->count[1]++;
+ memcpy(sctx->buf + index, padding, padlen);
+ } else {
+ __sha512_sparc64_update(sctx, padding, padlen, index);
+ }
+ __sha512_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 112);
+
+ /* Store state in digest */
+ for (i = 0; i < 8; i++)
+ dst[i] = cpu_to_be64(sctx->state[i]);
+
+ /* Wipe context */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha384_sparc64_final(struct shash_desc *desc, u8 *hash)
+{
+ u8 D[64];
+
+ sha512_sparc64_final(desc, D);
+
+ memcpy(hash, D, 48);
+ memset(D, 0, 64);
+
+ return 0;
+}
+
+static struct shash_alg sha512 = {
+ .digestsize = SHA512_DIGEST_SIZE,
+ .init = sha512_sparc64_init,
+ .update = sha512_sparc64_update,
+ .final = sha512_sparc64_final,
+ .descsize = sizeof(struct sha512_state),
+ .base = {
+ .cra_name = "sha512",
+ .cra_driver_name= "sha512-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA512_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static struct shash_alg sha384 = {
+ .digestsize = SHA384_DIGEST_SIZE,
+ .init = sha384_sparc64_init,
+ .update = sha512_sparc64_update,
+ .final = sha384_sparc64_final,
+ .descsize = sizeof(struct sha512_state),
+ .base = {
+ .cra_name = "sha384",
+ .cra_driver_name= "sha384-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA384_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static bool __init sparc64_has_sha512_opcode(void)
+{
+ unsigned long cfr;
+
+ if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+ return false;
+
+ __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+ if (!(cfr & CFR_SHA512))
+ return false;
+
+ return true;
+}
+
+static int __init sha512_sparc64_mod_init(void)
+{
+ if (sparc64_has_sha512_opcode()) {
+ int ret = crypto_register_shash(&sha384);
+ if (ret < 0)
+ return ret;
+
+ ret = crypto_register_shash(&sha512);
+ if (ret < 0) {
+ crypto_unregister_shash(&sha384);
+ return ret;
+ }
+
+ pr_info("Using sparc64 sha512 opcode optimized SHA-512/SHA-384 implementation\n");
+ return 0;
+ }
+ pr_info("sparc64 sha512 opcode not available.\n");
+ return -ENODEV;
+}
+
+static void __exit sha512_sparc64_mod_fini(void)
+{
+ crypto_unregister_shash(&sha384);
+ crypto_unregister_shash(&sha512);
+}
+
+module_init(sha512_sparc64_mod_init);
+module_exit(sha512_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-384 and SHA-512 Secure Hash Algorithm, sparc64 sha512 opcode accelerated");
+
+MODULE_ALIAS("sha384");
+MODULE_ALIAS("sha512");
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 67f83e0a0d6..f80ff93f6f7 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -17,6 +17,7 @@ header-y += uctx.h
header-y += utrap.h
header-y += watchdog.h
+generic-y += clkdev.h
generic-y += div64.h
generic-y += local64.h
generic-y += irq_regs.h
diff --git a/arch/sparc/include/asm/asi.h b/arch/sparc/include/asm/asi.h
index 61ebe7411ce..aace6f31371 100644
--- a/arch/sparc/include/asm/asi.h
+++ b/arch/sparc/include/asm/asi.h
@@ -141,7 +141,8 @@
/* SpitFire and later extended ASIs. The "(III)" marker designates
* UltraSparc-III and later specific ASIs. The "(CMT)" marker designates
* Chip Multi Threading specific ASIs. "(NG)" designates Niagara specific
- * ASIs, "(4V)" designates SUN4V specific ASIs.
+ * ASIs, "(4V)" designates SUN4V specific ASIs. "(NG4)" designates SPARC-T4
+ * and later ASIs.
*/
#define ASI_PHYS_USE_EC 0x14 /* PADDR, E-cachable */
#define ASI_PHYS_BYPASS_EC_E 0x15 /* PADDR, E-bit */
@@ -243,6 +244,7 @@
#define ASI_UDBL_CONTROL_R 0x7f /* External UDB control regs rd low*/
#define ASI_INTR_R 0x7f /* IRQ vector dispatch read */
#define ASI_INTR_DATAN_R 0x7f /* (III) In irq vector data reg N */
+#define ASI_PIC 0xb0 /* (NG4) PIC registers */
#define ASI_PST8_P 0xc0 /* Primary, 8 8-bit, partial */
#define ASI_PST8_S 0xc1 /* Secondary, 8 8-bit, partial */
#define ASI_PST16_P 0xc2 /* Primary, 4 16-bit, partial */
@@ -268,9 +270,28 @@
#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 /* (NG) init-store, twin load,
* primary, implicit
*/
+#define ASI_BLK_INIT_QUAD_LDD_S 0xe3 /* (NG) init-store, twin load,
+ * secondary, implicit
+ */
#define ASI_BLK_P 0xf0 /* Primary, blk ld/st */
#define ASI_BLK_S 0xf1 /* Secondary, blk ld/st */
+#define ASI_ST_BLKINIT_MRU_P 0xf2 /* (NG4) init-store, twin load,
+ * Most-Recently-Used, primary,
+ * implicit
+ */
+#define ASI_ST_BLKINIT_MRU_S 0xf2 /* (NG4) init-store, twin load,
+ * Most-Recently-Used, secondary,
+ * implicit
+ */
#define ASI_BLK_PL 0xf8 /* Primary, blk ld/st, little */
#define ASI_BLK_SL 0xf9 /* Secondary, blk ld/st, little */
+#define ASI_ST_BLKINIT_MRU_PL 0xfa /* (NG4) init-store, twin load,
+ * Most-Recently-Used, primary,
+ * implicit, little-endian
+ */
+#define ASI_ST_BLKINIT_MRU_SL 0xfb /* (NG4) init-store, twin load,
+ * Most-Recently-Used, secondary,
+ * implicit, little-endian
+ */
#endif /* _SPARC_ASI_H */
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index b8be20d42a0..cef99fbc0a2 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -36,6 +36,7 @@ typedef s64 compat_s64;
typedef u32 compat_uint_t;
typedef u32 compat_ulong_t;
typedef u64 compat_u64;
+typedef u32 compat_uptr_t;
struct compat_timespec {
compat_time_t tv_sec;
@@ -147,6 +148,65 @@ typedef u32 compat_old_sigset_t;
typedef u32 compat_sigset_word;
+typedef union compat_sigval {
+ compat_int_t sival_int;
+ compat_uptr_t sival_ptr;
+} compat_sigval_t;
+
+#define SI_PAD_SIZE32 (128/sizeof(int) - 3)
+
+typedef struct compat_siginfo {
+ int si_signo;
+ int si_errno;
+ int si_code;
+
+ union {
+ int _pad[SI_PAD_SIZE32];
+
+ /* kill() */
+ struct {
+ compat_pid_t _pid; /* sender's pid */
+ unsigned int _uid; /* sender's uid */
+ } _kill;
+
+ /* POSIX.1b timers */
+ struct {
+ compat_timer_t _tid; /* timer id */
+ int _overrun; /* overrun count */
+ compat_sigval_t _sigval; /* same as below */
+ int _sys_private; /* not to be passed to user */
+ } _timer;
+
+ /* POSIX.1b signals */
+ struct {
+ compat_pid_t _pid; /* sender's pid */
+ unsigned int _uid; /* sender's uid */
+ compat_sigval_t _sigval;
+ } _rt;
+
+ /* SIGCHLD */
+ struct {
+ compat_pid_t _pid; /* which child */
+ unsigned int _uid; /* sender's uid */
+ int _status; /* exit code */
+ compat_clock_t _utime;
+ compat_clock_t _stime;
+ } _sigchld;
+
+ /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */
+ struct {
+ u32 _addr; /* faulting insn/memory ref. */
+ int _trapno;
+ } _sigfault;
+
+ /* SIGPOLL */
+ struct {
+ int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
+ int _fd;
+ } _sigpoll;
+ } _sifields;
+} compat_siginfo_t;
+
#define COMPAT_OFF_T_MAX 0x7fffffff
#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL
@@ -156,7 +216,6 @@ typedef u32 compat_sigset_word;
* as pointers because the syscall entry code will have
* appropriately converted them already.
*/
-typedef u32 compat_uptr_t;
static inline void __user *compat_ptr(compat_uptr_t uptr)
{
diff --git a/arch/sparc/include/asm/elf_32.h b/arch/sparc/include/asm/elf_32.h
index 2d4d755cba9..ac74a2c98e6 100644
--- a/arch/sparc/include/asm/elf_32.h
+++ b/arch/sparc/include/asm/elf_32.h
@@ -128,6 +128,7 @@ typedef struct {
#define ELF_PLATFORM (NULL)
-#define SET_PERSONALITY(ex) set_personality(PER_LINUX)
+#define SET_PERSONALITY(ex) \
+ set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
#endif /* !(__ASMSPARC_ELF_H) */
diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h
index 7df8b7f544d..370ca1e71ff 100644
--- a/arch/sparc/include/asm/elf_64.h
+++ b/arch/sparc/include/asm/elf_64.h
@@ -86,6 +86,15 @@
#define AV_SPARC_IMA 0x00400000 /* integer multiply-add */
#define AV_SPARC_ASI_CACHE_SPARING \
0x00800000 /* cache sparing ASIs available */
+#define AV_SPARC_PAUSE 0x01000000 /* PAUSE available */
+#define AV_SPARC_CBCOND 0x02000000 /* CBCOND insns available */
+
+/* Solaris decided to enumerate every single crypto instruction type
+ * in the AT_HWCAP bits. This is wasteful, since if crypto is present,
+ * you still need to look in the CFR register to see if the opcode is
+ * really available. So we simply advertise only "crypto" support.
+ */
+#define HWCAP_SPARC_CRYPTO 0x04000000 /* CRYPTO insns available */
#define CORE_DUMP_USE_REGSET
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index 177061064ee..8c5eed6d267 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -10,7 +10,10 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep);
-void hugetlb_prefault_arch_hook(struct mm_struct *mm);
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+ hugetlb_setup(mm);
+}
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr,
@@ -82,4 +85,8 @@ static inline void arch_release_hugepage(struct page *page)
{
}
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
#endif /* _ASM_SPARC64_HUGETLB_H */
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
index 015a761eaa3..ca121f0fa3e 100644
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -2934,6 +2934,16 @@ extern unsigned long sun4v_reboot_data_set(unsigned long ra,
unsigned long len);
#endif
+#define HV_FAST_VT_GET_PERFREG 0x184
+#define HV_FAST_VT_SET_PERFREG 0x185
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_vt_get_perfreg(unsigned long reg_num,
+ unsigned long *reg_val);
+extern unsigned long sun4v_vt_set_perfreg(unsigned long reg_num,
+ unsigned long reg_val);
+#endif
+
/* Function numbers for HV_CORE_TRAP. */
#define HV_CORE_SET_VER 0x00
#define HV_CORE_PUTCHAR 0x01
@@ -2964,6 +2974,7 @@ extern unsigned long sun4v_reboot_data_set(unsigned long ra,
#define HV_GRP_NIU 0x0204
#define HV_GRP_VF_CPU 0x0205
#define HV_GRP_KT_CPU 0x0209
+#define HV_GRP_VT_CPU 0x020c
#define HV_GRP_DIAG 0x0300
#ifndef __ASSEMBLY__
diff --git a/arch/sparc/include/asm/mdesc.h b/arch/sparc/include/asm/mdesc.h
index 9faa046713f..139097f3a67 100644
--- a/arch/sparc/include/asm/mdesc.h
+++ b/arch/sparc/include/asm/mdesc.h
@@ -73,6 +73,7 @@ extern void mdesc_register_notifier(struct mdesc_notifier_client *client);
extern void mdesc_fill_in_cpu_data(cpumask_t *mask);
extern void mdesc_populate_present_mask(cpumask_t *mask);
+extern void mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask);
extern void sun4v_mdesc_init(void);
diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h
index 9067dc50053..76092c4dd27 100644
--- a/arch/sparc/include/asm/mmu_64.h
+++ b/arch/sparc/include/asm/mmu_64.h
@@ -30,22 +30,8 @@
#define CTX_PGSZ_MASK ((CTX_PGSZ_BITS << CTX_PGSZ0_SHIFT) | \
(CTX_PGSZ_BITS << CTX_PGSZ1_SHIFT))
-#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
#define CTX_PGSZ_BASE CTX_PGSZ_8KB
-#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
-#define CTX_PGSZ_BASE CTX_PGSZ_64KB
-#else
-#error No page size specified in kernel configuration
-#endif
-
-#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
-#define CTX_PGSZ_HUGE CTX_PGSZ_4MB
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
-#define CTX_PGSZ_HUGE CTX_PGSZ_512KB
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
-#define CTX_PGSZ_HUGE CTX_PGSZ_64KB
-#endif
-
+#define CTX_PGSZ_HUGE CTX_PGSZ_4MB
#define CTX_PGSZ_KERN CTX_PGSZ_4MB
/* Thus, when running on UltraSPARC-III+ and later, we use the following
@@ -96,7 +82,7 @@ struct tsb_config {
#define MM_TSB_BASE 0
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define MM_TSB_HUGE 1
#define MM_NUM_TSBS 2
#else
@@ -107,6 +93,7 @@ typedef struct {
spinlock_t lock;
unsigned long sparc64_ctx_val;
unsigned long huge_pte_count;
+ struct page *pgtable_page;
struct tsb_config tsb_block[MM_NUM_TSBS];
struct hv_tsb_descr tsb_descr[MM_NUM_TSBS];
} mm_context_t;
diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h
index a97fd085ceb..9191ca62ed9 100644
--- a/arch/sparc/include/asm/mmu_context_64.h
+++ b/arch/sparc/include/asm/mmu_context_64.h
@@ -36,7 +36,7 @@ static inline void tsb_context_switch(struct mm_struct *mm)
{
__tsb_context_switch(__pa(mm->pgd),
&mm->context.tsb_block[0],
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
(mm->context.tsb_block[1].tsb ?
&mm->context.tsb_block[1] :
NULL)
diff --git a/arch/sparc/include/asm/oplib_32.h b/arch/sparc/include/asm/oplib_32.h
index 27517879a6c..c72f3045820 100644
--- a/arch/sparc/include/asm/oplib_32.h
+++ b/arch/sparc/include/asm/oplib_32.h
@@ -94,7 +94,7 @@ extern int prom_getprev(void);
extern void prom_console_write_buf(const char *buf, int len);
/* Prom's internal routines, don't use in kernel/boot code. */
-extern void prom_printf(const char *fmt, ...);
+extern __printf(1, 2) void prom_printf(const char *fmt, ...);
extern void prom_write(const char *buf, unsigned int len);
/* Multiprocessor operations... */
diff --git a/arch/sparc/include/asm/oplib_64.h b/arch/sparc/include/asm/oplib_64.h
index 97a90475c31..a12dbe3b776 100644
--- a/arch/sparc/include/asm/oplib_64.h
+++ b/arch/sparc/include/asm/oplib_64.h
@@ -98,7 +98,7 @@ extern unsigned char prom_get_idprom(char *idp_buffer, int idpbuf_size);
extern void prom_console_write_buf(const char *buf, int len);
/* Prom's internal routines, don't use in kernel/boot code. */
-extern void prom_printf(const char *fmt, ...);
+extern __printf(1, 2) void prom_printf(const char *fmt, ...);
extern void prom_write(const char *buf, unsigned int len);
/* Multiprocessor operations... */
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index f0d09b40103..4b39f74d6ca 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -3,13 +3,7 @@
#include <linux/const.h>
-#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
#define PAGE_SHIFT 13
-#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
-#define PAGE_SHIFT 16
-#else
-#error No page size specified in kernel configuration
-#endif
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
@@ -21,15 +15,9 @@
#define DCACHE_ALIASING_POSSIBLE
#endif
-#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
#define HPAGE_SHIFT 22
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
-#define HPAGE_SHIFT 19
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
-#define HPAGE_SHIFT 16
-#endif
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1UL))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
@@ -38,6 +26,11 @@
#ifndef __ASSEMBLY__
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+struct mm_struct;
+extern void hugetlb_setup(struct mm_struct *mm);
+#endif
+
#define WANT_PAGE_VIRTUAL
extern void _clear_page(void *page);
@@ -98,7 +91,7 @@ typedef unsigned long pgprot_t;
#endif /* (STRICT_MM_TYPECHECKS) */
-typedef struct page *pgtable_t;
+typedef pte_t *pgtable_t;
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \
(_AC(0x0000000070000000,UL)) : \
diff --git a/arch/sparc/include/asm/pcr.h b/arch/sparc/include/asm/pcr.h
index 288d7beba05..942bb17f60c 100644
--- a/arch/sparc/include/asm/pcr.h
+++ b/arch/sparc/include/asm/pcr.h
@@ -2,8 +2,13 @@
#define __PCR_H
struct pcr_ops {
- u64 (*read)(void);
- void (*write)(u64);
+ u64 (*read_pcr)(unsigned long);
+ void (*write_pcr)(unsigned long, u64);
+ u64 (*read_pic)(unsigned long);
+ void (*write_pic)(unsigned long, u64);
+ u64 (*nmi_picl_value)(unsigned int nmi_hz);
+ u64 pcr_nmi_enable;
+ u64 pcr_nmi_disable;
};
extern const struct pcr_ops *pcr_ops;
@@ -27,21 +32,18 @@ extern void schedule_deferred_pcr_work(void);
#define PCR_N2_SL1_SHIFT 27
#define PCR_N2_OV1 0x80000000
-extern unsigned int picl_shift;
-
-/* In order to commonize as much of the implementation as
- * possible, we use PICH as our counter. Mostly this is
- * to accommodate Niagara-1 which can only count insn cycles
- * in PICH.
- */
-static inline u64 picl_value(unsigned int nmi_hz)
-{
- u32 delta = local_cpu_data().clock_tick / (nmi_hz << picl_shift);
-
- return ((u64)((0 - delta) & 0xffffffff)) << 32;
-}
-
-extern u64 pcr_enable;
+#define PCR_N4_OV 0x00000001 /* PIC overflow */
+#define PCR_N4_TOE 0x00000002 /* Trap On Event */
+#define PCR_N4_UTRACE 0x00000004 /* Trace user events */
+#define PCR_N4_STRACE 0x00000008 /* Trace supervisor events */
+#define PCR_N4_HTRACE 0x00000010 /* Trace hypervisor events */
+#define PCR_N4_MASK 0x000007e0 /* Event mask */
+#define PCR_N4_MASK_SHIFT 5
+#define PCR_N4_SL 0x0000f800 /* Event Select */
+#define PCR_N4_SL_SHIFT 11
+#define PCR_N4_PICNPT 0x00010000 /* PIC non-privileged trap */
+#define PCR_N4_PICNHT 0x00020000 /* PIC non-hypervisor trap */
+#define PCR_N4_NTC 0x00040000 /* Next-To-Commit wrap */
extern int pcr_arch_init(void);
diff --git a/arch/sparc/include/asm/perfctr.h b/arch/sparc/include/asm/perfctr.h
index 3332d2cba6c..214feefa577 100644
--- a/arch/sparc/include/asm/perfctr.h
+++ b/arch/sparc/include/asm/perfctr.h
@@ -54,11 +54,6 @@ enum perfctr_opcode {
PERFCTR_GETPCR
};
-/* I don't want the kernel's namespace to be polluted with this
- * stuff when this file is included. --DaveM
- */
-#ifndef __KERNEL__
-
#define PRIV 0x00000001
#define SYS 0x00000002
#define USR 0x00000004
@@ -168,29 +163,4 @@ struct vcounter_struct {
unsigned long long vcnt1;
};
-#else /* !(__KERNEL__) */
-
-#ifndef CONFIG_SPARC32
-
-/* Performance counter register access. */
-#define read_pcr(__p) __asm__ __volatile__("rd %%pcr, %0" : "=r" (__p))
-#define write_pcr(__p) __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (__p))
-#define read_pic(__p) __asm__ __volatile__("rd %%pic, %0" : "=r" (__p))
-
-/* Blackbird errata workaround. See commentary in
- * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt()
- * for more information.
- */
-#define write_pic(__p) \
- __asm__ __volatile__("ba,pt %%xcc, 99f\n\t" \
- " nop\n\t" \
- ".align 64\n" \
- "99:wr %0, 0x0, %%pic\n\t" \
- "rd %%pic, %%g0" : : "r" (__p))
-#define reset_pic() write_pic(0)
-
-#endif /* !CONFIG_SPARC32 */
-
-#endif /* !(__KERNEL__) */
-
#endif /* !(PERF_COUNTER_API) */
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 40b2d7a7023..bcfe063bce2 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -38,51 +38,20 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
kmem_cache_free(pgtable_cache, pmd);
}
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
-{
- return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
-{
- struct page *page;
- pte_t *pte;
-
- pte = pte_alloc_one_kernel(mm, address);
- if (!pte)
- return NULL;
- page = virt_to_page(pte);
- pgtable_page_ctor(page);
- return page;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pgtable_page_dtor(ptepage);
- __free_page(ptepage);
-}
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+ unsigned long address);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address);
+extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
+extern void pte_free(struct mm_struct *mm, pgtable_t ptepage);
-#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
-#define pmd_populate(MM,PMD,PTE_PAGE) \
- pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE))
-#define pmd_pgtable(pmd) pmd_page(pmd)
+#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(MM, PMD, PTE)
+#define pmd_populate(MM, PMD, PTE) pmd_set(MM, PMD, PTE)
+#define pmd_pgtable(PMD) ((pte_t *)__pmd_page(PMD))
#define check_pgt_cache() do { } while (0)
-static inline void pgtable_free(void *table, bool is_page)
-{
- if (is_page)
- free_page((unsigned long)table);
- else
- kmem_cache_free(pgtable_cache, table);
-}
+extern void pgtable_free(void *table, bool is_page);
#ifdef CONFIG_SMP
@@ -113,11 +82,10 @@ static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, bool is
}
#endif /* !CONFIG_SMP */
-static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage,
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pte_t *pte,
unsigned long address)
{
- pgtable_page_dtor(ptepage);
- pgtable_free_tlb(tlb, page_address(ptepage), true);
+ pgtable_free_tlb(tlb, pte, true);
}
#define __pmd_free_tlb(tlb, pmd, addr) \
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 61210db139f..95515f1e7ce 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -45,40 +45,59 @@
#define vmemmap ((struct page *)VMEMMAP_BASE)
-/* XXX All of this needs to be rethought so we can take advantage
- * XXX cheetah's full 64-bit virtual address space, ie. no more hole
- * XXX in the middle like on spitfire. -DaveM
- */
-/*
- * Given a virtual address, the lowest PAGE_SHIFT bits determine offset
- * into the page; the next higher PAGE_SHIFT-3 bits determine the pte#
- * in the proper pagetable (the -3 is from the 8 byte ptes, and each page
- * table is a single page long). The next higher PMD_BITS determine pmd#
- * in the proper pmdtable (where we must have PMD_BITS <= (PAGE_SHIFT-2)
- * since the pmd entries are 4 bytes, and each pmd page is a single page
- * long). Finally, the higher few bits determine pgde#.
- */
-
/* PMD_SHIFT determines the size of the area a second-level page
* table can map
*/
-#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3))
+#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4))
#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
#define PMD_BITS (PAGE_SHIFT - 2)
/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS)
+#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4) + PMD_BITS)
#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define PGDIR_BITS (PAGE_SHIFT - 2)
+#if (PGDIR_SHIFT + PGDIR_BITS) != 44
+#error Page table parameters do not cover virtual address space properly.
+#endif
+
+#if (PMD_SHIFT != HPAGE_SHIFT)
+#error PMD_SHIFT must equal HPAGE_SHIFT for transparent huge pages.
+#endif
+
+/* PMDs point to PTE tables which are 4K aligned. */
+#define PMD_PADDR _AC(0xfffffffe,UL)
+#define PMD_PADDR_SHIFT _AC(11,UL)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define PMD_ISHUGE _AC(0x00000001,UL)
+
+/* This is the PMD layout when PMD_ISHUGE is set. With 4MB huge
+ * pages, this frees up a bunch of bits in the layout that we can
+ * use for the protection settings and software metadata.
+ */
+#define PMD_HUGE_PADDR _AC(0xfffff800,UL)
+#define PMD_HUGE_PROTBITS _AC(0x000007ff,UL)
+#define PMD_HUGE_PRESENT _AC(0x00000400,UL)
+#define PMD_HUGE_WRITE _AC(0x00000200,UL)
+#define PMD_HUGE_DIRTY _AC(0x00000100,UL)
+#define PMD_HUGE_ACCESSED _AC(0x00000080,UL)
+#define PMD_HUGE_EXEC _AC(0x00000040,UL)
+#define PMD_HUGE_SPLITTING _AC(0x00000020,UL)
+#endif
+
+/* PGDs point to PMD tables which are 8K aligned. */
+#define PGD_PADDR _AC(0xfffffffc,UL)
+#define PGD_PADDR_SHIFT _AC(11,UL)
+
#ifndef __ASSEMBLY__
#include <linux/sched.h>
/* Entries per page directory level. */
-#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3))
+#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-4))
#define PTRS_PER_PMD (1UL << PMD_BITS)
#define PTRS_PER_PGD (1UL << PGDIR_BITS)
@@ -160,26 +179,11 @@
#define _PAGE_SZ8K_4V _AC(0x0000000000000000,UL) /* 8K Page */
#define _PAGE_SZALL_4V _AC(0x0000000000000007,UL) /* All pgsz bits */
-#if PAGE_SHIFT == 13
#define _PAGE_SZBITS_4U _PAGE_SZ8K_4U
#define _PAGE_SZBITS_4V _PAGE_SZ8K_4V
-#elif PAGE_SHIFT == 16
-#define _PAGE_SZBITS_4U _PAGE_SZ64K_4U
-#define _PAGE_SZBITS_4V _PAGE_SZ64K_4V
-#else
-#error Wrong PAGE_SHIFT specified
-#endif
-#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
#define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U
#define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
-#define _PAGE_SZHUGE_4U _PAGE_SZ512K_4U
-#define _PAGE_SZHUGE_4V _PAGE_SZ512K_4V
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
-#define _PAGE_SZHUGE_4U _PAGE_SZ64K_4U
-#define _PAGE_SZHUGE_4V _PAGE_SZ64K_4V
-#endif
/* These are actually filled in at boot time by sun4{u,v}_pgprot_init() */
#define __P000 __pgprot(0)
@@ -218,7 +222,6 @@ extern unsigned long _PAGE_CACHE;
extern unsigned long pg_iobits;
extern unsigned long _PAGE_ALL_SZ_BITS;
-extern unsigned long _PAGE_SZBITS;
extern struct page *mem_map_zero;
#define ZERO_PAGE(vaddr) (mem_map_zero)
@@ -231,25 +234,25 @@ extern struct page *mem_map_zero;
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
{
unsigned long paddr = pfn << PAGE_SHIFT;
- unsigned long sz_bits;
-
- sz_bits = 0UL;
- if (_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL) {
- __asm__ __volatile__(
- "\n661: sethi %%uhi(%1), %0\n"
- " sllx %0, 32, %0\n"
- " .section .sun4v_2insn_patch, \"ax\"\n"
- " .word 661b\n"
- " mov %2, %0\n"
- " nop\n"
- " .previous\n"
- : "=r" (sz_bits)
- : "i" (_PAGE_SZBITS_4U), "i" (_PAGE_SZBITS_4V));
- }
- return __pte(paddr | sz_bits | pgprot_val(prot));
+
+ BUILD_BUG_ON(_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL);
+ return __pte(paddr | pgprot_val(prot));
}
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot);
+#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
+
+extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ /* Do nothing, mk_pmd() does this part. */
+ return pmd;
+}
+#endif
+
/* This one can be done with two shifts. */
static inline unsigned long pte_pfn(pte_t pte)
{
@@ -286,6 +289,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
* Note: We encode this into 3 sun4v 2-insn patch sequences.
*/
+ BUILD_BUG_ON(_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL);
__asm__ __volatile__(
"\n661: sethi %%uhi(%2), %1\n"
" sethi %%hi(%2), %0\n"
@@ -307,10 +311,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
: "=r" (mask), "=r" (tmp)
: "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U |
_PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U |
- _PAGE_SZBITS_4U | _PAGE_SPECIAL),
+ _PAGE_SPECIAL),
"i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V |
- _PAGE_SZBITS_4V | _PAGE_SPECIAL));
+ _PAGE_SPECIAL));
return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask));
}
@@ -618,19 +622,130 @@ static inline unsigned long pte_special(pte_t pte)
return pte_val(pte) & _PAGE_SPECIAL;
}
-#define pmd_set(pmdp, ptep) \
- (pmd_val(*(pmdp)) = (__pa((unsigned long) (ptep)) >> 11UL))
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_young(pmd_t pmd)
+{
+ return pmd_val(pmd) & PMD_HUGE_ACCESSED;
+}
+
+static inline int pmd_write(pmd_t pmd)
+{
+ return pmd_val(pmd) & PMD_HUGE_WRITE;
+}
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ unsigned long val = pmd_val(pmd) & PMD_HUGE_PADDR;
+
+ return val >> (PAGE_SHIFT - PMD_PADDR_SHIFT);
+}
+
+static inline int pmd_large(pmd_t pmd)
+{
+ return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) ==
+ (PMD_ISHUGE | PMD_HUGE_PRESENT);
+}
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+ return (pmd_val(pmd) & (PMD_ISHUGE|PMD_HUGE_SPLITTING)) ==
+ (PMD_ISHUGE|PMD_HUGE_SPLITTING);
+}
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ return pmd_val(pmd) & PMD_ISHUGE;
+}
+
+#define has_transparent_hugepage() 1
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~PMD_HUGE_ACCESSED;
+ return pmd;
+}
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~PMD_HUGE_WRITE;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+ pmd_val(pmd) |= PMD_HUGE_DIRTY;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+ pmd_val(pmd) |= PMD_HUGE_ACCESSED;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+ pmd_val(pmd) |= PMD_HUGE_WRITE;
+ return pmd;
+}
+
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~PMD_HUGE_PRESENT;
+ return pmd;
+}
+
+static inline pmd_t pmd_mksplitting(pmd_t pmd)
+{
+ pmd_val(pmd) |= PMD_HUGE_SPLITTING;
+ return pmd;
+}
+
+extern pgprot_t pmd_pgprot(pmd_t entry);
+#endif
+
+static inline int pmd_present(pmd_t pmd)
+{
+ return pmd_val(pmd) != 0U;
+}
+
+#define pmd_none(pmd) (!pmd_val(pmd))
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd);
+#else
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ *pmdp = pmd;
+}
+#endif
+
+static inline void pmd_set(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
+{
+ unsigned long val = __pa((unsigned long) (ptep)) >> PMD_PADDR_SHIFT;
+
+ pmd_val(*pmdp) = val;
+}
+
#define pud_set(pudp, pmdp) \
- (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> 11UL))
-#define __pmd_page(pmd) \
- ((unsigned long) __va((((unsigned long)pmd_val(pmd))<<11UL)))
+ (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> PGD_PADDR_SHIFT))
+static inline unsigned long __pmd_page(pmd_t pmd)
+{
+ unsigned long paddr = (unsigned long) pmd_val(pmd);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ if (pmd_val(pmd) & PMD_ISHUGE)
+ paddr &= PMD_HUGE_PADDR;
+#endif
+ paddr <<= PMD_PADDR_SHIFT;
+ return ((unsigned long) __va(paddr));
+}
#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd))
#define pud_page_vaddr(pud) \
- ((unsigned long) __va((((unsigned long)pud_val(pud))<<11UL)))
+ ((unsigned long) __va((((unsigned long)pud_val(pud))<<PGD_PADDR_SHIFT)))
#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud))
-#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) (0)
-#define pmd_present(pmd) (pmd_val(pmd) != 0U)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0U)
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (0)
@@ -664,6 +779,16 @@ static inline unsigned long pte_special(pte_t pte)
extern void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
pte_t *ptep, pte_t orig, int fullmm);
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+ unsigned long addr,
+ pmd_t *pmdp)
+{
+ pmd_t pmd = *pmdp;
+ set_pmd_at(mm, addr, pmdp, __pmd(0U));
+ return pmd;
+}
+
static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, int fullmm)
{
@@ -719,6 +844,16 @@ extern void mmu_info(struct seq_file *);
struct vm_area_struct;
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd);
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm);
+#endif
/* Encode and de-code a swap entry */
#define __swp_type(entry) (((entry).val >> PAGE_SHIFT) & 0xffUL)
diff --git a/arch/sparc/include/asm/pstate.h b/arch/sparc/include/asm/pstate.h
index a26a53777bb..4b6b998afd9 100644
--- a/arch/sparc/include/asm/pstate.h
+++ b/arch/sparc/include/asm/pstate.h
@@ -88,4 +88,18 @@
#define VERS_MAXTL _AC(0x000000000000ff00,UL) /* Max Trap Level. */
#define VERS_MAXWIN _AC(0x000000000000001f,UL) /* Max RegWindow Idx.*/
+/* Compatability Feature Register (%asr26), SPARC-T4 and later */
+#define CFR_AES _AC(0x0000000000000001,UL) /* Supports AES opcodes */
+#define CFR_DES _AC(0x0000000000000002,UL) /* Supports DES opcodes */
+#define CFR_KASUMI _AC(0x0000000000000004,UL) /* Supports KASUMI opcodes */
+#define CFR_CAMELLIA _AC(0x0000000000000008,UL) /* Supports CAMELLIA opcodes*/
+#define CFR_MD5 _AC(0x0000000000000010,UL) /* Supports MD5 opcodes */
+#define CFR_SHA1 _AC(0x0000000000000020,UL) /* Supports SHA1 opcodes */
+#define CFR_SHA256 _AC(0x0000000000000040,UL) /* Supports SHA256 opcodes */
+#define CFR_SHA512 _AC(0x0000000000000080,UL) /* Supports SHA512 opcodes */
+#define CFR_MPMUL _AC(0x0000000000000100,UL) /* Supports MPMUL opcodes */
+#define CFR_MONTMUL _AC(0x0000000000000200,UL) /* Supports MONTMUL opcodes */
+#define CFR_MONTSQR _AC(0x0000000000000400,UL) /* Supports MONTSQR opcodes */
+#define CFR_CRC32C _AC(0x0000000000000800,UL) /* Supports CRC32C opcodes */
+
#endif /* !(_SPARC64_PSTATE_H) */
diff --git a/arch/sparc/include/asm/siginfo.h b/arch/sparc/include/asm/siginfo.h
index 215900fce21..dbc182c438b 100644
--- a/arch/sparc/include/asm/siginfo.h
+++ b/arch/sparc/include/asm/siginfo.h
@@ -3,7 +3,6 @@
#if defined(__sparc__) && defined(__arch64__)
-#define SI_PAD_SIZE32 ((SI_MAX_SIZE/sizeof(int)) - 3)
#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
#define __ARCH_SI_BAND_T int
diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
index 1a8afd1ad04..b4c258de444 100644
--- a/arch/sparc/include/asm/tsb.h
+++ b/arch/sparc/include/asm/tsb.h
@@ -147,20 +147,96 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
- sllx REG1, 11, REG1; \
+ sllx REG1, PGD_PADDR_SHIFT, REG1; \
andn REG2, 0x3, REG2; \
lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - PMD_SHIFT, REG2; \
- srlx REG2, 64 - PAGE_SHIFT, REG2; \
- sllx REG1, 11, REG1; \
+ srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \
+ sllx REG1, PMD_PADDR_SHIFT, REG1; \
andn REG2, 0x7, REG2; \
add REG1, REG2, REG1;
- /* Do a user page table walk in MMU globals. Leaves physical PTE
- * pointer in REG1. Jumps to FAIL_LABEL on early page table walk
- * termination. Physical base of page tables is in PHYS_PGD which
- * will not be modified.
+ /* This macro exists only to make the PMD translator below easier
+ * to read. It hides the ELF section switch for the sun4v code
+ * patching.
+ */
+#define OR_PTE_BIT(REG, NAME) \
+661: or REG, _PAGE_##NAME##_4U, REG; \
+ .section .sun4v_1insn_patch, "ax"; \
+ .word 661b; \
+ or REG, _PAGE_##NAME##_4V, REG; \
+ .previous;
+
+ /* Load into REG the PTE value for VALID, CACHE, and SZHUGE. */
+#define BUILD_PTE_VALID_SZHUGE_CACHE(REG) \
+661: sethi %uhi(_PAGE_VALID|_PAGE_SZHUGE_4U), REG; \
+ .section .sun4v_1insn_patch, "ax"; \
+ .word 661b; \
+ sethi %uhi(_PAGE_VALID), REG; \
+ .previous; \
+ sllx REG, 32, REG; \
+661: or REG, _PAGE_CP_4U|_PAGE_CV_4U, REG; \
+ .section .sun4v_1insn_patch, "ax"; \
+ .word 661b; \
+ or REG, _PAGE_CP_4V|_PAGE_CV_4V|_PAGE_SZHUGE_4V, REG; \
+ .previous;
+
+ /* PMD has been loaded into REG1, interpret the value, seeing
+ * if it is a HUGE PMD or a normal one. If it is not valid
+ * then jump to FAIL_LABEL. If it is a HUGE PMD, and it
+ * translates to a valid PTE, branch to PTE_LABEL.
+ *
+ * We translate the PMD by hand, one bit at a time,
+ * constructing the huge PTE.
+ *
+ * So we construct the PTE in REG2 as follows:
+ *
+ * 1) Extract the PMD PFN from REG1 and place it into REG2.
+ *
+ * 2) Translate PMD protection bits in REG1 into REG2, one bit
+ * at a time using andcc tests on REG1 and OR's into REG2.
+ *
+ * Only two bits to be concerned with here, EXEC and WRITE.
+ * Now REG1 is freed up and we can use it as a temporary.
+ *
+ * 3) Construct the VALID, CACHE, and page size PTE bits in
+ * REG1, OR with REG2 to form final PTE.
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
+ brz,pn REG1, FAIL_LABEL; \
+ andcc REG1, PMD_ISHUGE, %g0; \
+ be,pt %xcc, 700f; \
+ and REG1, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED, REG2; \
+ cmp REG2, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED; \
+ bne,pn %xcc, FAIL_LABEL; \
+ andn REG1, PMD_HUGE_PROTBITS, REG2; \
+ sllx REG2, PMD_PADDR_SHIFT, REG2; \
+ /* REG2 now holds PFN << PAGE_SHIFT */ \
+ andcc REG1, PMD_HUGE_EXEC, %g0; \
+ bne,a,pt %xcc, 1f; \
+ OR_PTE_BIT(REG2, EXEC); \
+1: andcc REG1, PMD_HUGE_WRITE, %g0; \
+ bne,a,pt %xcc, 1f; \
+ OR_PTE_BIT(REG2, W); \
+ /* REG1 can now be clobbered, build final PTE */ \
+1: BUILD_PTE_VALID_SZHUGE_CACHE(REG1); \
+ ba,pt %xcc, PTE_LABEL; \
+ or REG1, REG2, REG1; \
+700:
+#else
+#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
+ brz,pn REG1, FAIL_LABEL; \
+ nop;
+#endif
+
+ /* Do a user page table walk in MMU globals. Leaves final,
+ * valid, PTE value in REG1. Jumps to FAIL_LABEL on early
+ * page table walk termination or if the PTE is not valid.
+ *
+ * Physical base of page tables is in PHYS_PGD which will not
+ * be modified.
*
* VADDR will not be clobbered, but REG1 and REG2 will.
*/
@@ -172,15 +248,19 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
- sllx REG1, 11, REG1; \
+ sllx REG1, PGD_PADDR_SHIFT, REG1; \
andn REG2, 0x3, REG2; \
lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
- brz,pn REG1, FAIL_LABEL; \
- sllx VADDR, 64 - PMD_SHIFT, REG2; \
- srlx REG2, 64 - PAGE_SHIFT, REG2; \
- sllx REG1, 11, REG1; \
+ USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
+ sllx VADDR, 64 - PMD_SHIFT, REG2; \
+ srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \
+ sllx REG1, PMD_PADDR_SHIFT, REG1; \
andn REG2, 0x7, REG2; \
- add REG1, REG2, REG1;
+ add REG1, REG2, REG1; \
+ ldxa [REG1] ASI_PHYS_USE_EC, REG1; \
+ brgez,pn REG1, FAIL_LABEL; \
+ nop; \
+800:
/* Lookup a OBP mapping on VADDR in the prom_trans[] table at TL>0.
* If no entry is found, FAIL_LABEL will be branched to. On success
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index fb269346480..d9a677c5192 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -447,6 +447,7 @@
#else
#define __ARCH_WANT_COMPAT_SYS_TIME
#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
+#define __ARCH_WANT_COMPAT_SYS_SENDFILE
#endif
/*
diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..7518ad28696
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/Kbuild
@@ -0,0 +1,5 @@
+# UAPI Header export list
+# User exported sparc header files
+
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index b42ddbf9651..2feb15c35d9 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -559,10 +559,10 @@ niagara_tlb_fixup:
be,pt %xcc, niagara2_patch
nop
cmp %g1, SUN4V_CHIP_NIAGARA4
- be,pt %xcc, niagara2_patch
+ be,pt %xcc, niagara4_patch
nop
cmp %g1, SUN4V_CHIP_NIAGARA5
- be,pt %xcc, niagara2_patch
+ be,pt %xcc, niagara4_patch
nop
call generic_patch_copyops
@@ -573,6 +573,16 @@ niagara_tlb_fixup:
nop
ba,a,pt %xcc, 80f
+niagara4_patch:
+ call niagara4_patch_copyops
+ nop
+ call niagara4_patch_bzero
+ nop
+ call niagara4_patch_pageops
+ nop
+
+ ba,a,pt %xcc, 80f
+
niagara2_patch:
call niagara2_patch_copyops
nop
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 8593672838f..c0a2de0fd62 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -45,6 +45,7 @@ static struct api_info api_table[] = {
{ .group = HV_GRP_NIU, },
{ .group = HV_GRP_VF_CPU, },
{ .group = HV_GRP_KT_CPU, },
+ { .group = HV_GRP_VT_CPU, },
{ .group = HV_GRP_DIAG, .flags = FLAG_PRE_API },
};
@@ -193,7 +194,7 @@ void __init sun4v_hvapi_init(void)
bad:
prom_printf("HVAPI: Cannot register API group "
- "%lx with major(%u) minor(%u)\n",
+ "%lx with major(%lu) minor(%lu)\n",
group, major, minor);
prom_halt();
}
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
index 58d60de4d65..f3ab509b76a 100644
--- a/arch/sparc/kernel/hvcalls.S
+++ b/arch/sparc/kernel/hvcalls.S
@@ -805,3 +805,19 @@ ENTRY(sun4v_reboot_data_set)
retl
nop
ENDPROC(sun4v_reboot_data_set)
+
+ENTRY(sun4v_vt_get_perfreg)
+ mov %o1, %o4
+ mov HV_FAST_VT_GET_PERFREG, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_vt_get_perfreg)
+
+ENTRY(sun4v_vt_set_perfreg)
+ mov HV_FAST_VT_SET_PERFREG, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_vt_set_perfreg)
diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S
index 79f31036484..0746e5e32b3 100644
--- a/arch/sparc/kernel/ktlb.S
+++ b/arch/sparc/kernel/ktlb.S
@@ -188,31 +188,26 @@ valid_addr_bitmap_patch:
be,pn %xcc, kvmap_dtlb_longpath
2: sethi %hi(kpte_linear_bitmap), %g2
- or %g2, %lo(kpte_linear_bitmap), %g2
/* Get the 256MB physical address index. */
sllx %g4, 21, %g5
- mov 1, %g7
+ or %g2, %lo(kpte_linear_bitmap), %g2
srlx %g5, 21 + 28, %g5
+ and %g5, (32 - 1), %g7
- /* Don't try this at home kids... this depends upon srlx
- * only taking the low 6 bits of the shift count in %g5.
- */
- sllx %g7, %g5, %g7
-
- /* Divide by 64 to get the offset into the bitmask. */
- srlx %g5, 6, %g5
+ /* Divide by 32 to get the offset into the bitmask. */
+ srlx %g5, 5, %g5
+ add %g7, %g7, %g7
sllx %g5, 3, %g5
- /* kern_linear_pte_xor[((mask & bit) ? 1 : 0)] */
+ /* kern_linear_pte_xor[(mask >> shift) & 3)] */
ldx [%g2 + %g5], %g2
- andcc %g2, %g7, %g0
+ srlx %g2, %g7, %g7
sethi %hi(kern_linear_pte_xor), %g5
+ and %g7, 3, %g7
or %g5, %lo(kern_linear_pte_xor), %g5
- bne,a,pt %xcc, 1f
- add %g5, 8, %g5
-
-1: ldx [%g5], %g2
+ sllx %g7, 3, %g7
+ ldx [%g5 + %g7], %g2
.globl kvmap_linear_patch
kvmap_linear_patch:
diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c
index 21dcda75a52..fc052116156 100644
--- a/arch/sparc/kernel/leon_pci.c
+++ b/arch/sparc/kernel/leon_pci.c
@@ -102,15 +102,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
return pci_enable_resources(dev, mask);
}
-void __devinit pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-#ifdef CONFIG_PCI_DEBUG
- printk(KERN_DEBUG "LEONPCI: Assigning IRQ %02d to %s\n", irq,
- pci_name(dev));
-#endif
- pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
/* in/out routines taken from pcic.c
*
* This probably belongs here rather than ioport.c because
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 6dc79628058..831c001604e 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -817,6 +817,30 @@ void __cpuinit mdesc_populate_present_mask(cpumask_t *mask)
mdesc_iterate_over_cpus(record_one_cpu, NULL, mask);
}
+static void * __init check_one_pgsz(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg)
+{
+ const u64 *pgsz_prop = mdesc_get_property(hp, mp, "mmu-page-size-list", NULL);
+ unsigned long *pgsz_mask = arg;
+ u64 val;
+
+ val = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K |
+ HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB);
+ if (pgsz_prop)
+ val = *pgsz_prop;
+
+ if (!*pgsz_mask)
+ *pgsz_mask = val;
+ else
+ *pgsz_mask &= val;
+ return NULL;
+}
+
+void __init mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask)
+{
+ *pgsz_mask = 0;
+ mdesc_iterate_over_cpus(check_one_pgsz, pgsz_mask, mask);
+}
+
static void * __cpuinit fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg)
{
const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL);
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index 15e0a169397..f1ddc0d2367 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -48,9 +48,7 @@ void *module_alloc(unsigned long size)
return NULL;
ret = module_map(size);
- if (!ret)
- ret = ERR_PTR(-ENOMEM);
- else
+ if (ret)
memset(ret, 0, size);
return ret;
@@ -116,6 +114,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
v = sym->st_value + rel[i].r_addend;
switch (ELF_R_TYPE(rel[i].r_info) & 0xff) {
+ case R_SPARC_DISP32:
+ v -= (Elf_Addr) location;
+ *loc32 = v;
+ break;
#ifdef CONFIG_SPARC64
case R_SPARC_64:
location[0] = v >> 56;
@@ -128,11 +130,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
location[7] = v >> 0;
break;
- case R_SPARC_DISP32:
- v -= (Elf_Addr) location;
- *loc32 = v;
- break;
-
case R_SPARC_WDISP19:
v -= (Elf_Addr) location;
*loc32 = (*loc32 & ~0x7ffff) |
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index eb1c1f010a4..6479256fd5a 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -22,7 +22,6 @@
#include <asm/perf_event.h>
#include <asm/ptrace.h>
#include <asm/pcr.h>
-#include <asm/perfctr.h>
#include "kstack.h"
@@ -109,7 +108,7 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
touched = 1;
else
- pcr_ops->write(PCR_PIC_PRIV);
+ pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
sum = local_cpu_data().irq0_irqs;
if (__get_cpu_var(nmi_touch)) {
@@ -126,8 +125,8 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
__this_cpu_write(alert_counter, 0);
}
if (__get_cpu_var(wd_enabled)) {
- write_pic(picl_value(nmi_hz));
- pcr_ops->write(pcr_enable);
+ pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
+ pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}
restore_hardirq_stack(orig_sp);
@@ -166,7 +165,7 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count)
void stop_nmi_watchdog(void *unused)
{
- pcr_ops->write(PCR_PIC_PRIV);
+ pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
__get_cpu_var(wd_enabled) = 0;
atomic_dec(&nmi_active);
}
@@ -223,10 +222,10 @@ void start_nmi_watchdog(void *unused)
__get_cpu_var(wd_enabled) = 1;
atomic_inc(&nmi_active);
- pcr_ops->write(PCR_PIC_PRIV);
- write_pic(picl_value(nmi_hz));
+ pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
+ pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
- pcr_ops->write(pcr_enable);
+ pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}
static void nmi_adjust_hz_one(void *unused)
@@ -234,10 +233,10 @@ static void nmi_adjust_hz_one(void *unused)
if (!__get_cpu_var(wd_enabled))
return;
- pcr_ops->write(PCR_PIC_PRIV);
- write_pic(picl_value(nmi_hz));
+ pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
+ pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
- pcr_ops->write(pcr_enable);
+ pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}
void nmi_adjust_hz(unsigned int new_hz)
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 065b88c4f86..75b31bcdead 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -622,10 +622,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *pbus)
{
}
-void pcibios_update_irq(struct pci_dev *pdev, int irq)
-{
-}
-
resource_size_t pcibios_align_resource(void *data, const struct resource *res,
resource_size_t size, resource_size_t align)
{
@@ -783,7 +779,7 @@ static int __pci_mmap_make_offset(struct pci_dev *pdev,
static void __pci_mmap_set_flags(struct pci_dev *dev, struct vm_area_struct *vma,
enum pci_mmap_state mmap_state)
{
- vma->vm_flags |= (VM_IO | VM_RESERVED);
+ vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
}
/* Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 7661e84a05a..051b69caeff 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -594,7 +594,7 @@ static int __devinit pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n",
vdma[0], vdma[1]);
return -EINVAL;
- };
+ }
dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
num_tsb_entries = vdma[1] / IO_PAGE_SIZE;
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
index 0ce0dd2332a..269af58497a 100644
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -13,23 +13,14 @@
#include <asm/pil.h>
#include <asm/pcr.h>
#include <asm/nmi.h>
+#include <asm/asi.h>
#include <asm/spitfire.h>
-#include <asm/perfctr.h>
/* This code is shared between various users of the performance
* counters. Users will be oprofile, pseudo-NMI watchdog, and the
* perf_event support layer.
*/
-#define PCR_SUN4U_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE)
-#define PCR_N2_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | \
- PCR_N2_TOE_OV1 | \
- (2 << PCR_N2_SL1_SHIFT) | \
- (0xff << PCR_N2_MASK1_SHIFT))
-
-u64 pcr_enable;
-unsigned int picl_shift;
-
/* Performance counter interrupts run unmasked at PIL level 15.
* Therefore we can't do things like wakeups and other work
* that expects IRQ disabling to be adhered to in locking etc.
@@ -60,39 +51,144 @@ void arch_irq_work_raise(void)
const struct pcr_ops *pcr_ops;
EXPORT_SYMBOL_GPL(pcr_ops);
-static u64 direct_pcr_read(void)
+static u64 direct_pcr_read(unsigned long reg_num)
{
u64 val;
- read_pcr(val);
+ WARN_ON_ONCE(reg_num != 0);
+ __asm__ __volatile__("rd %%pcr, %0" : "=r" (val));
return val;
}
-static void direct_pcr_write(u64 val)
+static void direct_pcr_write(unsigned long reg_num, u64 val)
+{
+ WARN_ON_ONCE(reg_num != 0);
+ __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (val));
+}
+
+static u64 direct_pic_read(unsigned long reg_num)
{
- write_pcr(val);
+ u64 val;
+
+ WARN_ON_ONCE(reg_num != 0);
+ __asm__ __volatile__("rd %%pic, %0" : "=r" (val));
+ return val;
+}
+
+static void direct_pic_write(unsigned long reg_num, u64 val)
+{
+ WARN_ON_ONCE(reg_num != 0);
+
+ /* Blackbird errata workaround. See commentary in
+ * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt()
+ * for more information.
+ */
+ __asm__ __volatile__("ba,pt %%xcc, 99f\n\t"
+ " nop\n\t"
+ ".align 64\n"
+ "99:wr %0, 0x0, %%pic\n\t"
+ "rd %%pic, %%g0" : : "r" (val));
+}
+
+static u64 direct_picl_value(unsigned int nmi_hz)
+{
+ u32 delta = local_cpu_data().clock_tick / nmi_hz;
+
+ return ((u64)((0 - delta) & 0xffffffff)) << 32;
}
static const struct pcr_ops direct_pcr_ops = {
- .read = direct_pcr_read,
- .write = direct_pcr_write,
+ .read_pcr = direct_pcr_read,
+ .write_pcr = direct_pcr_write,
+ .read_pic = direct_pic_read,
+ .write_pic = direct_pic_write,
+ .nmi_picl_value = direct_picl_value,
+ .pcr_nmi_enable = (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE),
+ .pcr_nmi_disable = PCR_PIC_PRIV,
};
-static void n2_pcr_write(u64 val)
+static void n2_pcr_write(unsigned long reg_num, u64 val)
{
unsigned long ret;
+ WARN_ON_ONCE(reg_num != 0);
if (val & PCR_N2_HTRACE) {
ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);
if (ret != HV_EOK)
- write_pcr(val);
+ direct_pcr_write(reg_num, val);
} else
- write_pcr(val);
+ direct_pcr_write(reg_num, val);
+}
+
+static u64 n2_picl_value(unsigned int nmi_hz)
+{
+ u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2);
+
+ return ((u64)((0 - delta) & 0xffffffff)) << 32;
}
static const struct pcr_ops n2_pcr_ops = {
- .read = direct_pcr_read,
- .write = n2_pcr_write,
+ .read_pcr = direct_pcr_read,
+ .write_pcr = n2_pcr_write,
+ .read_pic = direct_pic_read,
+ .write_pic = direct_pic_write,
+ .nmi_picl_value = n2_picl_value,
+ .pcr_nmi_enable = (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE |
+ PCR_N2_TOE_OV1 |
+ (2 << PCR_N2_SL1_SHIFT) |
+ (0xff << PCR_N2_MASK1_SHIFT)),
+ .pcr_nmi_disable = PCR_PIC_PRIV,
+};
+
+static u64 n4_pcr_read(unsigned long reg_num)
+{
+ unsigned long val;
+
+ (void) sun4v_vt_get_perfreg(reg_num, &val);
+
+ return val;
+}
+
+static void n4_pcr_write(unsigned long reg_num, u64 val)
+{
+ (void) sun4v_vt_set_perfreg(reg_num, val);
+}
+
+static u64 n4_pic_read(unsigned long reg_num)
+{
+ unsigned long val;
+
+ __asm__ __volatile__("ldxa [%1] %2, %0"
+ : "=r" (val)
+ : "r" (reg_num * 0x8UL), "i" (ASI_PIC));
+
+ return val;
+}
+
+static void n4_pic_write(unsigned long reg_num, u64 val)
+{
+ __asm__ __volatile__("stxa %0, [%1] %2"
+ : /* no outputs */
+ : "r" (val), "r" (reg_num * 0x8UL), "i" (ASI_PIC));
+}
+
+static u64 n4_picl_value(unsigned int nmi_hz)
+{
+ u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2);
+
+ return ((u64)((0 - delta) & 0xffffffff));
+}
+
+static const struct pcr_ops n4_pcr_ops = {
+ .read_pcr = n4_pcr_read,
+ .write_pcr = n4_pcr_write,
+ .read_pic = n4_pic_read,
+ .write_pic = n4_pic_write,
+ .nmi_picl_value = n4_picl_value,
+ .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE |
+ PCR_N4_UTRACE | PCR_N4_TOE |
+ (26 << PCR_N4_SL_SHIFT)),
+ .pcr_nmi_disable = PCR_N4_PICNPT,
};
static unsigned long perf_hsvc_group;
@@ -115,6 +211,10 @@ static int __init register_perf_hsvc(void)
perf_hsvc_group = HV_GRP_KT_CPU;
break;
+ case SUN4V_CHIP_NIAGARA4:
+ perf_hsvc_group = HV_GRP_VT_CPU;
+ break;
+
default:
return -ENODEV;
}
@@ -139,6 +239,29 @@ static void __init unregister_perf_hsvc(void)
sun4v_hvapi_unregister(perf_hsvc_group);
}
+static int __init setup_sun4v_pcr_ops(void)
+{
+ int ret = 0;
+
+ switch (sun4v_chip_type) {
+ case SUN4V_CHIP_NIAGARA1:
+ case SUN4V_CHIP_NIAGARA2:
+ case SUN4V_CHIP_NIAGARA3:
+ pcr_ops = &n2_pcr_ops;
+ break;
+
+ case SUN4V_CHIP_NIAGARA4:
+ pcr_ops = &n4_pcr_ops;
+ break;
+
+ default:
+ ret = -ENODEV;
+ break;
+ }
+
+ return ret;
+}
+
int __init pcr_arch_init(void)
{
int err = register_perf_hsvc();
@@ -148,15 +271,14 @@ int __init pcr_arch_init(void)
switch (tlb_type) {
case hypervisor:
- pcr_ops = &n2_pcr_ops;
- pcr_enable = PCR_N2_ENABLE;
- picl_shift = 2;
+ err = setup_sun4v_pcr_ops();
+ if (err)
+ goto out_unregister;
break;
case cheetah:
case cheetah_plus:
pcr_ops = &direct_pcr_ops;
- pcr_enable = PCR_SUN4U_ENABLE;
break;
case spitfire:
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 5713957dcb8..e48651dace1 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -25,36 +25,48 @@
#include <linux/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>
-#include <asm/perfctr.h>
#include <asm/cacheflush.h>
#include "kernel.h"
#include "kstack.h"
-/* Sparc64 chips have two performance counters, 32-bits each, with
- * overflow interrupts generated on transition from 0xffffffff to 0.
- * The counters are accessed in one go using a 64-bit register.
+/* Two classes of sparc64 chips currently exist. All of which have
+ * 32-bit counters which can generate overflow interrupts on the
+ * transition from 0xffffffff to 0.
*
- * Both counters are controlled using a single control register. The
- * only way to stop all sampling is to clear all of the context (user,
- * supervisor, hypervisor) sampling enable bits. But these bits apply
- * to both counters, thus the two counters can't be enabled/disabled
- * individually.
+ * All chips upto and including SPARC-T3 have two performance
+ * counters. The two 32-bit counters are accessed in one go using a
+ * single 64-bit register.
*
- * The control register has two event fields, one for each of the two
- * counters. It's thus nearly impossible to have one counter going
- * while keeping the other one stopped. Therefore it is possible to
- * get overflow interrupts for counters not currently "in use" and
- * that condition must be checked in the overflow interrupt handler.
+ * On these older chips both counters are controlled using a single
+ * control register. The only way to stop all sampling is to clear
+ * all of the context (user, supervisor, hypervisor) sampling enable
+ * bits. But these bits apply to both counters, thus the two counters
+ * can't be enabled/disabled individually.
+ *
+ * Furthermore, the control register on these older chips have two
+ * event fields, one for each of the two counters. It's thus nearly
+ * impossible to have one counter going while keeping the other one
+ * stopped. Therefore it is possible to get overflow interrupts for
+ * counters not currently "in use" and that condition must be checked
+ * in the overflow interrupt handler.
*
* So we use a hack, in that we program inactive counters with the
* "sw_count0" and "sw_count1" events. These count how many times
* the instruction "sethi %hi(0xfc000), %g0" is executed. It's an
* unusual way to encode a NOP and therefore will not trigger in
* normal code.
+ *
+ * Starting with SPARC-T4 we have one control register per counter.
+ * And the counters are stored in individual registers. The registers
+ * for the counters are 64-bit but only a 32-bit counter is
+ * implemented. The event selections on SPARC-T4 lack any
+ * restrictions, therefore we can elide all of the complicated
+ * conflict resolution code we have for SPARC-T3 and earlier chips.
*/
-#define MAX_HWEVENTS 2
+#define MAX_HWEVENTS 4
+#define MAX_PCRS 4
#define MAX_PERIOD ((1UL << 32) - 1)
#define PIC_UPPER_INDEX 0
@@ -90,8 +102,8 @@ struct cpu_hw_events {
*/
int current_idx[MAX_HWEVENTS];
- /* Software copy of %pcr register on this cpu. */
- u64 pcr;
+ /* Software copy of %pcr register(s) on this cpu. */
+ u64 pcr[MAX_HWEVENTS];
/* Enabled/disable state. */
int enabled;
@@ -103,6 +115,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
/* An event map describes the characteristics of a performance
* counter event. In particular it gives the encoding as well as
* a mask telling which counters the event can be measured on.
+ *
+ * The mask is unused on SPARC-T4 and later.
*/
struct perf_event_map {
u16 encoding;
@@ -142,15 +156,53 @@ struct sparc_pmu {
const struct perf_event_map *(*event_map)(int);
const cache_map_t *cache_map;
int max_events;
+ u32 (*read_pmc)(int);
+ void (*write_pmc)(int, u64);
int upper_shift;
int lower_shift;
int event_mask;
+ int user_bit;
+ int priv_bit;
int hv_bit;
int irq_bit;
int upper_nop;
int lower_nop;
+ unsigned int flags;
+#define SPARC_PMU_ALL_EXCLUDES_SAME 0x00000001
+#define SPARC_PMU_HAS_CONFLICTS 0x00000002
+ int max_hw_events;
+ int num_pcrs;
+ int num_pic_regs;
};
+static u32 sparc_default_read_pmc(int idx)
+{
+ u64 val;
+
+ val = pcr_ops->read_pic(0);
+ if (idx == PIC_UPPER_INDEX)
+ val >>= 32;
+
+ return val & 0xffffffff;
+}
+
+static void sparc_default_write_pmc(int idx, u64 val)
+{
+ u64 shift, mask, pic;
+
+ shift = 0;
+ if (idx == PIC_UPPER_INDEX)
+ shift = 32;
+
+ mask = ((u64) 0xffffffff) << shift;
+ val <<= shift;
+
+ pic = pcr_ops->read_pic(0);
+ pic &= ~mask;
+ pic |= val;
+ pcr_ops->write_pic(0, pic);
+}
+
static const struct perf_event_map ultra3_perfmon_event_map[] = {
[PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
[PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
@@ -268,11 +320,20 @@ static const struct sparc_pmu ultra3_pmu = {
.event_map = ultra3_event_map,
.cache_map = &ultra3_cache_map,
.max_events = ARRAY_SIZE(ultra3_perfmon_event_map),
+ .read_pmc = sparc_default_read_pmc,
+ .write_pmc = sparc_default_write_pmc,
.upper_shift = 11,
.lower_shift = 4,
.event_mask = 0x3f,
+ .user_bit = PCR_UTRACE,
+ .priv_bit = PCR_STRACE,
.upper_nop = 0x1c,
.lower_nop = 0x14,
+ .flags = (SPARC_PMU_ALL_EXCLUDES_SAME |
+ SPARC_PMU_HAS_CONFLICTS),
+ .max_hw_events = 2,
+ .num_pcrs = 1,
+ .num_pic_regs = 1,
};
/* Niagara1 is very limited. The upper PIC is hard-locked to count
@@ -397,11 +458,20 @@ static const struct sparc_pmu niagara1_pmu = {
.event_map = niagara1_event_map,
.cache_map = &niagara1_cache_map,
.max_events = ARRAY_SIZE(niagara1_perfmon_event_map),
+ .read_pmc = sparc_default_read_pmc,
+ .write_pmc = sparc_default_write_pmc,
.upper_shift = 0,
.lower_shift = 4,
.event_mask = 0x7,
+ .user_bit = PCR_UTRACE,
+ .priv_bit = PCR_STRACE,
.upper_nop = 0x0,
.lower_nop = 0x0,
+ .flags = (SPARC_PMU_ALL_EXCLUDES_SAME |
+ SPARC_PMU_HAS_CONFLICTS),
+ .max_hw_events = 2,
+ .num_pcrs = 1,
+ .num_pic_regs = 1,
};
static const struct perf_event_map niagara2_perfmon_event_map[] = {
@@ -523,13 +593,203 @@ static const struct sparc_pmu niagara2_pmu = {
.event_map = niagara2_event_map,
.cache_map = &niagara2_cache_map,
.max_events = ARRAY_SIZE(niagara2_perfmon_event_map),
+ .read_pmc = sparc_default_read_pmc,
+ .write_pmc = sparc_default_write_pmc,
.upper_shift = 19,
.lower_shift = 6,
.event_mask = 0xfff,
- .hv_bit = 0x8,
+ .user_bit = PCR_UTRACE,
+ .priv_bit = PCR_STRACE,
+ .hv_bit = PCR_N2_HTRACE,
.irq_bit = 0x30,
.upper_nop = 0x220,
.lower_nop = 0x220,
+ .flags = (SPARC_PMU_ALL_EXCLUDES_SAME |
+ SPARC_PMU_HAS_CONFLICTS),
+ .max_hw_events = 2,
+ .num_pcrs = 1,
+ .num_pic_regs = 1,
+};
+
+static const struct perf_event_map niagara4_perfmon_event_map[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = { (26 << 6) },
+ [PERF_COUNT_HW_INSTRUCTIONS] = { (3 << 6) | 0x3f },
+ [PERF_COUNT_HW_CACHE_REFERENCES] = { (3 << 6) | 0x04 },
+ [PERF_COUNT_HW_CACHE_MISSES] = { (16 << 6) | 0x07 },
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { (4 << 6) | 0x01 },
+ [PERF_COUNT_HW_BRANCH_MISSES] = { (25 << 6) | 0x0f },
+};
+
+static const struct perf_event_map *niagara4_event_map(int event_id)
+{
+ return &niagara4_perfmon_event_map[event_id];
+}
+
+static const cache_map_t niagara4_cache_map = {
+[C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 },
+ [C(RESULT_MISS)] = { (16 << 6) | 0x07 },
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 },
+ [C(RESULT_MISS)] = { (16 << 6) | 0x07 },
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+ [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+ },
+},
+[C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = { (3 << 6) | 0x3f },
+ [C(RESULT_MISS)] = { (11 << 6) | 0x03 },
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
+ [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE },
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+ [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+ },
+},
+[C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 },
+ [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 },
+ [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+ [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+ },
+},
+[C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+ [C(RESULT_MISS)] = { (17 << 6) | 0x3f },
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+ [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+ [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+ },
+},
+[C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+ [C(RESULT_MISS)] = { (6 << 6) | 0x3f },
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+ [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+ [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+ },
+},
+[C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+ [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+ [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+ [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+ },
+},
+[C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+ [C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+ [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+ [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+ },
+},
+};
+
+static u32 sparc_vt_read_pmc(int idx)
+{
+ u64 val = pcr_ops->read_pic(idx);
+
+ return val & 0xffffffff;
+}
+
+static void sparc_vt_write_pmc(int idx, u64 val)
+{
+ u64 pcr;
+
+ /* There seems to be an internal latch on the overflow event
+ * on SPARC-T4 that prevents it from triggering unless you
+ * update the PIC exactly as we do here. The requirement
+ * seems to be that you have to turn off event counting in the
+ * PCR around the PIC update.
+ *
+ * For example, after the following sequence:
+ *
+ * 1) set PIC to -1
+ * 2) enable event counting and overflow reporting in PCR
+ * 3) overflow triggers, softint 15 handler invoked
+ * 4) clear OV bit in PCR
+ * 5) write PIC to -1
+ *
+ * a subsequent overflow event will not trigger. This
+ * sequence works on SPARC-T3 and previous chips.
+ */
+ pcr = pcr_ops->read_pcr(idx);
+ pcr_ops->write_pcr(idx, PCR_N4_PICNPT);
+
+ pcr_ops->write_pic(idx, val & 0xffffffff);
+
+ pcr_ops->write_pcr(idx, pcr);
+}
+
+static const struct sparc_pmu niagara4_pmu = {
+ .event_map = niagara4_event_map,
+ .cache_map = &niagara4_cache_map,
+ .max_events = ARRAY_SIZE(niagara4_perfmon_event_map),
+ .read_pmc = sparc_vt_read_pmc,
+ .write_pmc = sparc_vt_write_pmc,
+ .upper_shift = 5,
+ .lower_shift = 5,
+ .event_mask = 0x7ff,
+ .user_bit = PCR_N4_UTRACE,
+ .priv_bit = PCR_N4_STRACE,
+
+ /* We explicitly don't support hypervisor tracing. The T4
+ * generates the overflow event for precise events via a trap
+ * which will not be generated (ie. it's completely lost) if
+ * we happen to be in the hypervisor when the event triggers.
+ * Essentially, the overflow event reporting is completely
+ * unusable when you have hypervisor mode tracing enabled.
+ */
+ .hv_bit = 0,
+
+ .irq_bit = PCR_N4_TOE,
+ .upper_nop = 0,
+ .lower_nop = 0,
+ .flags = 0,
+ .max_hw_events = 4,
+ .num_pcrs = 4,
+ .num_pic_regs = 4,
};
static const struct sparc_pmu *sparc_pmu __read_mostly;
@@ -558,55 +818,35 @@ static u64 nop_for_index(int idx)
static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
u64 val, mask = mask_for_index(idx);
+ int pcr_index = 0;
- val = cpuc->pcr;
+ if (sparc_pmu->num_pcrs > 1)
+ pcr_index = idx;
+
+ val = cpuc->pcr[pcr_index];
val &= ~mask;
val |= hwc->config;
- cpuc->pcr = val;
+ cpuc->pcr[pcr_index] = val;
- pcr_ops->write(cpuc->pcr);
+ pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);
}
static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
u64 mask = mask_for_index(idx);
u64 nop = nop_for_index(idx);
+ int pcr_index = 0;
u64 val;
- val = cpuc->pcr;
+ if (sparc_pmu->num_pcrs > 1)
+ pcr_index = idx;
+
+ val = cpuc->pcr[pcr_index];
val &= ~mask;
val |= nop;
- cpuc->pcr = val;
+ cpuc->pcr[pcr_index] = val;
- pcr_ops->write(cpuc->pcr);
-}
-
-static u32 read_pmc(int idx)
-{
- u64 val;
-
- read_pic(val);
- if (idx == PIC_UPPER_INDEX)
- val >>= 32;
-
- return val & 0xffffffff;
-}
-
-static void write_pmc(int idx, u64 val)
-{
- u64 shift, mask, pic;
-
- shift = 0;
- if (idx == PIC_UPPER_INDEX)
- shift = 32;
-
- mask = ((u64) 0xffffffff) << shift;
- val <<= shift;
-
- read_pic(pic);
- pic &= ~mask;
- pic |= val;
- write_pic(pic);
+ pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);
}
static u64 sparc_perf_event_update(struct perf_event *event,
@@ -618,7 +858,7 @@ static u64 sparc_perf_event_update(struct perf_event *event,
again:
prev_raw_count = local64_read(&hwc->prev_count);
- new_raw_count = read_pmc(idx);
+ new_raw_count = sparc_pmu->read_pmc(idx);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
@@ -658,25 +898,17 @@ static int sparc_perf_event_set_period(struct perf_event *event,
local64_set(&hwc->prev_count, (u64)-left);
- write_pmc(idx, (u64)(-left) & 0xffffffff);
+ sparc_pmu->write_pmc(idx, (u64)(-left) & 0xffffffff);
perf_event_update_userpage(event);
return ret;
}
-/* If performance event entries have been added, move existing
- * events around (if necessary) and then assign new entries to
- * counters.
- */
-static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
+static void read_in_all_counters(struct cpu_hw_events *cpuc)
{
int i;
- if (!cpuc->n_added)
- goto out;
-
- /* Read in the counters which are moving. */
for (i = 0; i < cpuc->n_events; i++) {
struct perf_event *cp = cpuc->event[i];
@@ -687,6 +919,20 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
cpuc->current_idx[i] = PIC_NO_INDEX;
}
}
+}
+
+/* On this PMU all PICs are programmed using a single PCR. Calculate
+ * the combined control register value.
+ *
+ * For such chips we require that all of the events have the same
+ * configuration, so just fetch the settings from the first entry.
+ */
+static void calculate_single_pcr(struct cpu_hw_events *cpuc)
+{
+ int i;
+
+ if (!cpuc->n_added)
+ goto out;
/* Assign to counters all unassigned events. */
for (i = 0; i < cpuc->n_events; i++) {
@@ -702,20 +948,71 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
cpuc->current_idx[i] = idx;
enc = perf_event_get_enc(cpuc->events[i]);
- pcr &= ~mask_for_index(idx);
+ cpuc->pcr[0] &= ~mask_for_index(idx);
if (hwc->state & PERF_HES_STOPPED)
- pcr |= nop_for_index(idx);
+ cpuc->pcr[0] |= nop_for_index(idx);
else
- pcr |= event_encoding(enc, idx);
+ cpuc->pcr[0] |= event_encoding(enc, idx);
}
out:
- return pcr;
+ cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
+}
+
+/* On this PMU each PIC has it's own PCR control register. */
+static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
+{
+ int i;
+
+ if (!cpuc->n_added)
+ goto out;
+
+ for (i = 0; i < cpuc->n_events; i++) {
+ struct perf_event *cp = cpuc->event[i];
+ struct hw_perf_event *hwc = &cp->hw;
+ int idx = hwc->idx;
+ u64 enc;
+
+ if (cpuc->current_idx[i] != PIC_NO_INDEX)
+ continue;
+
+ sparc_perf_event_set_period(cp, hwc, idx);
+ cpuc->current_idx[i] = idx;
+
+ enc = perf_event_get_enc(cpuc->events[i]);
+ cpuc->pcr[idx] &= ~mask_for_index(idx);
+ if (hwc->state & PERF_HES_STOPPED)
+ cpuc->pcr[idx] |= nop_for_index(idx);
+ else
+ cpuc->pcr[idx] |= event_encoding(enc, idx);
+ }
+out:
+ for (i = 0; i < cpuc->n_events; i++) {
+ struct perf_event *cp = cpuc->event[i];
+ int idx = cp->hw.idx;
+
+ cpuc->pcr[idx] |= cp->hw.config_base;
+ }
+}
+
+/* If performance event entries have been added, move existing events
+ * around (if necessary) and then assign new entries to counters.
+ */
+static void update_pcrs_for_enable(struct cpu_hw_events *cpuc)
+{
+ if (cpuc->n_added)
+ read_in_all_counters(cpuc);
+
+ if (sparc_pmu->num_pcrs == 1) {
+ calculate_single_pcr(cpuc);
+ } else {
+ calculate_multiple_pcrs(cpuc);
+ }
}
static void sparc_pmu_enable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- u64 pcr;
+ int i;
if (cpuc->enabled)
return;
@@ -723,26 +1020,17 @@ static void sparc_pmu_enable(struct pmu *pmu)
cpuc->enabled = 1;
barrier();
- pcr = cpuc->pcr;
- if (!cpuc->n_events) {
- pcr = 0;
- } else {
- pcr = maybe_change_configuration(cpuc, pcr);
-
- /* We require that all of the events have the same
- * configuration, so just fetch the settings from the
- * first entry.
- */
- cpuc->pcr = pcr | cpuc->event[0]->hw.config_base;
- }
+ if (cpuc->n_events)
+ update_pcrs_for_enable(cpuc);
- pcr_ops->write(cpuc->pcr);
+ for (i = 0; i < sparc_pmu->num_pcrs; i++)
+ pcr_ops->write_pcr(i, cpuc->pcr[i]);
}
static void sparc_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- u64 val;
+ int i;
if (!cpuc->enabled)
return;
@@ -750,12 +1038,14 @@ static void sparc_pmu_disable(struct pmu *pmu)
cpuc->enabled = 0;
cpuc->n_added = 0;
- val = cpuc->pcr;
- val &= ~(PCR_UTRACE | PCR_STRACE |
- sparc_pmu->hv_bit | sparc_pmu->irq_bit);
- cpuc->pcr = val;
+ for (i = 0; i < sparc_pmu->num_pcrs; i++) {
+ u64 val = cpuc->pcr[i];
- pcr_ops->write(cpuc->pcr);
+ val &= ~(sparc_pmu->user_bit | sparc_pmu->priv_bit |
+ sparc_pmu->hv_bit | sparc_pmu->irq_bit);
+ cpuc->pcr[i] = val;
+ pcr_ops->write_pcr(i, cpuc->pcr[i]);
+ }
}
static int active_event_index(struct cpu_hw_events *cpuc,
@@ -854,9 +1144,11 @@ static DEFINE_MUTEX(pmc_grab_mutex);
static void perf_stop_nmi_watchdog(void *unused)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ int i;
stop_nmi_watchdog(NULL);
- cpuc->pcr = pcr_ops->read();
+ for (i = 0; i < sparc_pmu->num_pcrs; i++)
+ cpuc->pcr[i] = pcr_ops->read_pcr(i);
}
void perf_event_grab_pmc(void)
@@ -942,9 +1234,17 @@ static int sparc_check_constraints(struct perf_event **evts,
if (!n_ev)
return 0;
- if (n_ev > MAX_HWEVENTS)
+ if (n_ev > sparc_pmu->max_hw_events)
return -1;
+ if (!(sparc_pmu->flags & SPARC_PMU_HAS_CONFLICTS)) {
+ int i;
+
+ for (i = 0; i < n_ev; i++)
+ evts[i]->hw.idx = i;
+ return 0;
+ }
+
msk0 = perf_event_get_msk(events[0]);
if (n_ev == 1) {
if (msk0 & PIC_LOWER)
@@ -1000,6 +1300,9 @@ static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
struct perf_event *event;
int i, n, first;
+ if (!(sparc_pmu->flags & SPARC_PMU_ALL_EXCLUDES_SAME))
+ return 0;
+
n = n_prev + n_new;
if (n <= 1)
return 0;
@@ -1059,7 +1362,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
perf_pmu_disable(event->pmu);
n0 = cpuc->n_events;
- if (n0 >= MAX_HWEVENTS)
+ if (n0 >= sparc_pmu->max_hw_events)
goto out;
cpuc->event[n0] = event;
@@ -1146,16 +1449,16 @@ static int sparc_pmu_event_init(struct perf_event *event)
/* We save the enable bits in the config_base. */
hwc->config_base = sparc_pmu->irq_bit;
if (!attr->exclude_user)
- hwc->config_base |= PCR_UTRACE;
+ hwc->config_base |= sparc_pmu->user_bit;
if (!attr->exclude_kernel)
- hwc->config_base |= PCR_STRACE;
+ hwc->config_base |= sparc_pmu->priv_bit;
if (!attr->exclude_hv)
hwc->config_base |= sparc_pmu->hv_bit;
n = 0;
if (event->group_leader != event) {
n = collect_events(event->group_leader,
- MAX_HWEVENTS - 1,
+ sparc_pmu->max_hw_events - 1,
evts, events, current_idx_dmy);
if (n < 0)
return -EINVAL;
@@ -1254,8 +1557,7 @@ static struct pmu pmu = {
void perf_event_print_debug(void)
{
unsigned long flags;
- u64 pcr, pic;
- int cpu;
+ int cpu, i;
if (!sparc_pmu)
return;
@@ -1264,12 +1566,13 @@ void perf_event_print_debug(void)
cpu = smp_processor_id();
- pcr = pcr_ops->read();
- read_pic(pic);
-
pr_info("\n");
- pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
- cpu, pcr, pic);
+ for (i = 0; i < sparc_pmu->num_pcrs; i++)
+ pr_info("CPU#%d: PCR%d[%016llx]\n",
+ cpu, i, pcr_ops->read_pcr(i));
+ for (i = 0; i < sparc_pmu->num_pic_regs; i++)
+ pr_info("CPU#%d: PIC%d[%016llx]\n",
+ cpu, i, pcr_ops->read_pic(i));
local_irq_restore(flags);
}
@@ -1305,8 +1608,9 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
* Do this before we peek at the counters to determine
* overflow so we don't lose any events.
*/
- if (sparc_pmu->irq_bit)
- pcr_ops->write(cpuc->pcr);
+ if (sparc_pmu->irq_bit &&
+ sparc_pmu->num_pcrs == 1)
+ pcr_ops->write_pcr(0, cpuc->pcr[0]);
for (i = 0; i < cpuc->n_events; i++) {
struct perf_event *event = cpuc->event[i];
@@ -1314,6 +1618,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
struct hw_perf_event *hwc;
u64 val;
+ if (sparc_pmu->irq_bit &&
+ sparc_pmu->num_pcrs > 1)
+ pcr_ops->write_pcr(idx, cpuc->pcr[idx]);
+
hwc = &event->hw;
val = sparc_perf_event_update(event, hwc, idx);
if (val & (1ULL << 31))
@@ -1352,6 +1660,10 @@ static bool __init supported_pmu(void)
sparc_pmu = &niagara2_pmu;
return true;
}
+ if (!strcmp(sparc_pmu_type, "niagara4")) {
+ sparc_pmu = &niagara4_pmu;
+ return true;
+ }
return false;
}
diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c
index 340c5b976d2..d397d7fc5c2 100644
--- a/arch/sparc/kernel/prom_64.c
+++ b/arch/sparc/kernel/prom_64.c
@@ -37,7 +37,7 @@ void * __init prom_early_alloc(unsigned long size)
void *ret;
if (!paddr) {
- prom_printf("prom_early_alloc(%lu) failed\n");
+ prom_printf("prom_early_alloc(%lu) failed\n", size);
prom_halt();
}
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 1414d16712b..0800e71d8a8 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -340,7 +340,12 @@ static const char *hwcaps[] = {
*/
"mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2",
"ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau",
- "ima", "cspare",
+ "ima", "cspare", "pause", "cbcond",
+};
+
+static const char *crypto_hwcaps[] = {
+ "aes", "des", "kasumi", "camellia", "md5", "sha1", "sha256",
+ "sha512", "mpmul", "montmul", "montsqr", "crc32c",
};
void cpucap_info(struct seq_file *m)
@@ -357,27 +362,61 @@ void cpucap_info(struct seq_file *m)
printed++;
}
}
+ if (caps & HWCAP_SPARC_CRYPTO) {
+ unsigned long cfr;
+
+ __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+ for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
+ unsigned long bit = 1UL << i;
+ if (cfr & bit) {
+ seq_printf(m, "%s%s",
+ printed ? "," : "", crypto_hwcaps[i]);
+ printed++;
+ }
+ }
+ }
seq_putc(m, '\n');
}
+static void __init report_one_hwcap(int *printed, const char *name)
+{
+ if ((*printed) == 0)
+ printk(KERN_INFO "CPU CAPS: [");
+ printk(KERN_CONT "%s%s",
+ (*printed) ? "," : "", name);
+ if (++(*printed) == 8) {
+ printk(KERN_CONT "]\n");
+ *printed = 0;
+ }
+}
+
+static void __init report_crypto_hwcaps(int *printed)
+{
+ unsigned long cfr;
+ int i;
+
+ __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+
+ for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
+ unsigned long bit = 1UL << i;
+ if (cfr & bit)
+ report_one_hwcap(printed, crypto_hwcaps[i]);
+ }
+}
+
static void __init report_hwcaps(unsigned long caps)
{
int i, printed = 0;
- printk(KERN_INFO "CPU CAPS: [");
for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
unsigned long bit = 1UL << i;
- if (caps & bit) {
- printk(KERN_CONT "%s%s",
- printed ? "," : "", hwcaps[i]);
- if (++printed == 8) {
- printk(KERN_CONT "]\n");
- printk(KERN_INFO "CPU CAPS: [");
- printed = 0;
- }
- }
+ if (caps & bit)
+ report_one_hwcap(&printed, hwcaps[i]);
}
- printk(KERN_CONT "]\n");
+ if (caps & HWCAP_SPARC_CRYPTO)
+ report_crypto_hwcaps(&printed);
+ if (printed != 0)
+ printk(KERN_CONT "]\n");
}
static unsigned long __init mdesc_cpu_hwcap_list(void)
@@ -411,6 +450,10 @@ static unsigned long __init mdesc_cpu_hwcap_list(void)
break;
}
}
+ for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
+ if (!strcmp(prop, crypto_hwcaps[i]))
+ caps |= HWCAP_SPARC_CRYPTO;
+ }
plen = strlen(prop) + 1;
prop += plen;
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index a53e0a5fd3a..53e48f721ce 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -54,58 +54,6 @@ struct signal_frame32 {
/* __siginfo_rwin_t * */u32 rwin_save;
} __attribute__((aligned(8)));
-typedef struct compat_siginfo{
- int si_signo;
- int si_errno;
- int si_code;
-
- union {
- int _pad[SI_PAD_SIZE32];
-
- /* kill() */
- struct {
- compat_pid_t _pid; /* sender's pid */
- unsigned int _uid; /* sender's uid */
- } _kill;
-
- /* POSIX.1b timers */
- struct {
- compat_timer_t _tid; /* timer id */
- int _overrun; /* overrun count */
- compat_sigval_t _sigval; /* same as below */
- int _sys_private; /* not to be passed to user */
- } _timer;
-
- /* POSIX.1b signals */
- struct {
- compat_pid_t _pid; /* sender's pid */
- unsigned int _uid; /* sender's uid */
- compat_sigval_t _sigval;
- } _rt;
-
- /* SIGCHLD */
- struct {
- compat_pid_t _pid; /* which child */
- unsigned int _uid; /* sender's uid */
- int _status; /* exit code */
- compat_clock_t _utime;
- compat_clock_t _stime;
- } _sigchld;
-
- /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */
- struct {
- u32 _addr; /* faulting insn/memory ref. */
- int _trapno;
- } _sigfault;
-
- /* SIGPOLL */
- struct {
- int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
- int _fd;
- } _sigpoll;
- } _sifields;
-}compat_siginfo_t;
-
struct rt_signal_frame32 {
struct sparc_stackf32 ss;
compat_siginfo_t info;
diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S
index e1fbf8c7578..bde867fd71e 100644
--- a/arch/sparc/kernel/sun4v_tlb_miss.S
+++ b/arch/sparc/kernel/sun4v_tlb_miss.S
@@ -176,7 +176,7 @@ sun4v_tsb_miss_common:
sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
mov SCRATCHPAD_UTSBREG2, %g5
ldxa [%g5] ASI_SCRATCHPAD, %g5
cmp %g5, -1
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index d97f3eb72e0..44025f4ba41 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -90,7 +90,7 @@ SIGN1(sys32_mkdir, sys_mkdir, %o1)
SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5)
SIGN1(sys32_sysfs, compat_sys_sysfs, %o0)
SIGN2(sys32_sendfile, compat_sys_sendfile, %o0, %o1)
-SIGN2(sys32_sendfile64, compat_sys_sendfile64, %o0, %o1)
+SIGN2(sys32_sendfile64, sys_sendfile, %o0, %o1)
SIGN1(sys32_prctl, sys_prctl, %o0)
SIGN1(sys32_sched_rr_get_interval, compat_sys_sched_rr_get_interval, %o0)
SIGN2(sys32_waitpid, sys_waitpid, %o0, %o2)
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
index f7392336961..d862499eb01 100644
--- a/arch/sparc/kernel/sys_sparc32.c
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -506,52 +506,6 @@ long compat_sys_fadvise64_64(int fd,
advice);
}
-asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
- compat_off_t __user *offset,
- compat_size_t count)
-{
- mm_segment_t old_fs = get_fs();
- int ret;
- off_t of;
-
- if (offset && get_user(of, offset))
- return -EFAULT;
-
- set_fs(KERNEL_DS);
- ret = sys_sendfile(out_fd, in_fd,
- offset ? (off_t __user *) &of : NULL,
- count);
- set_fs(old_fs);
-
- if (offset && put_user(of, offset))
- return -EFAULT;
-
- return ret;
-}
-
-asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd,
- compat_loff_t __user *offset,
- compat_size_t count)
-{
- mm_segment_t old_fs = get_fs();
- int ret;
- loff_t lof;
-
- if (offset && get_user(lof, offset))
- return -EFAULT;
-
- set_fs(KERNEL_DS);
- ret = sys_sendfile64(out_fd, in_fd,
- offset ? (loff_t __user *) &lof : NULL,
- count);
- set_fs(old_fs);
-
- if (offset && put_user(lof, offset))
- return -EFAULT;
-
- return ret;
-}
-
/* This is just a version for 32-bit applications which does
* not force O_LARGEFILE on.
*/
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index 3b05e669771..fa1f1d375ff 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -850,7 +850,7 @@ void __init cheetah_ecache_flush_init(void)
ecache_flush_physbase = find_ecache_flush_span(ecache_flush_size);
if (ecache_flush_physbase == ~0UL) {
- prom_printf("cheetah_ecache_flush_init: Cannot find %d byte "
+ prom_printf("cheetah_ecache_flush_init: Cannot find %ld byte "
"contiguous physical memory.\n",
ecache_flush_size);
prom_halt();
diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
index db15d123f05..d4bdc7a6237 100644
--- a/arch/sparc/kernel/tsb.S
+++ b/arch/sparc/kernel/tsb.S
@@ -49,7 +49,7 @@ tsb_miss_page_table_walk:
/* Before committing to a full page table walk,
* check the huge page TSB.
*/
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
661: ldx [%g7 + TRAP_PER_CPU_TSB_HUGE], %g5
nop
@@ -110,12 +110,9 @@ tsb_miss_page_table_walk:
tsb_miss_page_table_walk_sun4v_fastpath:
USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)
- /* Load and check PTE. */
- ldxa [%g5] ASI_PHYS_USE_EC, %g5
- brgez,pn %g5, tsb_do_fault
- nop
+ /* Valid PTE is now in %g5. */
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
661: sethi %uhi(_PAGE_SZALL_4U), %g7
sllx %g7, 32, %g7
.section .sun4v_2insn_patch, "ax"
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index dff4096f3de..8410065f286 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -32,6 +32,9 @@ lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o
lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o
lib-$(CONFIG_SPARC64) += NG2patch.o
+lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o
+lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o
+
lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
index 03eadf66b0d..2c20ad63ddb 100644
--- a/arch/sparc/lib/NG2memcpy.S
+++ b/arch/sparc/lib/NG2memcpy.S
@@ -14,7 +14,7 @@
#define FPRS_FEF 0x04
#ifdef MEMCPY_DEBUG
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
- clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0;
+ clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#else
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
@@ -182,13 +182,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
cmp %g2, 0
tne %xcc, 5
PREAMBLE
- mov %o0, GLOBAL_SPARE
+ mov %o0, %o3
cmp %o2, 0
be,pn %XCC, 85f
- or %o0, %o1, %o3
+ or %o0, %o1, GLOBAL_SPARE
cmp %o2, 16
blu,a,pn %XCC, 80f
- or %o3, %o2, %o3
+ or GLOBAL_SPARE, %o2, GLOBAL_SPARE
/* 2 blocks (128 bytes) is the minimum we can do the block
* copy with. We need to ensure that we'll iterate at least
@@ -202,7 +202,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
*/
cmp %o2, (4 * 64)
blu,pt %XCC, 75f
- andcc %o3, 0x7, %g0
+ andcc GLOBAL_SPARE, 0x7, %g0
/* %o0: dst
* %o1: src
@@ -404,13 +404,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
* over. If anything is left, we copy it one byte at a time.
*/
brz,pt %o2, 85f
- sub %o0, %o1, %o3
+ sub %o0, %o1, GLOBAL_SPARE
ba,a,pt %XCC, 90f
.align 64
75: /* 16 < len <= 64 */
bne,pn %XCC, 75f
- sub %o0, %o1, %o3
+ sub %o0, %o1, GLOBAL_SPARE
72:
andn %o2, 0xf, %o4
@@ -420,9 +420,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
add %o1, 0x08, %o1
EX_LD(LOAD(ldx, %o1, %g1))
sub %o1, 0x08, %o1
- EX_ST(STORE(stx, %o5, %o1 + %o3))
+ EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
add %o1, 0x8, %o1
- EX_ST(STORE(stx, %g1, %o1 + %o3))
+ EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE))
bgu,pt %XCC, 1b
add %o1, 0x8, %o1
73: andcc %o2, 0x8, %g0
@@ -430,14 +430,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
nop
sub %o2, 0x8, %o2
EX_LD(LOAD(ldx, %o1, %o5))
- EX_ST(STORE(stx, %o5, %o1 + %o3))
+ EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
add %o1, 0x8, %o1
1: andcc %o2, 0x4, %g0
be,pt %XCC, 1f
nop
sub %o2, 0x4, %o2
EX_LD(LOAD(lduw, %o1, %o5))
- EX_ST(STORE(stw, %o5, %o1 + %o3))
+ EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE))
add %o1, 0x4, %o1
1: cmp %o2, 0
be,pt %XCC, 85f
@@ -454,11 +454,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1: subcc %g1, 1, %g1
EX_LD(LOAD(ldub, %o1, %o5))
- EX_ST(STORE(stb, %o5, %o1 + %o3))
+ EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE))
bgu,pt %icc, 1b
add %o1, 1, %o1
-2: add %o1, %o3, %o0
+2: add %o1, GLOBAL_SPARE, %o0
andcc %o1, 0x7, %g1
bne,pt %icc, 8f
sll %g1, 3, %g1
@@ -468,16 +468,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
nop
ba,a,pt %xcc, 73b
-8: mov 64, %o3
+8: mov 64, GLOBAL_SPARE
andn %o1, 0x7, %o1
EX_LD(LOAD(ldx, %o1, %g2))
- sub %o3, %g1, %o3
+ sub GLOBAL_SPARE, %g1, GLOBAL_SPARE
andn %o2, 0x7, %o4
sllx %g2, %g1, %g2
1: add %o1, 0x8, %o1
EX_LD(LOAD(ldx, %o1, %g3))
subcc %o4, 0x8, %o4
- srlx %g3, %o3, %o5
+ srlx %g3, GLOBAL_SPARE, %o5
or %o5, %g2, %o5
EX_ST(STORE(stx, %o5, %o0))
add %o0, 0x8, %o0
@@ -489,32 +489,32 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
be,pn %icc, 85f
add %o1, %g1, %o1
ba,pt %xcc, 90f
- sub %o0, %o1, %o3
+ sub %o0, %o1, GLOBAL_SPARE
.align 64
80: /* 0 < len <= 16 */
- andcc %o3, 0x3, %g0
+ andcc GLOBAL_SPARE, 0x3, %g0
bne,pn %XCC, 90f
- sub %o0, %o1, %o3
+ sub %o0, %o1, GLOBAL_SPARE
1:
subcc %o2, 4, %o2
EX_LD(LOAD(lduw, %o1, %g1))
- EX_ST(STORE(stw, %g1, %o1 + %o3))
+ EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE))
bgu,pt %XCC, 1b
add %o1, 4, %o1
85: retl
- mov EX_RETVAL(GLOBAL_SPARE), %o0
+ mov EX_RETVAL(%o3), %o0
.align 32
90:
subcc %o2, 1, %o2
EX_LD(LOAD(ldub, %o1, %g1))
- EX_ST(STORE(stb, %g1, %o1 + %o3))
+ EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE))
bgu,pt %XCC, 90b
add %o1, 1, %o1
retl
- mov EX_RETVAL(GLOBAL_SPARE), %o0
+ mov EX_RETVAL(%o3), %o0
.size FUNC_NAME, .-FUNC_NAME
diff --git a/arch/sparc/lib/NG4clear_page.S b/arch/sparc/lib/NG4clear_page.S
new file mode 100644
index 00000000000..e16c88204a4
--- /dev/null
+++ b/arch/sparc/lib/NG4clear_page.S
@@ -0,0 +1,29 @@
+/* NG4copy_page.S: Niagara-4 optimized clear page.
+ *
+ * Copyright (C) 2012 (davem@davemloft.net)
+ */
+
+#include <asm/asi.h>
+#include <asm/page.h>
+
+ .text
+
+ .register %g3, #scratch
+
+ .align 32
+ .globl NG4clear_page
+ .globl NG4clear_user_page
+NG4clear_page: /* %o0=dest */
+NG4clear_user_page: /* %o0=dest, %o1=vaddr */
+ set PAGE_SIZE, %g7
+ mov 0x20, %g3
+1: stxa %g0, [%o0 + %g0] ASI_ST_BLKINIT_MRU_P
+ subcc %g7, 0x40, %g7
+ stxa %g0, [%o0 + %g3] ASI_ST_BLKINIT_MRU_P
+ bne,pt %xcc, 1b
+ add %o0, 0x40, %o0
+ membar #StoreLoad|#StoreStore
+ retl
+ nop
+ .size NG4clear_page,.-NG4clear_page
+ .size NG4clear_user_page,.-NG4clear_user_page \ No newline at end of file
diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S
new file mode 100644
index 00000000000..fd9f903ffa3
--- /dev/null
+++ b/arch/sparc/lib/NG4copy_from_user.S
@@ -0,0 +1,30 @@
+/* NG4copy_from_user.S: Niagara-4 optimized copy from userspace.
+ *
+ * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
+ */
+
+#define EX_LD(x) \
+98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, __retl_one_asi;\
+ .text; \
+ .align 4;
+
+#ifndef ASI_AIUS
+#define ASI_AIUS 0x11
+#endif
+
+#define FUNC_NAME NG4copy_from_user
+#define LOAD(type,addr,dest) type##a [addr] %asi, dest
+#define EX_RETVAL(x) 0
+
+#ifdef __KERNEL__
+#define PREAMBLE \
+ rd %asi, %g1; \
+ cmp %g1, ASI_AIUS; \
+ bne,pn %icc, ___copy_in_user; \
+ nop
+#endif
+
+#include "NG4memcpy.S"
diff --git a/arch/sparc/lib/NG4copy_page.S b/arch/sparc/lib/NG4copy_page.S
new file mode 100644
index 00000000000..28504e88c53
--- /dev/null
+++ b/arch/sparc/lib/NG4copy_page.S
@@ -0,0 +1,57 @@
+/* NG4copy_page.S: Niagara-4 optimized copy page.
+ *
+ * Copyright (C) 2012 (davem@davemloft.net)
+ */
+
+#include <asm/asi.h>
+#include <asm/page.h>
+
+ .text
+ .align 32
+
+ .register %g2, #scratch
+ .register %g3, #scratch
+
+ .globl NG4copy_user_page
+NG4copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
+ prefetch [%o1 + 0x000], #n_reads_strong
+ prefetch [%o1 + 0x040], #n_reads_strong
+ prefetch [%o1 + 0x080], #n_reads_strong
+ prefetch [%o1 + 0x0c0], #n_reads_strong
+ set PAGE_SIZE, %g7
+ prefetch [%o1 + 0x100], #n_reads_strong
+ prefetch [%o1 + 0x140], #n_reads_strong
+ prefetch [%o1 + 0x180], #n_reads_strong
+ prefetch [%o1 + 0x1c0], #n_reads_strong
+1:
+ ldx [%o1 + 0x00], %o2
+ subcc %g7, 0x40, %g7
+ ldx [%o1 + 0x08], %o3
+ ldx [%o1 + 0x10], %o4
+ ldx [%o1 + 0x18], %o5
+ ldx [%o1 + 0x20], %g1
+ stxa %o2, [%o0] ASI_ST_BLKINIT_MRU_P
+ add %o0, 0x08, %o0
+ ldx [%o1 + 0x28], %g2
+ stxa %o3, [%o0] ASI_ST_BLKINIT_MRU_P
+ add %o0, 0x08, %o0
+ ldx [%o1 + 0x30], %g3
+ stxa %o4, [%o0] ASI_ST_BLKINIT_MRU_P
+ add %o0, 0x08, %o0
+ ldx [%o1 + 0x38], %o2
+ add %o1, 0x40, %o1
+ stxa %o5, [%o0] ASI_ST_BLKINIT_MRU_P
+ add %o0, 0x08, %o0
+ stxa %g1, [%o0] ASI_ST_BLKINIT_MRU_P
+ add %o0, 0x08, %o0
+ stxa %g2, [%o0] ASI_ST_BLKINIT_MRU_P
+ add %o0, 0x08, %o0
+ stxa %g3, [%o0] ASI_ST_BLKINIT_MRU_P
+ add %o0, 0x08, %o0
+ stxa %o2, [%o0] ASI_ST_BLKINIT_MRU_P
+ add %o0, 0x08, %o0
+ bne,pt %icc, 1b
+ prefetch [%o1 + 0x200], #n_reads_strong
+ retl
+ membar #StoreLoad | #StoreStore
+ .size NG4copy_user_page,.-NG4copy_user_page
diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S
new file mode 100644
index 00000000000..9744c4540a8
--- /dev/null
+++ b/arch/sparc/lib/NG4copy_to_user.S
@@ -0,0 +1,39 @@
+/* NG4copy_to_user.S: Niagara-4 optimized copy to userspace.
+ *
+ * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
+ */
+
+#define EX_ST(x) \
+98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, __retl_one_asi;\
+ .text; \
+ .align 4;
+
+#ifndef ASI_AIUS
+#define ASI_AIUS 0x11
+#endif
+
+#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS
+#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23
+#endif
+
+#define FUNC_NAME NG4copy_to_user
+#define STORE(type,src,addr) type##a src, [addr] %asi
+#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS
+#define EX_RETVAL(x) 0
+
+#ifdef __KERNEL__
+ /* Writing to %asi is _expensive_ so we hardcode it.
+ * Reading %asi to check for KERNEL_DS is comparatively
+ * cheap.
+ */
+#define PREAMBLE \
+ rd %asi, %g1; \
+ cmp %g1, ASI_AIUS; \
+ bne,pn %icc, ___copy_in_user; \
+ nop
+#endif
+
+#include "NG4memcpy.S"
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
new file mode 100644
index 00000000000..9cf2ee01cee
--- /dev/null
+++ b/arch/sparc/lib/NG4memcpy.S
@@ -0,0 +1,360 @@
+/* NG4memcpy.S: Niagara-4 optimized memcpy.
+ *
+ * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
+ */
+
+#ifdef __KERNEL__
+#include <asm/visasm.h>
+#include <asm/asi.h>
+#define GLOBAL_SPARE %g7
+#else
+#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
+#define FPRS_FEF 0x04
+
+/* On T4 it is very expensive to access ASRs like %fprs and
+ * %asi, avoiding a read or a write can save ~50 cycles.
+ */
+#define FPU_ENTER \
+ rd %fprs, %o5; \
+ andcc %o5, FPRS_FEF, %g0; \
+ be,a,pn %icc, 999f; \
+ wr %g0, FPRS_FEF, %fprs; \
+ 999:
+
+#ifdef MEMCPY_DEBUG
+#define VISEntryHalf FPU_ENTER; \
+ clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
+#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
+#else
+#define VISEntryHalf FPU_ENTER
+#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
+#endif
+
+#define GLOBAL_SPARE %g5
+#endif
+
+#ifndef STORE_ASI
+#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
+#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P
+#else
+#define STORE_ASI 0x80 /* ASI_P */
+#endif
+#endif
+
+#ifndef EX_LD
+#define EX_LD(x) x
+#endif
+
+#ifndef EX_ST
+#define EX_ST(x) x
+#endif
+
+#ifndef EX_RETVAL
+#define EX_RETVAL(x) x
+#endif
+
+#ifndef LOAD
+#define LOAD(type,addr,dest) type [addr], dest
+#endif
+
+#ifndef STORE
+#ifndef MEMCPY_DEBUG
+#define STORE(type,src,addr) type src, [addr]
+#else
+#define STORE(type,src,addr) type##a src, [addr] %asi
+#endif
+#endif
+
+#ifndef STORE_INIT
+#define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI
+#endif
+
+#ifndef FUNC_NAME
+#define FUNC_NAME NG4memcpy
+#endif
+#ifndef PREAMBLE
+#define PREAMBLE
+#endif
+
+#ifndef XCC
+#define XCC xcc
+#endif
+
+ .register %g2,#scratch
+ .register %g3,#scratch
+
+ .text
+ .align 64
+
+ .globl FUNC_NAME
+ .type FUNC_NAME,#function
+FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+#ifdef MEMCPY_DEBUG
+ wr %g0, 0x80, %asi
+#endif
+ srlx %o2, 31, %g2
+ cmp %g2, 0
+ tne %XCC, 5
+ PREAMBLE
+ mov %o0, %o3
+ brz,pn %o2, .Lexit
+ cmp %o2, 3
+ ble,pn %icc, .Ltiny
+ cmp %o2, 19
+ ble,pn %icc, .Lsmall
+ or %o0, %o1, %g2
+ cmp %o2, 128
+ bl,pn %icc, .Lmedium
+ nop
+
+.Llarge:/* len >= 0x80 */
+ /* First get dest 8 byte aligned. */
+ sub %g0, %o0, %g1
+ and %g1, 0x7, %g1
+ brz,pt %g1, 51f
+ sub %o2, %g1, %o2
+
+1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
+ add %o1, 1, %o1
+ subcc %g1, 1, %g1
+ add %o0, 1, %o0
+ bne,pt %icc, 1b
+ EX_ST(STORE(stb, %g2, %o0 - 0x01))
+
+51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
+ LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
+ LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong)
+ LOAD(prefetch, %o1 + 0x100, #n_reads_strong)
+ LOAD(prefetch, %o1 + 0x140, #n_reads_strong)
+ LOAD(prefetch, %o1 + 0x180, #n_reads_strong)
+ LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong)
+ LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
+
+ /* Check if we can use the straight fully aligned
+ * loop, or we require the alignaddr/faligndata variant.
+ */
+ andcc %o1, 0x7, %o5
+ bne,pn %icc, .Llarge_src_unaligned
+ sub %g0, %o0, %g1
+
+ /* Legitimize the use of initializing stores by getting dest
+ * to be 64-byte aligned.
+ */
+ and %g1, 0x3f, %g1
+ brz,pt %g1, .Llarge_aligned
+ sub %o2, %g1, %o2
+
+1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2))
+ add %o1, 8, %o1
+ subcc %g1, 8, %g1
+ add %o0, 8, %o0
+ bne,pt %icc, 1b
+ EX_ST(STORE(stx, %g2, %o0 - 0x08))
+
+.Llarge_aligned:
+ /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
+ andn %o2, 0x3f, %o4
+ sub %o2, %o4, %o2
+
+1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+ add %o1, 0x40, %o1
+ EX_LD(LOAD(ldx, %o1 - 0x38, %g2))
+ subcc %o4, 0x40, %o4
+ EX_LD(LOAD(ldx, %o1 - 0x30, %g3))
+ EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE))
+ EX_LD(LOAD(ldx, %o1 - 0x20, %o5))
+ EX_ST(STORE_INIT(%g1, %o0))
+ add %o0, 0x08, %o0
+ EX_ST(STORE_INIT(%g2, %o0))
+ add %o0, 0x08, %o0
+ EX_LD(LOAD(ldx, %o1 - 0x18, %g2))
+ EX_ST(STORE_INIT(%g3, %o0))
+ add %o0, 0x08, %o0
+ EX_LD(LOAD(ldx, %o1 - 0x10, %g3))
+ EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
+ add %o0, 0x08, %o0
+ EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE))
+ EX_ST(STORE_INIT(%o5, %o0))
+ add %o0, 0x08, %o0
+ EX_ST(STORE_INIT(%g2, %o0))
+ add %o0, 0x08, %o0
+ EX_ST(STORE_INIT(%g3, %o0))
+ add %o0, 0x08, %o0
+ EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
+ add %o0, 0x08, %o0
+ bne,pt %icc, 1b
+ LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
+
+ membar #StoreLoad | #StoreStore
+
+ brz,pn %o2, .Lexit
+ cmp %o2, 19
+ ble,pn %icc, .Lsmall_unaligned
+ nop
+ ba,a,pt %icc, .Lmedium_noprefetch
+
+.Lexit: retl
+ mov EX_RETVAL(%o3), %o0
+
+.Llarge_src_unaligned:
+ andn %o2, 0x3f, %o4
+ sub %o2, %o4, %o2
+ VISEntryHalf
+ alignaddr %o1, %g0, %g1
+ add %o1, %o4, %o1
+ EX_LD(LOAD(ldd, %g1 + 0x00, %f0))
+1: EX_LD(LOAD(ldd, %g1 + 0x08, %f2))
+ subcc %o4, 0x40, %o4
+ EX_LD(LOAD(ldd, %g1 + 0x10, %f4))
+ EX_LD(LOAD(ldd, %g1 + 0x18, %f6))
+ EX_LD(LOAD(ldd, %g1 + 0x20, %f8))
+ EX_LD(LOAD(ldd, %g1 + 0x28, %f10))
+ EX_LD(LOAD(ldd, %g1 + 0x30, %f12))
+ EX_LD(LOAD(ldd, %g1 + 0x38, %f14))
+ faligndata %f0, %f2, %f16
+ EX_LD(LOAD(ldd, %g1 + 0x40, %f0))
+ faligndata %f2, %f4, %f18
+ add %g1, 0x40, %g1
+ faligndata %f4, %f6, %f20
+ faligndata %f6, %f8, %f22
+ faligndata %f8, %f10, %f24
+ faligndata %f10, %f12, %f26
+ faligndata %f12, %f14, %f28
+ faligndata %f14, %f0, %f30
+ EX_ST(STORE(std, %f16, %o0 + 0x00))
+ EX_ST(STORE(std, %f18, %o0 + 0x08))
+ EX_ST(STORE(std, %f20, %o0 + 0x10))
+ EX_ST(STORE(std, %f22, %o0 + 0x18))
+ EX_ST(STORE(std, %f24, %o0 + 0x20))
+ EX_ST(STORE(std, %f26, %o0 + 0x28))
+ EX_ST(STORE(std, %f28, %o0 + 0x30))
+ EX_ST(STORE(std, %f30, %o0 + 0x38))
+ add %o0, 0x40, %o0
+ bne,pt %icc, 1b
+ LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
+ VISExitHalf
+
+ brz,pn %o2, .Lexit
+ cmp %o2, 19
+ ble,pn %icc, .Lsmall_unaligned
+ nop
+ ba,a,pt %icc, .Lmedium_unaligned
+
+.Lmedium:
+ LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
+ andcc %g2, 0x7, %g0
+ bne,pn %icc, .Lmedium_unaligned
+ nop
+.Lmedium_noprefetch:
+ andncc %o2, 0x20 - 1, %o5
+ be,pn %icc, 2f
+ sub %o2, %o5, %o2
+1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+ EX_LD(LOAD(ldx, %o1 + 0x08, %g2))
+ EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE))
+ EX_LD(LOAD(ldx, %o1 + 0x18, %o4))
+ add %o1, 0x20, %o1
+ subcc %o5, 0x20, %o5
+ EX_ST(STORE(stx, %g1, %o0 + 0x00))
+ EX_ST(STORE(stx, %g2, %o0 + 0x08))
+ EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10))
+ EX_ST(STORE(stx, %o4, %o0 + 0x18))
+ bne,pt %icc, 1b
+ add %o0, 0x20, %o0
+2: andcc %o2, 0x18, %o5
+ be,pt %icc, 3f
+ sub %o2, %o5, %o2
+1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+ add %o1, 0x08, %o1
+ add %o0, 0x08, %o0
+ subcc %o5, 0x08, %o5
+ bne,pt %icc, 1b
+ EX_ST(STORE(stx, %g1, %o0 - 0x08))
+3: brz,pt %o2, .Lexit
+ cmp %o2, 0x04
+ bl,pn %icc, .Ltiny
+ nop
+ EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
+ add %o1, 0x04, %o1
+ add %o0, 0x04, %o0
+ subcc %o2, 0x04, %o2
+ bne,pn %icc, .Ltiny
+ EX_ST(STORE(stw, %g1, %o0 - 0x04))
+ ba,a,pt %icc, .Lexit
+.Lmedium_unaligned:
+ /* First get dest 8 byte aligned. */
+ sub %g0, %o0, %g1
+ and %g1, 0x7, %g1
+ brz,pt %g1, 2f
+ sub %o2, %g1, %o2
+
+1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
+ add %o1, 1, %o1
+ subcc %g1, 1, %g1
+ add %o0, 1, %o0
+ bne,pt %icc, 1b
+ EX_ST(STORE(stb, %g2, %o0 - 0x01))
+2:
+ and %o1, 0x7, %g1
+ brz,pn %g1, .Lmedium_noprefetch
+ sll %g1, 3, %g1
+ mov 64, %g2
+ sub %g2, %g1, %g2
+ andn %o1, 0x7, %o1
+ EX_LD(LOAD(ldx, %o1 + 0x00, %o4))
+ sllx %o4, %g1, %o4
+ andn %o2, 0x08 - 1, %o5
+ sub %o2, %o5, %o2
+1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3))
+ add %o1, 0x08, %o1
+ subcc %o5, 0x08, %o5
+ srlx %g3, %g2, GLOBAL_SPARE
+ or GLOBAL_SPARE, %o4, GLOBAL_SPARE
+ EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00))
+ add %o0, 0x08, %o0
+ bne,pt %icc, 1b
+ sllx %g3, %g1, %o4
+ srl %g1, 3, %g1
+ add %o1, %g1, %o1
+ brz,pn %o2, .Lexit
+ nop
+ ba,pt %icc, .Lsmall_unaligned
+
+.Ltiny:
+ EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
+ subcc %o2, 1, %o2
+ be,pn %icc, .Lexit
+ EX_ST(STORE(stb, %g1, %o0 + 0x00))
+ EX_LD(LOAD(ldub, %o1 + 0x01, %g1))
+ subcc %o2, 1, %o2
+ be,pn %icc, .Lexit
+ EX_ST(STORE(stb, %g1, %o0 + 0x01))
+ EX_LD(LOAD(ldub, %o1 + 0x02, %g1))
+ ba,pt %icc, .Lexit
+ EX_ST(STORE(stb, %g1, %o0 + 0x02))
+
+.Lsmall:
+ andcc %g2, 0x3, %g0
+ bne,pn %icc, .Lsmall_unaligned
+ andn %o2, 0x4 - 1, %o5
+ sub %o2, %o5, %o2
+1:
+ EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
+ add %o1, 0x04, %o1
+ subcc %o5, 0x04, %o5
+ add %o0, 0x04, %o0
+ bne,pt %icc, 1b
+ EX_ST(STORE(stw, %g1, %o0 - 0x04))
+ brz,pt %o2, .Lexit
+ nop
+ ba,a,pt %icc, .Ltiny
+
+.Lsmall_unaligned:
+1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
+ add %o1, 1, %o1
+ add %o0, 1, %o0
+ subcc %o2, 1, %o2
+ bne,pt %icc, 1b
+ EX_ST(STORE(stb, %g1, %o0 - 0x01))
+ ba,a,pt %icc, .Lexit
+ .size FUNC_NAME, .-FUNC_NAME
diff --git a/arch/sparc/lib/NG4memset.S b/arch/sparc/lib/NG4memset.S
new file mode 100644
index 00000000000..41da4bdd95c
--- /dev/null
+++ b/arch/sparc/lib/NG4memset.S
@@ -0,0 +1,105 @@
+/* NG4memset.S: Niagara-4 optimized memset/bzero.
+ *
+ * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
+ */
+
+#include <asm/asi.h>
+
+ .register %g2, #scratch
+ .register %g3, #scratch
+
+ .text
+ .align 32
+ .globl NG4memset
+NG4memset:
+ andcc %o1, 0xff, %o4
+ be,pt %icc, 1f
+ mov %o2, %o1
+ sllx %o4, 8, %g1
+ or %g1, %o4, %o2
+ sllx %o2, 16, %g1
+ or %g1, %o2, %o2
+ sllx %o2, 32, %g1
+ ba,pt %icc, 1f
+ or %g1, %o2, %o4
+ .size NG4memset,.-NG4memset
+
+ .align 32
+ .globl NG4bzero
+NG4bzero:
+ clr %o4
+1: cmp %o1, 16
+ ble %icc, .Ltiny
+ mov %o0, %o3
+ sub %g0, %o0, %g1
+ and %g1, 0x7, %g1
+ brz,pt %g1, .Laligned8
+ sub %o1, %g1, %o1
+1: stb %o4, [%o0 + 0x00]
+ subcc %g1, 1, %g1
+ bne,pt %icc, 1b
+ add %o0, 1, %o0
+.Laligned8:
+ cmp %o1, 64 + (64 - 8)
+ ble .Lmedium
+ sub %g0, %o0, %g1
+ andcc %g1, (64 - 1), %g1
+ brz,pn %g1, .Laligned64
+ sub %o1, %g1, %o1
+1: stx %o4, [%o0 + 0x00]
+ subcc %g1, 8, %g1
+ bne,pt %icc, 1b
+ add %o0, 0x8, %o0
+.Laligned64:
+ andn %o1, 64 - 1, %g1
+ sub %o1, %g1, %o1
+ brnz,pn %o4, .Lnon_bzero_loop
+ mov 0x20, %g2
+1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
+ subcc %g1, 0x40, %g1
+ stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
+ bne,pt %icc, 1b
+ add %o0, 0x40, %o0
+.Lpostloop:
+ cmp %o1, 8
+ bl,pn %icc, .Ltiny
+ membar #StoreStore|#StoreLoad
+.Lmedium:
+ andn %o1, 0x7, %g1
+ sub %o1, %g1, %o1
+1: stx %o4, [%o0 + 0x00]
+ subcc %g1, 0x8, %g1
+ bne,pt %icc, 1b
+ add %o0, 0x08, %o0
+ andcc %o1, 0x4, %g1
+ be,pt %icc, .Ltiny
+ sub %o1, %g1, %o1
+ stw %o4, [%o0 + 0x00]
+ add %o0, 0x4, %o0
+.Ltiny:
+ cmp %o1, 0
+ be,pn %icc, .Lexit
+1: subcc %o1, 1, %o1
+ stb %o4, [%o0 + 0x00]
+ bne,pt %icc, 1b
+ add %o0, 1, %o0
+.Lexit:
+ retl
+ mov %o3, %o0
+.Lnon_bzero_loop:
+ mov 0x08, %g3
+ mov 0x28, %o5
+1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
+ subcc %g1, 0x40, %g1
+ stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
+ stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
+ stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P
+ add %o0, 0x10, %o0
+ stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
+ stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
+ stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
+ stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P
+ bne,pt %icc, 1b
+ add %o0, 0x30, %o0
+ ba,a,pt %icc, .Lpostloop
+ .size NG4bzero,.-NG4bzero
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
new file mode 100644
index 00000000000..a114cbcf2a4
--- /dev/null
+++ b/arch/sparc/lib/NG4patch.S
@@ -0,0 +1,54 @@
+/* NG4patch.S: Patch Ultra-I routines with Niagara-4 variant.
+ *
+ * Copyright (C) 2012 David S. Miller <davem@davemloft.net>
+ */
+
+#define BRANCH_ALWAYS 0x10680000
+#define NOP 0x01000000
+#define NG_DO_PATCH(OLD, NEW) \
+ sethi %hi(NEW), %g1; \
+ or %g1, %lo(NEW), %g1; \
+ sethi %hi(OLD), %g2; \
+ or %g2, %lo(OLD), %g2; \
+ sub %g1, %g2, %g1; \
+ sethi %hi(BRANCH_ALWAYS), %g3; \
+ sll %g1, 11, %g1; \
+ srl %g1, 11 + 2, %g1; \
+ or %g3, %lo(BRANCH_ALWAYS), %g3; \
+ or %g3, %g1, %g3; \
+ stw %g3, [%g2]; \
+ sethi %hi(NOP), %g3; \
+ or %g3, %lo(NOP), %g3; \
+ stw %g3, [%g2 + 0x4]; \
+ flush %g2;
+
+ .globl niagara4_patch_copyops
+ .type niagara4_patch_copyops,#function
+niagara4_patch_copyops:
+ NG_DO_PATCH(memcpy, NG4memcpy)
+ NG_DO_PATCH(___copy_from_user, NG4copy_from_user)
+ NG_DO_PATCH(___copy_to_user, NG4copy_to_user)
+ retl
+ nop
+ .size niagara4_patch_copyops,.-niagara4_patch_copyops
+
+ .globl niagara4_patch_bzero
+ .type niagara4_patch_bzero,#function
+niagara4_patch_bzero:
+ NG_DO_PATCH(memset, NG4memset)
+ NG_DO_PATCH(__bzero, NG4bzero)
+ NG_DO_PATCH(__clear_user, NGclear_user)
+ NG_DO_PATCH(tsb_init, NGtsb_init)
+ retl
+ nop
+ .size niagara4_patch_bzero,.-niagara4_patch_bzero
+
+ .globl niagara4_patch_pageops
+ .type niagara4_patch_pageops,#function
+niagara4_patch_pageops:
+ NG_DO_PATCH(copy_user_page, NG4copy_user_page)
+ NG_DO_PATCH(_clear_page, NG4clear_page)
+ NG_DO_PATCH(clear_user_page, NG4clear_user_page)
+ retl
+ nop
+ .size niagara4_patch_pageops,.-niagara4_patch_pageops
diff --git a/arch/sparc/lib/NGpage.S b/arch/sparc/lib/NGpage.S
index b9e790b9c6b..423d46e2258 100644
--- a/arch/sparc/lib/NGpage.S
+++ b/arch/sparc/lib/NGpage.S
@@ -59,6 +59,8 @@ NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
restore
.align 32
+ .globl NGclear_page
+ .globl NGclear_user_page
NGclear_page: /* %o0=dest */
NGclear_user_page: /* %o0=dest, %o1=vaddr */
rd %asi, %g3
diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c
index 3b31218cafc..ee31b884c61 100644
--- a/arch/sparc/lib/ksyms.c
+++ b/arch/sparc/lib/ksyms.c
@@ -134,6 +134,10 @@ EXPORT_SYMBOL(copy_user_page);
void VISenter(void);
EXPORT_SYMBOL(VISenter);
+/* CRYPTO code needs this */
+void VISenterhalf(void);
+EXPORT_SYMBOL(VISenterhalf);
+
extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *,
unsigned long *);
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 77ac917be15..e98bfda205a 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -265,6 +265,7 @@ good_area:
}
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/* No need to up_read(&mm->mmap_sem) as we would
* have already released it in __lock_page_or_retry
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 1fe0429b631..2976dba1eba 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -452,6 +452,7 @@ good_area:
}
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/* No need to up_read(&mm->mmap_sem) as we would
* have already released it in __lock_page_or_retry
@@ -464,13 +465,13 @@ good_area:
up_read(&mm->mmap_sem);
mm_rss = get_mm_rss(mm);
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE));
#endif
if (unlikely(mm_rss >
mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
tsb_grow(mm, MM_TSB_BASE, mm_rss);
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
mm_rss = mm->context.huge_pte_count;
if (unlikely(mm_rss >
mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit))
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 07e14535375..f76f83d5ac6 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -303,53 +303,3 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
{
return NULL;
}
-
-static void context_reload(void *__data)
-{
- struct mm_struct *mm = __data;
-
- if (mm == current->mm)
- load_secondary_context(mm);
-}
-
-void hugetlb_prefault_arch_hook(struct mm_struct *mm)
-{
- struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
-
- if (likely(tp->tsb != NULL))
- return;
-
- tsb_grow(mm, MM_TSB_HUGE, 0);
- tsb_context_switch(mm);
- smp_tsb_sync(mm);
-
- /* On UltraSPARC-III+ and later, configure the second half of
- * the Data-TLB for huge pages.
- */
- if (tlb_type == cheetah_plus) {
- unsigned long ctx;
-
- spin_lock(&ctx_alloc_lock);
- ctx = mm->context.sparc64_ctx_val;
- ctx &= ~CTX_PGSZ_MASK;
- ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
- ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
-
- if (ctx != mm->context.sparc64_ctx_val) {
- /* When changing the page size fields, we
- * must perform a context flush so that no
- * stale entries match. This flush must
- * occur with the original context register
- * settings.
- */
- do_flush_tlb_mm(mm);
-
- /* Reload the context register of all processors
- * also executing in this address space.
- */
- mm->context.sparc64_ctx_val = ctx;
- on_each_cpu(context_reload, mm, 0);
- }
- spin_unlock(&ctx_alloc_lock);
- }
-}
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index d58edf5fefd..9e28a118e6a 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -51,22 +51,40 @@
#include "init_64.h"
-unsigned long kern_linear_pte_xor[2] __read_mostly;
+unsigned long kern_linear_pte_xor[4] __read_mostly;
-/* A bitmap, one bit for every 256MB of physical memory. If the bit
- * is clear, we should use a 4MB page (via kern_linear_pte_xor[0]) else
- * if set we should use a 256MB page (via kern_linear_pte_xor[1]).
+/* A bitmap, two bits for every 256MB of physical memory. These two
+ * bits determine what page size we use for kernel linear
+ * translations. They form an index into kern_linear_pte_xor[]. The
+ * value in the indexed slot is XOR'd with the TLB miss virtual
+ * address to form the resulting TTE. The mapping is:
+ *
+ * 0 ==> 4MB
+ * 1 ==> 256MB
+ * 2 ==> 2GB
+ * 3 ==> 16GB
+ *
+ * All sun4v chips support 256MB pages. Only SPARC-T4 and later
+ * support 2GB pages, and hopefully future cpus will support the 16GB
+ * pages as well. For slots 2 and 3, we encode a 256MB TTE xor there
+ * if these larger page sizes are not supported by the cpu.
+ *
+ * It would be nice to determine this from the machine description
+ * 'cpu' properties, but we need to have this table setup before the
+ * MDESC is initialized.
*/
unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
#ifndef CONFIG_DEBUG_PAGEALLOC
-/* A special kernel TSB for 4MB and 256MB linear mappings.
- * Space is allocated for this right after the trap table
- * in arch/sparc64/kernel/head.S
+/* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
+ * Space is allocated for this right after the trap table in
+ * arch/sparc64/kernel/head.S
*/
extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
#endif
+static unsigned long cpu_pgsz_mask;
+
#define MAX_BANKS 32
static struct linux_prom64_registers pavail[MAX_BANKS] __devinitdata;
@@ -101,7 +119,8 @@ static void __init read_obp_memory(const char *property,
ret = prom_getproperty(node, property, (char *) regs, prop_size);
if (ret == -1) {
- prom_printf("Couldn't get %s property from /memory.\n");
+ prom_printf("Couldn't get %s property from /memory.\n",
+ property);
prom_halt();
}
@@ -257,7 +276,6 @@ static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long
}
unsigned long _PAGE_ALL_SZ_BITS __read_mostly;
-unsigned long _PAGE_SZBITS __read_mostly;
static void flush_dcache(unsigned long pfn)
{
@@ -288,12 +306,24 @@ static void flush_dcache(unsigned long pfn)
}
}
+/* mm->context.lock must be held */
+static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_index,
+ unsigned long tsb_hash_shift, unsigned long address,
+ unsigned long tte)
+{
+ struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb;
+ unsigned long tag;
+
+ tsb += ((address >> tsb_hash_shift) &
+ (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
+ tag = (address >> 22UL);
+ tsb_insert(tsb, tag, tte);
+}
+
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
{
+ unsigned long tsb_index, tsb_hash_shift, flags;
struct mm_struct *mm;
- struct tsb *tsb;
- unsigned long tag, flags;
- unsigned long tsb_index, tsb_hash_shift;
pte_t pte = *ptep;
if (tlb_type != hypervisor) {
@@ -310,7 +340,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
spin_lock_irqsave(&mm->context.lock, flags);
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) {
if ((tlb_type == hypervisor &&
(pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) ||
@@ -322,11 +352,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
}
#endif
- tsb = mm->context.tsb_block[tsb_index].tsb;
- tsb += ((address >> tsb_hash_shift) &
- (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
- tag = (address >> 22UL);
- tsb_insert(tsb, tag, pte_val(pte));
+ __update_mmu_tsb_insert(mm, tsb_index, tsb_hash_shift,
+ address, pte_val(pte));
spin_unlock_irqrestore(&mm->context.lock, flags);
}
@@ -403,6 +430,12 @@ EXPORT_SYMBOL(flush_icache_range);
void mmu_info(struct seq_file *m)
{
+ static const char *pgsz_strings[] = {
+ "8K", "64K", "512K", "4MB", "32MB",
+ "256MB", "2GB", "16GB",
+ };
+ int i, printed;
+
if (tlb_type == cheetah)
seq_printf(m, "MMU Type\t: Cheetah\n");
else if (tlb_type == cheetah_plus)
@@ -414,6 +447,17 @@ void mmu_info(struct seq_file *m)
else
seq_printf(m, "MMU Type\t: ???\n");
+ seq_printf(m, "MMU PGSZs\t: ");
+ printed = 0;
+ for (i = 0; i < ARRAY_SIZE(pgsz_strings); i++) {
+ if (cpu_pgsz_mask & (1UL << i)) {
+ seq_printf(m, "%s%s",
+ printed ? "," : "", pgsz_strings[i]);
+ printed++;
+ }
+ }
+ seq_putc(m, '\n');
+
#ifdef CONFIG_DEBUG_DCFLUSH
seq_printf(m, "DCPageFlushes\t: %d\n",
atomic_read(&dcpage_flushes));
@@ -462,7 +506,7 @@ static void __init read_obp_translations(void)
prom_halt();
}
if (unlikely(n > sizeof(prom_trans))) {
- prom_printf("prom_mappings: Size %Zd is too big.\n", n);
+ prom_printf("prom_mappings: Size %d is too big.\n", n);
prom_halt();
}
@@ -524,7 +568,7 @@ static void __init hypervisor_tlb_lock(unsigned long vaddr,
unsigned long ret = sun4v_mmu_map_perm_addr(vaddr, 0, pte, mmu);
if (ret != 0) {
- prom_printf("hypervisor_tlb_lock[%lx:%lx:%lx:%lx]: "
+ prom_printf("hypervisor_tlb_lock[%lx:%x:%lx:%lx]: "
"errors with %lx\n", vaddr, 0, pte, mmu, ret);
prom_halt();
}
@@ -1358,32 +1402,75 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
extern unsigned int kvmap_linear_patch[1];
#endif /* CONFIG_DEBUG_PAGEALLOC */
-static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
+static void __init kpte_set_val(unsigned long index, unsigned long val)
{
- const unsigned long shift_256MB = 28;
- const unsigned long mask_256MB = ((1UL << shift_256MB) - 1UL);
- const unsigned long size_256MB = (1UL << shift_256MB);
+ unsigned long *ptr = kpte_linear_bitmap;
- while (start < end) {
- long remains;
+ val <<= ((index % (BITS_PER_LONG / 2)) * 2);
+ ptr += (index / (BITS_PER_LONG / 2));
- remains = end - start;
- if (remains < size_256MB)
- break;
+ *ptr |= val;
+}
- if (start & mask_256MB) {
- start = (start + size_256MB) & ~mask_256MB;
- continue;
- }
+static const unsigned long kpte_shift_min = 28; /* 256MB */
+static const unsigned long kpte_shift_max = 34; /* 16GB */
+static const unsigned long kpte_shift_incr = 3;
- while (remains >= size_256MB) {
- unsigned long index = start >> shift_256MB;
+static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end,
+ unsigned long shift)
+{
+ unsigned long size = (1UL << shift);
+ unsigned long mask = (size - 1UL);
+ unsigned long remains = end - start;
+ unsigned long val;
- __set_bit(index, kpte_linear_bitmap);
+ if (remains < size || (start & mask))
+ return start;
- start += size_256MB;
- remains -= size_256MB;
+ /* VAL maps:
+ *
+ * shift 28 --> kern_linear_pte_xor index 1
+ * shift 31 --> kern_linear_pte_xor index 2
+ * shift 34 --> kern_linear_pte_xor index 3
+ */
+ val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1;
+
+ remains &= ~mask;
+ if (shift != kpte_shift_max)
+ remains = size;
+
+ while (remains) {
+ unsigned long index = start >> kpte_shift_min;
+
+ kpte_set_val(index, val);
+
+ start += 1UL << kpte_shift_min;
+ remains -= 1UL << kpte_shift_min;
+ }
+
+ return start;
+}
+
+static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
+{
+ unsigned long smallest_size, smallest_mask;
+ unsigned long s;
+
+ smallest_size = (1UL << kpte_shift_min);
+ smallest_mask = (smallest_size - 1UL);
+
+ while (start < end) {
+ unsigned long orig_start = start;
+
+ for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) {
+ start = kpte_mark_using_shift(start, end, s);
+
+ if (start != orig_start)
+ break;
}
+
+ if (start == orig_start)
+ start = (start + smallest_size) & ~smallest_mask;
}
}
@@ -1577,13 +1664,16 @@ static void __init sun4v_ktsb_init(void)
ktsb_descr[0].resv = 0;
#ifndef CONFIG_DEBUG_PAGEALLOC
- /* Second KTSB for 4MB/256MB mappings. */
+ /* Second KTSB for 4MB/256MB/2GB/16GB mappings. */
ktsb_pa = (kern_base +
((unsigned long)&swapper_4m_tsb[0] - KERNBASE));
ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB;
- ktsb_descr[1].pgsz_mask = (HV_PGSZ_MASK_4MB |
- HV_PGSZ_MASK_256MB);
+ ktsb_descr[1].pgsz_mask = ((HV_PGSZ_MASK_4MB |
+ HV_PGSZ_MASK_256MB |
+ HV_PGSZ_MASK_2GB |
+ HV_PGSZ_MASK_16GB) &
+ cpu_pgsz_mask);
ktsb_descr[1].assoc = 1;
ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES;
ktsb_descr[1].ctx_idx = 0;
@@ -1606,6 +1696,47 @@ void __cpuinit sun4v_ktsb_register(void)
}
}
+static void __init sun4u_linear_pte_xor_finalize(void)
+{
+#ifndef CONFIG_DEBUG_PAGEALLOC
+ /* This is where we would add Panther support for
+ * 32MB and 256MB pages.
+ */
+#endif
+}
+
+static void __init sun4v_linear_pte_xor_finalize(void)
+{
+#ifndef CONFIG_DEBUG_PAGEALLOC
+ if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) {
+ kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
+ 0xfffff80000000000UL;
+ kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+ _PAGE_P_4V | _PAGE_W_4V);
+ } else {
+ kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
+ }
+
+ if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) {
+ kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^
+ 0xfffff80000000000UL;
+ kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+ _PAGE_P_4V | _PAGE_W_4V);
+ } else {
+ kern_linear_pte_xor[2] = kern_linear_pte_xor[1];
+ }
+
+ if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) {
+ kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^
+ 0xfffff80000000000UL;
+ kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+ _PAGE_P_4V | _PAGE_W_4V);
+ } else {
+ kern_linear_pte_xor[3] = kern_linear_pte_xor[2];
+ }
+#endif
+}
+
/* paging_init() sets up the page tables */
static unsigned long last_valid_pfn;
@@ -1665,10 +1796,8 @@ void __init paging_init(void)
ktsb_phys_patch();
}
- if (tlb_type == hypervisor) {
+ if (tlb_type == hypervisor)
sun4v_patch_tlb_handlers();
- sun4v_ktsb_init();
- }
/* Find available physical memory...
*
@@ -1727,9 +1856,6 @@ void __init paging_init(void)
__flush_tlb_all();
- if (tlb_type == hypervisor)
- sun4v_ktsb_register();
-
prom_build_devicetree();
of_populate_present_mask();
#ifndef CONFIG_SMP
@@ -1742,8 +1868,36 @@ void __init paging_init(void)
#ifndef CONFIG_SMP
mdesc_fill_in_cpu_data(cpu_all_mask);
#endif
+ mdesc_get_page_sizes(cpu_all_mask, &cpu_pgsz_mask);
+
+ sun4v_linear_pte_xor_finalize();
+
+ sun4v_ktsb_init();
+ sun4v_ktsb_register();
+ } else {
+ unsigned long impl, ver;
+
+ cpu_pgsz_mask = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K |
+ HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB);
+
+ __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
+ impl = ((ver >> 32) & 0xffff);
+ if (impl == PANTHER_IMPL)
+ cpu_pgsz_mask |= (HV_PGSZ_MASK_32MB |
+ HV_PGSZ_MASK_256MB);
+
+ sun4u_linear_pte_xor_finalize();
}
+ /* Flush the TLBs and the 4M TSB so that the updated linear
+ * pte XOR settings are realized for all mappings.
+ */
+ __flush_tlb_all();
+#ifndef CONFIG_DEBUG_PAGEALLOC
+ memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
+#endif
+ __flush_tlb_all();
+
/* Setup bootmem... */
last_valid_pfn = end_pfn = bootmem_init(phys_base);
@@ -2110,6 +2264,7 @@ static void __init sun4u_pgprot_init(void)
{
unsigned long page_none, page_shared, page_copy, page_readonly;
unsigned long page_exec_bit;
+ int i;
PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
_PAGE_CACHE_4U | _PAGE_P_4U |
@@ -2128,8 +2283,7 @@ static void __init sun4u_pgprot_init(void)
__ACCESS_BITS_4U | _PAGE_E_4U);
#ifdef CONFIG_DEBUG_PAGEALLOC
- kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZBITS_4U) ^
- 0xfffff80000000000UL;
+ kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL;
#else
kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^
0xfffff80000000000UL;
@@ -2137,10 +2291,9 @@ static void __init sun4u_pgprot_init(void)
kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U |
_PAGE_P_4U | _PAGE_W_4U);
- /* XXX Should use 256MB on Panther. XXX */
- kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
+ for (i = 1; i < 4; i++)
+ kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
- _PAGE_SZBITS = _PAGE_SZBITS_4U;
_PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U |
_PAGE_SZ64K_4U | _PAGE_SZ8K_4U |
_PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U);
@@ -2164,6 +2317,7 @@ static void __init sun4v_pgprot_init(void)
{
unsigned long page_none, page_shared, page_copy, page_readonly;
unsigned long page_exec_bit;
+ int i;
PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID |
_PAGE_CACHE_4V | _PAGE_P_4V |
@@ -2176,8 +2330,7 @@ static void __init sun4v_pgprot_init(void)
_PAGE_CACHE = _PAGE_CACHE_4V;
#ifdef CONFIG_DEBUG_PAGEALLOC
- kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZBITS_4V) ^
- 0xfffff80000000000UL;
+ kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL;
#else
kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
0xfffff80000000000UL;
@@ -2185,20 +2338,12 @@ static void __init sun4v_pgprot_init(void)
kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V |
_PAGE_P_4V | _PAGE_W_4V);
-#ifdef CONFIG_DEBUG_PAGEALLOC
- kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZBITS_4V) ^
- 0xfffff80000000000UL;
-#else
- kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
- 0xfffff80000000000UL;
-#endif
- kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
- _PAGE_P_4V | _PAGE_W_4V);
+ for (i = 1; i < 4; i++)
+ kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V |
__ACCESS_BITS_4V | _PAGE_E_4V);
- _PAGE_SZBITS = _PAGE_SZBITS_4V;
_PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V |
_PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V |
_PAGE_SZ4MB_4V | _PAGE_SZ512K_4V |
@@ -2331,3 +2476,281 @@ void __flush_tlb_all(void)
__asm__ __volatile__("wrpr %0, 0, %%pstate"
: : "r" (pstate));
}
+
+static pte_t *get_from_cache(struct mm_struct *mm)
+{
+ struct page *page;
+ pte_t *ret;
+
+ spin_lock(&mm->page_table_lock);
+ page = mm->context.pgtable_page;
+ ret = NULL;
+ if (page) {
+ void *p = page_address(page);
+
+ mm->context.pgtable_page = NULL;
+
+ ret = (pte_t *) (p + (PAGE_SIZE / 2));
+ }
+ spin_unlock(&mm->page_table_lock);
+
+ return ret;
+}
+
+static struct page *__alloc_for_cache(struct mm_struct *mm)
+{
+ struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
+ __GFP_REPEAT | __GFP_ZERO);
+
+ if (page) {
+ spin_lock(&mm->page_table_lock);
+ if (!mm->context.pgtable_page) {
+ atomic_set(&page->_count, 2);
+ mm->context.pgtable_page = page;
+ }
+ spin_unlock(&mm->page_table_lock);
+ }
+ return page;
+}
+
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+ unsigned long address)
+{
+ struct page *page;
+ pte_t *pte;
+
+ pte = get_from_cache(mm);
+ if (pte)
+ return pte;
+
+ page = __alloc_for_cache(mm);
+ if (page)
+ pte = (pte_t *) page_address(page);
+
+ return pte;
+}
+
+pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
+{
+ struct page *page;
+ pte_t *pte;
+
+ pte = get_from_cache(mm);
+ if (pte)
+ return pte;
+
+ page = __alloc_for_cache(mm);
+ if (page) {
+ pgtable_page_ctor(page);
+ pte = (pte_t *) page_address(page);
+ }
+
+ return pte;
+}
+
+void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+ struct page *page = virt_to_page(pte);
+ if (put_page_testzero(page))
+ free_hot_cold_page(page, 0);
+}
+
+static void __pte_free(pgtable_t pte)
+{
+ struct page *page = virt_to_page(pte);
+ if (put_page_testzero(page)) {
+ pgtable_page_dtor(page);
+ free_hot_cold_page(page, 0);
+ }
+}
+
+void pte_free(struct mm_struct *mm, pgtable_t pte)
+{
+ __pte_free(pte);
+}
+
+void pgtable_free(void *table, bool is_page)
+{
+ if (is_page)
+ __pte_free(table);
+ else
+ kmem_cache_free(pgtable_cache, table);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot, bool for_modify)
+{
+ if (pgprot_val(pgprot) & _PAGE_VALID)
+ pmd_val(pmd) |= PMD_HUGE_PRESENT;
+ if (tlb_type == hypervisor) {
+ if (pgprot_val(pgprot) & _PAGE_WRITE_4V)
+ pmd_val(pmd) |= PMD_HUGE_WRITE;
+ if (pgprot_val(pgprot) & _PAGE_EXEC_4V)
+ pmd_val(pmd) |= PMD_HUGE_EXEC;
+
+ if (!for_modify) {
+ if (pgprot_val(pgprot) & _PAGE_ACCESSED_4V)
+ pmd_val(pmd) |= PMD_HUGE_ACCESSED;
+ if (pgprot_val(pgprot) & _PAGE_MODIFIED_4V)
+ pmd_val(pmd) |= PMD_HUGE_DIRTY;
+ }
+ } else {
+ if (pgprot_val(pgprot) & _PAGE_WRITE_4U)
+ pmd_val(pmd) |= PMD_HUGE_WRITE;
+ if (pgprot_val(pgprot) & _PAGE_EXEC_4U)
+ pmd_val(pmd) |= PMD_HUGE_EXEC;
+
+ if (!for_modify) {
+ if (pgprot_val(pgprot) & _PAGE_ACCESSED_4U)
+ pmd_val(pmd) |= PMD_HUGE_ACCESSED;
+ if (pgprot_val(pgprot) & _PAGE_MODIFIED_4U)
+ pmd_val(pmd) |= PMD_HUGE_DIRTY;
+ }
+ }
+
+ return pmd;
+}
+
+pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+{
+ pmd_t pmd;
+
+ pmd_val(pmd) = (page_nr << ((PAGE_SHIFT - PMD_PADDR_SHIFT)));
+ pmd_val(pmd) |= PMD_ISHUGE;
+ pmd = pmd_set_protbits(pmd, pgprot, false);
+ return pmd;
+}
+
+pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ pmd_val(pmd) &= ~(PMD_HUGE_PRESENT |
+ PMD_HUGE_WRITE |
+ PMD_HUGE_EXEC);
+ pmd = pmd_set_protbits(pmd, newprot, true);
+ return pmd;
+}
+
+pgprot_t pmd_pgprot(pmd_t entry)
+{
+ unsigned long pte = 0;
+
+ if (pmd_val(entry) & PMD_HUGE_PRESENT)
+ pte |= _PAGE_VALID;
+
+ if (tlb_type == hypervisor) {
+ if (pmd_val(entry) & PMD_HUGE_PRESENT)
+ pte |= _PAGE_PRESENT_4V;
+ if (pmd_val(entry) & PMD_HUGE_EXEC)
+ pte |= _PAGE_EXEC_4V;
+ if (pmd_val(entry) & PMD_HUGE_WRITE)
+ pte |= _PAGE_W_4V;
+ if (pmd_val(entry) & PMD_HUGE_ACCESSED)
+ pte |= _PAGE_ACCESSED_4V;
+ if (pmd_val(entry) & PMD_HUGE_DIRTY)
+ pte |= _PAGE_MODIFIED_4V;
+ pte |= _PAGE_CP_4V|_PAGE_CV_4V;
+ } else {
+ if (pmd_val(entry) & PMD_HUGE_PRESENT)
+ pte |= _PAGE_PRESENT_4U;
+ if (pmd_val(entry) & PMD_HUGE_EXEC)
+ pte |= _PAGE_EXEC_4U;
+ if (pmd_val(entry) & PMD_HUGE_WRITE)
+ pte |= _PAGE_W_4U;
+ if (pmd_val(entry) & PMD_HUGE_ACCESSED)
+ pte |= _PAGE_ACCESSED_4U;
+ if (pmd_val(entry) & PMD_HUGE_DIRTY)
+ pte |= _PAGE_MODIFIED_4U;
+ pte |= _PAGE_CP_4U|_PAGE_CV_4U;
+ }
+
+ return __pgprot(pte);
+}
+
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd)
+{
+ unsigned long pte, flags;
+ struct mm_struct *mm;
+ pmd_t entry = *pmd;
+ pgprot_t prot;
+
+ if (!pmd_large(entry) || !pmd_young(entry))
+ return;
+
+ pte = (pmd_val(entry) & ~PMD_HUGE_PROTBITS);
+ pte <<= PMD_PADDR_SHIFT;
+ pte |= _PAGE_VALID;
+
+ prot = pmd_pgprot(entry);
+
+ if (tlb_type == hypervisor)
+ pgprot_val(prot) |= _PAGE_SZHUGE_4V;
+ else
+ pgprot_val(prot) |= _PAGE_SZHUGE_4U;
+
+ pte |= pgprot_val(prot);
+
+ mm = vma->vm_mm;
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+
+ if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL)
+ __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT,
+ addr, pte);
+
+ spin_unlock_irqrestore(&mm->context.lock, flags);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+static void context_reload(void *__data)
+{
+ struct mm_struct *mm = __data;
+
+ if (mm == current->mm)
+ load_secondary_context(mm);
+}
+
+void hugetlb_setup(struct mm_struct *mm)
+{
+ struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
+
+ if (likely(tp->tsb != NULL))
+ return;
+
+ tsb_grow(mm, MM_TSB_HUGE, 0);
+ tsb_context_switch(mm);
+ smp_tsb_sync(mm);
+
+ /* On UltraSPARC-III+ and later, configure the second half of
+ * the Data-TLB for huge pages.
+ */
+ if (tlb_type == cheetah_plus) {
+ unsigned long ctx;
+
+ spin_lock(&ctx_alloc_lock);
+ ctx = mm->context.sparc64_ctx_val;
+ ctx &= ~CTX_PGSZ_MASK;
+ ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
+ ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
+
+ if (ctx != mm->context.sparc64_ctx_val) {
+ /* When changing the page size fields, we
+ * must perform a context flush so that no
+ * stale entries match. This flush must
+ * occur with the original context register
+ * settings.
+ */
+ do_flush_tlb_mm(mm);
+
+ /* Reload the context register of all processors
+ * also executing in this address space.
+ */
+ mm->context.sparc64_ctx_val = ctx;
+ on_each_cpu(context_reload, mm, 0);
+ }
+ spin_unlock(&ctx_alloc_lock);
+ }
+}
+#endif
diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h
index 3e1ac8b96ca..0661aa606de 100644
--- a/arch/sparc/mm/init_64.h
+++ b/arch/sparc/mm/init_64.h
@@ -8,12 +8,12 @@
#define MAX_PHYS_ADDRESS (1UL << 41UL)
#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL)
#define KPTE_BITMAP_BYTES \
- ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 8)
+ ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4)
#define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL)
#define VALID_ADDR_BITMAP_BYTES \
((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8)
-extern unsigned long kern_linear_pte_xor[2];
+extern unsigned long kern_linear_pte_xor[4];
extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
extern unsigned int sparc64_highest_unlocked_tlb_ent;
extern unsigned long sparc64_kern_pri_context;
diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
index a8a58cad9d2..0f4f7191fbb 100644
--- a/arch/sparc/mm/iommu.c
+++ b/arch/sparc/mm/iommu.c
@@ -90,8 +90,8 @@ static void __init sbus_iommu_init(struct platform_device *op)
it to us. */
tmp = __get_free_pages(GFP_KERNEL, IOMMU_ORDER);
if (!tmp) {
- prom_printf("Unable to allocate iommu table [0x%08x]\n",
- IOMMU_NPTES*sizeof(iopte_t));
+ prom_printf("Unable to allocate iommu table [0x%lx]\n",
+ IOMMU_NPTES * sizeof(iopte_t));
prom_halt();
}
iommu->page_table = (iopte_t *)tmp;
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index b1f279cd00b..3e8fec391fe 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -43,16 +43,37 @@ void flush_tlb_pending(void)
put_cpu_var(tlb_batch);
}
-void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
- pte_t *ptep, pte_t orig, int fullmm)
+static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
+ bool exec)
{
struct tlb_batch *tb = &get_cpu_var(tlb_batch);
unsigned long nr;
vaddr &= PAGE_MASK;
- if (pte_exec(orig))
+ if (exec)
vaddr |= 0x1UL;
+ nr = tb->tlb_nr;
+
+ if (unlikely(nr != 0 && mm != tb->mm)) {
+ flush_tlb_pending();
+ nr = 0;
+ }
+
+ if (nr == 0)
+ tb->mm = mm;
+
+ tb->vaddrs[nr] = vaddr;
+ tb->tlb_nr = ++nr;
+ if (nr >= TLB_BATCH_NR)
+ flush_tlb_pending();
+
+ put_cpu_var(tlb_batch);
+}
+
+void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+ pte_t *ptep, pte_t orig, int fullmm)
+{
if (tlb_type != hypervisor &&
pte_dirty(orig)) {
unsigned long paddr, pfn = pte_pfn(orig);
@@ -77,26 +98,91 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
}
no_cache_flush:
+ if (!fullmm)
+ tlb_batch_add_one(mm, vaddr, pte_exec(orig));
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
+ pmd_t pmd, bool exec)
+{
+ unsigned long end;
+ pte_t *pte;
+
+ pte = pte_offset_map(&pmd, vaddr);
+ end = vaddr + HPAGE_SIZE;
+ while (vaddr < end) {
+ if (pte_val(*pte) & _PAGE_VALID)
+ tlb_batch_add_one(mm, vaddr, exec);
+ pte++;
+ vaddr += PAGE_SIZE;
+ }
+ pte_unmap(pte);
+}
- if (fullmm) {
- put_cpu_var(tlb_batch);
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ pmd_t orig = *pmdp;
+
+ *pmdp = pmd;
+
+ if (mm == &init_mm)
return;
+
+ if ((pmd_val(pmd) ^ pmd_val(orig)) & PMD_ISHUGE) {
+ if (pmd_val(pmd) & PMD_ISHUGE)
+ mm->context.huge_pte_count++;
+ else
+ mm->context.huge_pte_count--;
+ if (mm->context.huge_pte_count == 1)
+ hugetlb_setup(mm);
}
- nr = tb->tlb_nr;
+ if (!pmd_none(orig)) {
+ bool exec = ((pmd_val(orig) & PMD_HUGE_EXEC) != 0);
- if (unlikely(nr != 0 && mm != tb->mm)) {
- flush_tlb_pending();
- nr = 0;
+ addr &= HPAGE_MASK;
+ if (pmd_val(orig) & PMD_ISHUGE)
+ tlb_batch_add_one(mm, addr, exec);
+ else
+ tlb_batch_pmd_scan(mm, addr, orig, exec);
}
+}
- if (nr == 0)
- tb->mm = mm;
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable)
+{
+ struct list_head *lh = (struct list_head *) pgtable;
- tb->vaddrs[nr] = vaddr;
- tb->tlb_nr = ++nr;
- if (nr >= TLB_BATCH_NR)
- flush_tlb_pending();
+ assert_spin_locked(&mm->page_table_lock);
- put_cpu_var(tlb_batch);
+ /* FIFO */
+ if (!mm->pmd_huge_pte)
+ INIT_LIST_HEAD(lh);
+ else
+ list_add(lh, (struct list_head *) mm->pmd_huge_pte);
+ mm->pmd_huge_pte = pgtable;
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm)
+{
+ struct list_head *lh;
+ pgtable_t pgtable;
+
+ assert_spin_locked(&mm->page_table_lock);
+
+ /* FIFO */
+ pgtable = mm->pmd_huge_pte;
+ lh = (struct list_head *) pgtable;
+ if (list_empty(lh))
+ mm->pmd_huge_pte = NULL;
+ else {
+ mm->pmd_huge_pte = (pgtable_t) lh->next;
+ list_del(lh);
+ }
+ pte_val(pgtable[0]) = 0;
+ pte_val(pgtable[1]) = 0;
+
+ return pgtable;
}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index c52add79b83..7f647434749 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -78,7 +78,7 @@ void flush_tsb_user(struct tlb_batch *tb)
base = __pa(base);
__flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
@@ -90,29 +90,12 @@ void flush_tsb_user(struct tlb_batch *tb)
spin_unlock_irqrestore(&mm->context.lock, flags);
}
-#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K
#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K
-#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
-#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_64K
-#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_64K
-#else
-#error Broken base page size setting...
-#endif
-#ifdef CONFIG_HUGETLB_PAGE
-#if defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
-#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_64K
-#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_64K
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
-#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_512K
-#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_512K
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_4MB
#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_4MB
-#else
-#error Broken huge page size setting...
-#endif
#endif
static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
@@ -207,7 +190,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign
case MM_TSB_BASE:
hp->pgsz_idx = HV_PGSZ_IDX_BASE;
break;
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
case MM_TSB_HUGE:
hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
break;
@@ -222,7 +205,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign
case MM_TSB_BASE:
hp->pgsz_mask = HV_PGSZ_MASK_BASE;
break;
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
case MM_TSB_HUGE:
hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
break;
@@ -444,7 +427,7 @@ retry_tsb_alloc:
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
unsigned long huge_pte_count;
#endif
unsigned int i;
@@ -453,7 +436,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
mm->context.sparc64_ctx_val = 0UL;
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
/* We reset it to zero because the fork() page copying
* will re-increment the counters as the parent PTEs are
* copied into the child address space.
@@ -462,6 +445,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
mm->context.huge_pte_count = 0;
#endif
+ mm->context.pgtable_page = NULL;
+
/* copy_mm() copies over the parent's mm_struct before calling
* us, so we need to zero out the TSB pointer or else tsb_grow()
* will be confused and think there is an older TSB to free up.
@@ -474,7 +459,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
*/
tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
if (unlikely(huge_pte_count))
tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
#endif
@@ -500,10 +485,17 @@ static void tsb_destroy_one(struct tsb_config *tp)
void destroy_context(struct mm_struct *mm)
{
unsigned long flags, i;
+ struct page *page;
for (i = 0; i < MM_NUM_TSBS; i++)
tsb_destroy_one(&mm->context.tsb_block[i]);
+ page = mm->context.pgtable_page;
+ if (page && put_page_testzero(page)) {
+ pgtable_page_dtor(page);
+ free_hot_cold_page(page, 0);
+ }
+
spin_lock_irqsave(&ctx_alloc_lock, flags);
if (CTX_VALID(mm->context)) {
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index e9073e9501b..28368701ef7 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -464,8 +464,12 @@ void bpf_jit_compile(struct sk_filter *fp)
emit_alu_K(OR, K);
break;
case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
+ case BPF_S_ALU_XOR_X:
emit_alu_X(XOR);
break;
+ case BPF_S_ALU_XOR_K: /* A ^= K */
+ emit_alu_K(XOR, K);
+ break;
case BPF_S_ALU_LSH_X: /* A <<= X */
emit_alu_X(SLL);
break;