diff options
Diffstat (limited to 'arch/x86/lib')
34 files changed, 1200 insertions, 1157 deletions
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index b00f6785da7..4d4f96a2763 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -16,7 +16,7 @@ clean-files := inat-tables.c obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o -lib-y := delay.o +lib-y := delay.o misc.o cmdline.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o lib-y += memcpy_$(BITS).o @@ -24,7 +24,7 @@ lib-$(CONFIG_SMP) += rwlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o -obj-y += msr.o msr-reg.o msr-reg-export.o +obj-y += msr.o msr-reg.o msr-reg-export.o hash.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o @@ -32,7 +32,6 @@ ifeq ($(CONFIG_X86_32),y) lib-y += checksum_32.o lib-y += strstr_32.o lib-y += string_32.o - lib-y += cmpxchg.o ifneq ($(CONFIG_X86_CMPXCHG64),y) lib-y += cmpxchg8b_emu.o atomic64_386_32.o endif diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c index 042f6826bf5..a0b4a350daa 100644 --- a/arch/x86/lib/atomic64_32.c +++ b/arch/x86/lib/atomic64_32.c @@ -1,59 +1,4 @@ -#include <linux/compiler.h> -#include <linux/module.h> -#include <linux/types.h> +#define ATOMIC64_EXPORT EXPORT_SYMBOL -#include <asm/processor.h> -#include <asm/cmpxchg.h> +#include <linux/export.h> #include <linux/atomic.h> - -long long atomic64_read_cx8(long long, const atomic64_t *v); -EXPORT_SYMBOL(atomic64_read_cx8); -long long atomic64_set_cx8(long long, const atomic64_t *v); -EXPORT_SYMBOL(atomic64_set_cx8); -long long atomic64_xchg_cx8(long long, unsigned high); -EXPORT_SYMBOL(atomic64_xchg_cx8); -long long atomic64_add_return_cx8(long long a, atomic64_t *v); -EXPORT_SYMBOL(atomic64_add_return_cx8); -long long atomic64_sub_return_cx8(long long a, atomic64_t *v); -EXPORT_SYMBOL(atomic64_sub_return_cx8); -long long atomic64_inc_return_cx8(long long a, atomic64_t *v); -EXPORT_SYMBOL(atomic64_inc_return_cx8); -long long atomic64_dec_return_cx8(long long a, atomic64_t *v); 
-EXPORT_SYMBOL(atomic64_dec_return_cx8); -long long atomic64_dec_if_positive_cx8(atomic64_t *v); -EXPORT_SYMBOL(atomic64_dec_if_positive_cx8); -int atomic64_inc_not_zero_cx8(atomic64_t *v); -EXPORT_SYMBOL(atomic64_inc_not_zero_cx8); -int atomic64_add_unless_cx8(atomic64_t *v, long long a, long long u); -EXPORT_SYMBOL(atomic64_add_unless_cx8); - -#ifndef CONFIG_X86_CMPXCHG64 -long long atomic64_read_386(long long, const atomic64_t *v); -EXPORT_SYMBOL(atomic64_read_386); -long long atomic64_set_386(long long, const atomic64_t *v); -EXPORT_SYMBOL(atomic64_set_386); -long long atomic64_xchg_386(long long, unsigned high); -EXPORT_SYMBOL(atomic64_xchg_386); -long long atomic64_add_return_386(long long a, atomic64_t *v); -EXPORT_SYMBOL(atomic64_add_return_386); -long long atomic64_sub_return_386(long long a, atomic64_t *v); -EXPORT_SYMBOL(atomic64_sub_return_386); -long long atomic64_inc_return_386(long long a, atomic64_t *v); -EXPORT_SYMBOL(atomic64_inc_return_386); -long long atomic64_dec_return_386(long long a, atomic64_t *v); -EXPORT_SYMBOL(atomic64_dec_return_386); -long long atomic64_add_386(long long a, atomic64_t *v); -EXPORT_SYMBOL(atomic64_add_386); -long long atomic64_sub_386(long long a, atomic64_t *v); -EXPORT_SYMBOL(atomic64_sub_386); -long long atomic64_inc_386(long long a, atomic64_t *v); -EXPORT_SYMBOL(atomic64_inc_386); -long long atomic64_dec_386(long long a, atomic64_t *v); -EXPORT_SYMBOL(atomic64_dec_386); -long long atomic64_dec_if_positive_386(atomic64_t *v); -EXPORT_SYMBOL(atomic64_dec_if_positive_386); -int atomic64_inc_not_zero_386(atomic64_t *v); -EXPORT_SYMBOL(atomic64_inc_not_zero_386); -int atomic64_add_unless_386(atomic64_t *v, long long a, long long u); -EXPORT_SYMBOL(atomic64_add_unless_386); -#endif diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index e8e7e0d06f4..00933d5e992 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S @@ -137,13 +137,13 @@ BEGIN(dec_return) RET_ENDP 
#undef v -#define v %ecx +#define v %esi BEGIN(add_unless) - addl %eax, %esi + addl %eax, %ecx adcl %edx, %edi addl (v), %eax adcl 4(v), %edx - cmpl %eax, %esi + cmpl %eax, %ecx je 3f 1: movl %eax, (v) diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index 391a083674b..f5cc9eb1d51 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -55,8 +55,6 @@ ENDPROC(atomic64_set_cx8) ENTRY(atomic64_xchg_cx8) CFI_STARTPROC - movl %ebx, %eax - movl %ecx, %edx 1: LOCK_PREFIX cmpxchg8b (%esi) @@ -78,7 +76,7 @@ ENTRY(atomic64_\func\()_return_cx8) movl %edx, %edi movl %ecx, %ebp - read64 %ebp + read64 %ecx 1: movl %eax, %ebx movl %edx, %ecx @@ -159,23 +157,22 @@ ENTRY(atomic64_add_unless_cx8) SAVE ebx /* these just push these two parameters on the stack */ SAVE edi - SAVE esi + SAVE ecx - movl %ecx, %ebp - movl %eax, %esi + movl %eax, %ebp movl %edx, %edi - read64 %ebp + read64 %esi 1: cmpl %eax, 0(%esp) je 4f 2: movl %eax, %ebx movl %edx, %ecx - addl %esi, %ebx + addl %ebp, %ebx adcl %edi, %ecx LOCK_PREFIX - cmpxchg8b (%ebp) + cmpxchg8b (%esi) jne 1b movl $1, %eax @@ -199,13 +196,13 @@ ENTRY(atomic64_inc_not_zero_cx8) read64 %esi 1: - testl %eax, %eax - je 4f -2: + movl %eax, %ecx + orl %edx, %ecx + jz 3f movl %eax, %ebx - movl %edx, %ecx + xorl %ecx, %ecx addl $1, %ebx - adcl $0, %ecx + adcl %edx, %ecx LOCK_PREFIX cmpxchg8b (%esi) jne 1b @@ -214,9 +211,5 @@ ENTRY(atomic64_inc_not_zero_cx8) 3: RESTORE ebx ret -4: - testl %edx, %edx - jne 2b - jmp 3b CFI_ENDPROC ENDPROC(atomic64_inc_not_zero_cx8) diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 78d16a554db..e78b8eee661 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -28,6 +28,7 @@ #include <linux/linkage.h> #include <asm/dwarf2.h> #include <asm/errno.h> +#include <asm/asm.h> /* * computes a partial checksum, e.g. for TCP/UDP fragments @@ -60,7 +61,7 @@ ENTRY(csum_partial) testl $3, %esi # Check alignment. 
jz 2f # Jump if alignment is ok. testl $1, %esi # Check alignment. - jz 10f # Jump if alignment is boundary of 2bytes. + jz 10f # Jump if alignment is boundary of 2 bytes. # buf is odd dec %ecx @@ -282,15 +283,11 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst, #define SRC(y...) \ 9999: y; \ - .section __ex_table, "a"; \ - .long 9999b, 6001f ; \ - .previous + _ASM_EXTABLE(9999b, 6001f) #define DST(y...) \ 9999: y; \ - .section __ex_table, "a"; \ - .long 9999b, 6002f ; \ - .previous + _ASM_EXTABLE(9999b, 6002f) #ifndef CONFIG_X86_USE_PPRO_CHECKSUM diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c new file mode 100644 index 00000000000..422db000d72 --- /dev/null +++ b/arch/x86/lib/cmdline.c @@ -0,0 +1,84 @@ +/* + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * Misc librarized functions for cmdline poking. + */ +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <asm/setup.h> + +static inline int myisspace(u8 c) +{ + return c <= ' '; /* Close enough approximation */ +} + +/** + * Find a boolean option (like quiet,noapic,nosmp....) + * + * @cmdline: the cmdline string + * @option: option string to look for + * + * Returns the position of that @option (starts counting with 1) + * or 0 on not found. 
+ */ +int cmdline_find_option_bool(const char *cmdline, const char *option) +{ + char c; + int len, pos = 0, wstart = 0; + const char *opptr = NULL; + enum { + st_wordstart = 0, /* Start of word/after whitespace */ + st_wordcmp, /* Comparing this word */ + st_wordskip, /* Miscompare, skip */ + } state = st_wordstart; + + if (!cmdline) + return -1; /* No command line */ + + len = min_t(int, strlen(cmdline), COMMAND_LINE_SIZE); + if (!len) + return 0; + + while (len--) { + c = *(char *)cmdline++; + pos++; + + switch (state) { + case st_wordstart: + if (!c) + return 0; + else if (myisspace(c)) + break; + + state = st_wordcmp; + opptr = option; + wstart = pos; + /* fall through */ + + case st_wordcmp: + if (!*opptr) + if (!c || myisspace(c)) + return wstart; + else + state = st_wordskip; + else if (!c) + return 0; + else if (c != *opptr++) + state = st_wordskip; + else if (!len) /* last word and is matching */ + return wstart; + break; + + case st_wordskip: + if (!c) + return 0; + else if (myisspace(c)) + state = st_wordstart; + break; + } + } + + return 0; /* Buffer overrun */ +} diff --git a/arch/x86/lib/cmpxchg.c b/arch/x86/lib/cmpxchg.c deleted file mode 100644 index 5d619f6df3e..00000000000 --- a/arch/x86/lib/cmpxchg.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * cmpxchg*() fallbacks for CPU not supporting these instructions - */ - -#include <linux/kernel.h> -#include <linux/smp.h> -#include <linux/module.h> - -#ifndef CONFIG_X86_CMPXCHG -unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) -{ - u8 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u8 *)ptr; - if (prev == old) - *(u8 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u8); - -unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) -{ - u16 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. 
Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u16 *)ptr; - if (prev == old) - *(u16 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u16); - -unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) -{ - u32 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u32 *)ptr; - if (prev == old) - *(u32 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u32); -#endif diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 01c805ba535..176cca67212 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -5,96 +5,90 @@ #include <asm/alternative-asm.h> ALIGN -copy_page_c: +copy_page_rep: CFI_STARTPROC - movl $4096/8,%ecx - rep movsq + movl $4096/8, %ecx + rep movsq ret CFI_ENDPROC -ENDPROC(copy_page_c) +ENDPROC(copy_page_rep) -/* Don't use streaming store because it's better when the target - ends up in cache. */ - -/* Could vary the prefetch distance based on SMP/UP */ +/* + * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD. + * Could vary the prefetch distance based on SMP/UP. 
+*/ ENTRY(copy_page) CFI_STARTPROC - subq $3*8,%rsp - CFI_ADJUST_CFA_OFFSET 3*8 - movq %rbx,(%rsp) + subq $2*8, %rsp + CFI_ADJUST_CFA_OFFSET 2*8 + movq %rbx, (%rsp) CFI_REL_OFFSET rbx, 0 - movq %r12,1*8(%rsp) + movq %r12, 1*8(%rsp) CFI_REL_OFFSET r12, 1*8 - movq %r13,2*8(%rsp) - CFI_REL_OFFSET r13, 2*8 - movl $(4096/64)-5,%ecx + movl $(4096/64)-5, %ecx .p2align 4 .Loop64: - dec %rcx - - movq (%rsi), %rax - movq 8 (%rsi), %rbx - movq 16 (%rsi), %rdx - movq 24 (%rsi), %r8 - movq 32 (%rsi), %r9 - movq 40 (%rsi), %r10 - movq 48 (%rsi), %r11 - movq 56 (%rsi), %r12 + dec %rcx + movq 0x8*0(%rsi), %rax + movq 0x8*1(%rsi), %rbx + movq 0x8*2(%rsi), %rdx + movq 0x8*3(%rsi), %r8 + movq 0x8*4(%rsi), %r9 + movq 0x8*5(%rsi), %r10 + movq 0x8*6(%rsi), %r11 + movq 0x8*7(%rsi), %r12 prefetcht0 5*64(%rsi) - movq %rax, (%rdi) - movq %rbx, 8 (%rdi) - movq %rdx, 16 (%rdi) - movq %r8, 24 (%rdi) - movq %r9, 32 (%rdi) - movq %r10, 40 (%rdi) - movq %r11, 48 (%rdi) - movq %r12, 56 (%rdi) + movq %rax, 0x8*0(%rdi) + movq %rbx, 0x8*1(%rdi) + movq %rdx, 0x8*2(%rdi) + movq %r8, 0x8*3(%rdi) + movq %r9, 0x8*4(%rdi) + movq %r10, 0x8*5(%rdi) + movq %r11, 0x8*6(%rdi) + movq %r12, 0x8*7(%rdi) - leaq 64 (%rsi), %rsi - leaq 64 (%rdi), %rdi + leaq 64 (%rsi), %rsi + leaq 64 (%rdi), %rdi - jnz .Loop64 + jnz .Loop64 - movl $5,%ecx + movl $5, %ecx .p2align 4 .Loop2: - decl %ecx - - movq (%rsi), %rax - movq 8 (%rsi), %rbx - movq 16 (%rsi), %rdx - movq 24 (%rsi), %r8 - movq 32 (%rsi), %r9 - movq 40 (%rsi), %r10 - movq 48 (%rsi), %r11 - movq 56 (%rsi), %r12 - - movq %rax, (%rdi) - movq %rbx, 8 (%rdi) - movq %rdx, 16 (%rdi) - movq %r8, 24 (%rdi) - movq %r9, 32 (%rdi) - movq %r10, 40 (%rdi) - movq %r11, 48 (%rdi) - movq %r12, 56 (%rdi) - - leaq 64(%rdi),%rdi - leaq 64(%rsi),%rsi - + decl %ecx + + movq 0x8*0(%rsi), %rax + movq 0x8*1(%rsi), %rbx + movq 0x8*2(%rsi), %rdx + movq 0x8*3(%rsi), %r8 + movq 0x8*4(%rsi), %r9 + movq 0x8*5(%rsi), %r10 + movq 0x8*6(%rsi), %r11 + movq 0x8*7(%rsi), %r12 + + movq %rax, 0x8*0(%rdi) 
+ movq %rbx, 0x8*1(%rdi) + movq %rdx, 0x8*2(%rdi) + movq %r8, 0x8*3(%rdi) + movq %r9, 0x8*4(%rdi) + movq %r10, 0x8*5(%rdi) + movq %r11, 0x8*6(%rdi) + movq %r12, 0x8*7(%rdi) + + leaq 64(%rdi), %rdi + leaq 64(%rsi), %rsi jnz .Loop2 - movq (%rsp),%rbx + movq (%rsp), %rbx CFI_RESTORE rbx - movq 1*8(%rsp),%r12 + movq 1*8(%rsp), %r12 CFI_RESTORE r12 - movq 2*8(%rsp),%r13 - CFI_RESTORE r13 - addq $3*8,%rsp - CFI_ADJUST_CFA_OFFSET -3*8 + addq $2*8, %rsp + CFI_ADJUST_CFA_OFFSET -2*8 ret .Lcopy_page_end: CFI_ENDPROC @@ -107,7 +101,7 @@ ENDPROC(copy_page) .section .altinstr_replacement,"ax" 1: .byte 0xeb /* jmp <disp8> */ - .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */ + .byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */ 2: .previous .section .altinstructions,"a" diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 024840266ba..dee945d5559 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -16,6 +16,8 @@ #include <asm/thread_info.h> #include <asm/cpufeature.h> #include <asm/alternative-asm.h> +#include <asm/asm.h> +#include <asm/smap.h> /* * By placing feature2 after feature1 in altinstructions section, we logically @@ -63,11 +65,8 @@ jmp copy_user_handle_tail .previous - .section __ex_table,"a" - .align 8 - .quad 100b,103b - .quad 101b,103b - .previous + _ASM_EXTABLE(100b,103b) + _ASM_EXTABLE(101b,103b) #endif .endm @@ -132,6 +131,7 @@ ENDPROC(bad_from_user) */ ENTRY(copy_user_generic_unrolled) CFI_STARTPROC + ASM_STAC cmpl $8,%edx jb 20f /* less then 8 bytes, go to byte copy loop */ ALIGN_DESTINATION @@ -179,41 +179,39 @@ ENTRY(copy_user_generic_unrolled) decl %ecx jnz 21b 23: xor %eax,%eax + ASM_CLAC ret .section .fixup,"ax" 30: shll $6,%ecx addl %ecx,%edx jmp 60f -40: lea (%rdx,%rcx,8),%rdx +40: leal (%rdx,%rcx,8),%edx jmp 60f 50: movl %ecx,%edx 60: jmp copy_user_handle_tail /* ecx is zerorest also */ .previous - .section __ex_table,"a" - .align 8 - .quad 1b,30b - .quad 2b,30b - .quad 3b,30b - .quad 
4b,30b - .quad 5b,30b - .quad 6b,30b - .quad 7b,30b - .quad 8b,30b - .quad 9b,30b - .quad 10b,30b - .quad 11b,30b - .quad 12b,30b - .quad 13b,30b - .quad 14b,30b - .quad 15b,30b - .quad 16b,30b - .quad 18b,40b - .quad 19b,40b - .quad 21b,50b - .quad 22b,50b - .previous + _ASM_EXTABLE(1b,30b) + _ASM_EXTABLE(2b,30b) + _ASM_EXTABLE(3b,30b) + _ASM_EXTABLE(4b,30b) + _ASM_EXTABLE(5b,30b) + _ASM_EXTABLE(6b,30b) + _ASM_EXTABLE(7b,30b) + _ASM_EXTABLE(8b,30b) + _ASM_EXTABLE(9b,30b) + _ASM_EXTABLE(10b,30b) + _ASM_EXTABLE(11b,30b) + _ASM_EXTABLE(12b,30b) + _ASM_EXTABLE(13b,30b) + _ASM_EXTABLE(14b,30b) + _ASM_EXTABLE(15b,30b) + _ASM_EXTABLE(16b,30b) + _ASM_EXTABLE(18b,40b) + _ASM_EXTABLE(19b,40b) + _ASM_EXTABLE(21b,50b) + _ASM_EXTABLE(22b,50b) CFI_ENDPROC ENDPROC(copy_user_generic_unrolled) @@ -237,8 +235,7 @@ ENDPROC(copy_user_generic_unrolled) */ ENTRY(copy_user_generic_string) CFI_STARTPROC - andl %edx,%edx - jz 4f + ASM_STAC cmpl $8,%edx jb 2f /* less than 8 bytes, go to byte copy loop */ ALIGN_DESTINATION @@ -250,20 +247,18 @@ ENTRY(copy_user_generic_string) 2: movl %edx,%ecx 3: rep movsb -4: xorl %eax,%eax + xorl %eax,%eax + ASM_CLAC ret .section .fixup,"ax" -11: lea (%rdx,%rcx,8),%rcx +11: leal (%rdx,%rcx,8),%ecx 12: movl %ecx,%edx /* ecx is zerorest also */ jmp copy_user_handle_tail .previous - .section __ex_table,"a" - .align 8 - .quad 1b,11b - .quad 3b,12b - .previous + _ASM_EXTABLE(1b,11b) + _ASM_EXTABLE(3b,12b) CFI_ENDPROC ENDPROC(copy_user_generic_string) @@ -281,12 +276,12 @@ ENDPROC(copy_user_generic_string) */ ENTRY(copy_user_enhanced_fast_string) CFI_STARTPROC - andl %edx,%edx - jz 2f + ASM_STAC movl %edx,%ecx 1: rep movsb -2: xorl %eax,%eax + xorl %eax,%eax + ASM_CLAC ret .section .fixup,"ax" @@ -294,9 +289,6 @@ ENTRY(copy_user_enhanced_fast_string) jmp copy_user_handle_tail .previous - .section __ex_table,"a" - .align 8 - .quad 1b,12b - .previous + _ASM_EXTABLE(1b,12b) CFI_ENDPROC ENDPROC(copy_user_enhanced_fast_string) diff --git 
a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index cb0c112386f..6a4f43c2d9e 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S @@ -14,6 +14,8 @@ #include <asm/current.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> +#include <asm/asm.h> +#include <asm/smap.h> .macro ALIGN_DESTINATION #ifdef FIX_ALIGNMENT @@ -36,11 +38,8 @@ jmp copy_user_handle_tail .previous - .section __ex_table,"a" - .align 8 - .quad 100b,103b - .quad 101b,103b - .previous + _ASM_EXTABLE(100b,103b) + _ASM_EXTABLE(101b,103b) #endif .endm @@ -50,6 +49,7 @@ */ ENTRY(__copy_user_nocache) CFI_STARTPROC + ASM_STAC cmpl $8,%edx jb 20f /* less then 8 bytes, go to byte copy loop */ ALIGN_DESTINATION @@ -97,6 +97,7 @@ ENTRY(__copy_user_nocache) decl %ecx jnz 21b 23: xorl %eax,%eax + ASM_CLAC sfence ret @@ -111,27 +112,25 @@ ENTRY(__copy_user_nocache) jmp copy_user_handle_tail .previous - .section __ex_table,"a" - .quad 1b,30b - .quad 2b,30b - .quad 3b,30b - .quad 4b,30b - .quad 5b,30b - .quad 6b,30b - .quad 7b,30b - .quad 8b,30b - .quad 9b,30b - .quad 10b,30b - .quad 11b,30b - .quad 12b,30b - .quad 13b,30b - .quad 14b,30b - .quad 15b,30b - .quad 16b,30b - .quad 18b,40b - .quad 19b,40b - .quad 21b,50b - .quad 22b,50b - .previous + _ASM_EXTABLE(1b,30b) + _ASM_EXTABLE(2b,30b) + _ASM_EXTABLE(3b,30b) + _ASM_EXTABLE(4b,30b) + _ASM_EXTABLE(5b,30b) + _ASM_EXTABLE(6b,30b) + _ASM_EXTABLE(7b,30b) + _ASM_EXTABLE(8b,30b) + _ASM_EXTABLE(9b,30b) + _ASM_EXTABLE(10b,30b) + _ASM_EXTABLE(11b,30b) + _ASM_EXTABLE(12b,30b) + _ASM_EXTABLE(13b,30b) + _ASM_EXTABLE(14b,30b) + _ASM_EXTABLE(15b,30b) + _ASM_EXTABLE(16b,30b) + _ASM_EXTABLE(18b,40b) + _ASM_EXTABLE(19b,40b) + _ASM_EXTABLE(21b,50b) + _ASM_EXTABLE(22b,50b) CFI_ENDPROC ENDPROC(__copy_user_nocache) diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index fb903b758da..2419d5fefae 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S @@ -8,6 +8,7 @@ 
#include <linux/linkage.h> #include <asm/dwarf2.h> #include <asm/errno.h> +#include <asm/asm.h> /* * Checksum copy with exception handling. @@ -31,26 +32,17 @@ .macro source 10: - .section __ex_table, "a" - .align 8 - .quad 10b, .Lbad_source - .previous + _ASM_EXTABLE(10b, .Lbad_source) .endm .macro dest 20: - .section __ex_table, "a" - .align 8 - .quad 20b, .Lbad_dest - .previous + _ASM_EXTABLE(20b, .Lbad_dest) .endm .macro ignore L=.Lignore 30: - .section __ex_table, "a" - .align 8 - .quad 30b, \L - .previous + _ASM_EXTABLE(30b, \L) .endm diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c index 459b58a8a15..7609e0e421e 100644 --- a/arch/x86/lib/csum-wrappers_64.c +++ b/arch/x86/lib/csum-wrappers_64.c @@ -6,6 +6,7 @@ */ #include <asm/checksum.h> #include <linux/module.h> +#include <asm/smap.h> /** * csum_partial_copy_from_user - Copy and checksum from user space. @@ -52,8 +53,10 @@ csum_partial_copy_from_user(const void __user *src, void *dst, len -= 2; } } + stac(); isum = csum_partial_copy_generic((__force const void *)src, dst, len, isum, errp, NULL); + clac(); if (unlikely(*errp)) goto out_err; @@ -82,6 +85,8 @@ __wsum csum_partial_copy_to_user(const void *src, void __user *dst, int len, __wsum isum, int *errp) { + __wsum ret; + might_sleep(); if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) { @@ -105,8 +110,11 @@ csum_partial_copy_to_user(const void *src, void __user *dst, } *errp = 0; - return csum_partial_copy_generic(src, (void __force *)dst, - len, isum, NULL, errp); + stac(); + ret = csum_partial_copy_generic(src, (void __force *)dst, + len, isum, NULL, errp); + clac(); + return ret; } EXPORT_SYMBOL(csum_partial_copy_to_user); @@ -115,7 +123,7 @@ EXPORT_SYMBOL(csum_partial_copy_to_user); * @src: source address * @dst: destination address * @len: number of bytes to be copied. 
- * @isum: initial sum that is added into the result (32bit unfolded) + * @sum: initial sum that is added into the result (32bit unfolded) * * Returns an 32bit unfolded checksum of the buffer. */ diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index e395693abdb..39d6a3db0b9 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -16,7 +16,6 @@ #include <linux/timex.h> #include <linux/preempt.h> #include <linux/delay.h> -#include <linux/init.h> #include <asm/processor.h> #include <asm/delay.h> @@ -98,7 +97,7 @@ void use_tsc_delay(void) delay_fn = delay_tsc; } -int __devinit read_current_timer(unsigned long *timer_val) +int read_current_timer(unsigned long *timer_val) { if (delay_fn == delay_tsc) { rdtscll(*timer_val); diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 51f1504cddd..a4512359656 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -15,11 +15,10 @@ * __get_user_X * * Inputs: %[r|e]ax contains the address. - * The register is modified, but all changes are undone - * before returning because the C code doesn't know about it. 
* * Outputs: %[r|e]ax is error code (0 or -EFAULT) * %[r|e]dx contains zero-extended value + * %ecx contains the high half for 32-bit __get_user_8 * * * These functions should not modify any other registers, @@ -33,6 +32,7 @@ #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/asm.h> +#include <asm/smap.h> .text ENTRY(__get_user_1) @@ -40,8 +40,10 @@ ENTRY(__get_user_1) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user -1: movzb (%_ASM_AX),%edx + ASM_STAC +1: movzbl (%_ASM_AX),%edx xor %eax,%eax + ASM_CLAC ret CFI_ENDPROC ENDPROC(__get_user_1) @@ -53,8 +55,10 @@ ENTRY(__get_user_2) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + ASM_STAC 2: movzwl -1(%_ASM_AX),%edx xor %eax,%eax + ASM_CLAC ret CFI_ENDPROC ENDPROC(__get_user_2) @@ -66,39 +70,71 @@ ENTRY(__get_user_4) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user -3: mov -3(%_ASM_AX),%edx + ASM_STAC +3: movl -3(%_ASM_AX),%edx xor %eax,%eax + ASM_CLAC ret CFI_ENDPROC ENDPROC(__get_user_4) -#ifdef CONFIG_X86_64 ENTRY(__get_user_8) CFI_STARTPROC +#ifdef CONFIG_X86_64 add $7,%_ASM_AX jc bad_get_user GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX - jae bad_get_user -4: movq -7(%_ASM_AX),%_ASM_DX + jae bad_get_user + ASM_STAC +4: movq -7(%_ASM_AX),%rdx xor %eax,%eax + ASM_CLAC ret +#else + add $7,%_ASM_AX + jc bad_get_user_8 + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user_8 + ASM_STAC +4: movl -7(%_ASM_AX),%edx +5: movl -3(%_ASM_AX),%ecx + xor %eax,%eax + ASM_CLAC + ret +#endif CFI_ENDPROC ENDPROC(__get_user_8) -#endif + bad_get_user: CFI_STARTPROC xor %edx,%edx mov $(-EFAULT),%_ASM_AX + ASM_CLAC ret CFI_ENDPROC END(bad_get_user) -.section __ex_table,"a" - _ASM_PTR 1b,bad_get_user - _ASM_PTR 2b,bad_get_user - _ASM_PTR 3b,bad_get_user +#ifdef CONFIG_X86_32 +bad_get_user_8: + CFI_STARTPROC + xor %edx,%edx + xor %ecx,%ecx + mov $(-EFAULT),%_ASM_AX + 
ASM_CLAC + ret + CFI_ENDPROC +END(bad_get_user_8) +#endif + + _ASM_EXTABLE(1b,bad_get_user) + _ASM_EXTABLE(2b,bad_get_user) + _ASM_EXTABLE(3b,bad_get_user) #ifdef CONFIG_X86_64 - _ASM_PTR 4b,bad_get_user + _ASM_EXTABLE(4b,bad_get_user) +#else + _ASM_EXTABLE(4b,bad_get_user_8) + _ASM_EXTABLE(5b,bad_get_user_8) #endif diff --git a/arch/x86/lib/hash.c b/arch/x86/lib/hash.c new file mode 100644 index 00000000000..ff4fa51a5b1 --- /dev/null +++ b/arch/x86/lib/hash.c @@ -0,0 +1,92 @@ +/* + * Some portions derived from code covered by the following notice: + * + * Copyright (c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/hash.h> +#include <linux/init.h> + +#include <asm/processor.h> +#include <asm/cpufeature.h> +#include <asm/hash.h> + +static inline u32 crc32_u32(u32 crc, u32 val) +{ +#ifdef CONFIG_AS_CRC32 + asm ("crc32l %1,%0\n" : "+r" (crc) : "rm" (val)); +#else + asm (".byte 0xf2, 0x0f, 0x38, 0xf1, 0xc1" : "+a" (crc) : "c" (val)); +#endif + return crc; +} + +static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed) +{ + const u32 *p32 = (const u32 *) data; + u32 i, tmp = 0; + + for (i = 0; i < len / 4; i++) + seed = crc32_u32(seed, *p32++); + + switch (len & 3) { + case 3: + tmp |= *((const u8 *) p32 + 2) << 16; + /* fallthrough */ + case 2: + tmp |= *((const u8 *) p32 + 1) << 8; + /* fallthrough */ + case 1: + tmp |= *((const u8 *) p32); + seed = crc32_u32(seed, tmp); + break; + } + + return seed; +} + +static u32 intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed) +{ + const u32 *p32 = (const u32 *) data; + u32 i; + + for (i = 0; i < len; i++) + seed = crc32_u32(seed, *p32++); + + return seed; +} + +void __init setup_arch_fast_hash(struct fast_hash_ops *ops) +{ + if (cpu_has_xmm4_2) { + ops->hash = intel_crc4_2_hash; + ops->hash2 = intel_crc4_2_hash2; + } +} diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 46fc4ee09fc..c1f01a8e9f6 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c @@ -29,46 +29,46 @@ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) return 
inat_primary_table[opcode]; } -insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, +int inat_get_last_prefix_id(insn_byte_t last_pfx) +{ + insn_attr_t lpfx_attr; + + lpfx_attr = inat_get_opcode_attribute(last_pfx); + return inat_last_prefix_id(lpfx_attr); +} + +insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, insn_attr_t esc_attr) { const insn_attr_t *table; - insn_attr_t lpfx_attr; - int n, m = 0; + int n; n = inat_escape_id(esc_attr); - if (last_pfx) { - lpfx_attr = inat_get_opcode_attribute(last_pfx); - m = inat_last_prefix_id(lpfx_attr); - } + table = inat_escape_tables[n][0]; if (!table) return 0; - if (inat_has_variant(table[opcode]) && m) { - table = inat_escape_tables[n][m]; + if (inat_has_variant(table[opcode]) && lpfx_id) { + table = inat_escape_tables[n][lpfx_id]; if (!table) return 0; } return table[opcode]; } -insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, +insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, insn_attr_t grp_attr) { const insn_attr_t *table; - insn_attr_t lpfx_attr; - int n, m = 0; + int n; n = inat_group_id(grp_attr); - if (last_pfx) { - lpfx_attr = inat_get_opcode_attribute(last_pfx); - m = inat_last_prefix_id(lpfx_attr); - } + table = inat_group_tables[n][0]; if (!table) return inat_group_common_attribute(grp_attr); - if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { - table = inat_group_tables[n][m]; + if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { + table = inat_group_tables[n][lpfx_id]; if (!table) return inat_group_common_attribute(grp_attr); } @@ -82,9 +82,16 @@ insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, const insn_attr_t *table; if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) return 0; - table = inat_avx_tables[vex_m][vex_p]; + /* At first, this checks the master table */ + table = inat_avx_tables[vex_m][0]; if (!table) return 0; + if (!inat_is_group(table[opcode]) && 
vex_p) { + /* If this is not a group, get attribute directly */ + table = inat_avx_tables[vex_m][vex_p]; + if (!table) + return 0; + } return table[opcode]; } diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 374562ed670..54fcffed28e 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -18,7 +18,11 @@ * Copyright (C) IBM Corporation, 2002, 2004, 2009 */ +#ifdef __KERNEL__ #include <linux/string.h> +#else +#include <string.h> +#endif #include <asm/inat.h> #include <asm/insn.h> @@ -185,7 +189,8 @@ err_out: void insn_get_opcode(struct insn *insn) { struct insn_field *opcode = &insn->opcode; - insn_byte_t op, pfx; + insn_byte_t op; + int pfx_id; if (opcode->got) return; if (!insn->prefixes.got) @@ -202,7 +207,7 @@ void insn_get_opcode(struct insn *insn) m = insn_vex_m_bits(insn); p = insn_vex_p_bits(insn); insn->attr = inat_get_avx_attribute(op, m, p); - if (!inat_accept_vex(insn->attr)) + if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) insn->attr = 0; /* This instruction is bad */ goto end; /* VEX has only 1 byte for opcode */ } @@ -212,8 +217,8 @@ void insn_get_opcode(struct insn *insn) /* Get escaped opcode */ op = get_next(insn_byte_t, insn); opcode->bytes[opcode->nbytes++] = op; - pfx = insn_last_prefix(insn); - insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); } if (inat_must_vex(insn->attr)) insn->attr = 0; /* This instruction is bad */ @@ -235,7 +240,7 @@ err_out: void insn_get_modrm(struct insn *insn) { struct insn_field *modrm = &insn->modrm; - insn_byte_t pfx, mod; + insn_byte_t pfx_id, mod; if (modrm->got) return; if (!insn->opcode.got) @@ -246,9 +251,11 @@ void insn_get_modrm(struct insn *insn) modrm->value = mod; modrm->nbytes = 1; if (inat_is_group(insn->attr)) { - pfx = insn_last_prefix(insn); - insn->attr = inat_get_group_attribute(mod, pfx, + pfx_id = insn_last_prefix_id(insn); + insn->attr = 
inat_get_group_attribute(mod, pfx_id, insn->attr); + if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) + insn->attr = 0; /* This is bad */ } } @@ -376,8 +383,8 @@ err_out: return; } -/* Decode moffset16/32/64 */ -static void __get_moffset(struct insn *insn) +/* Decode moffset16/32/64. Return 0 if failed */ +static int __get_moffset(struct insn *insn) { switch (insn->addr_bytes) { case 2: @@ -394,15 +401,19 @@ static void __get_moffset(struct insn *insn) insn->moffset2.value = get_next(int, insn); insn->moffset2.nbytes = 4; break; + default: /* opnd_bytes must be modified manually */ + goto err_out; } insn->moffset1.got = insn->moffset2.got = 1; + return 1; + err_out: - return; + return 0; } -/* Decode imm v32(Iz) */ -static void __get_immv32(struct insn *insn) +/* Decode imm v32(Iz). Return 0 if failed */ +static int __get_immv32(struct insn *insn) { switch (insn->opnd_bytes) { case 2: @@ -414,14 +425,18 @@ static void __get_immv32(struct insn *insn) insn->immediate.value = get_next(int, insn); insn->immediate.nbytes = 4; break; + default: /* opnd_bytes must be modified manually */ + goto err_out; } + return 1; + err_out: - return; + return 0; } -/* Decode imm v64(Iv/Ov) */ -static void __get_immv(struct insn *insn) +/* Decode imm v64(Iv/Ov), Return 0 if failed */ +static int __get_immv(struct insn *insn) { switch (insn->opnd_bytes) { case 2: @@ -438,15 +453,18 @@ static void __get_immv(struct insn *insn) insn->immediate2.value = get_next(int, insn); insn->immediate2.nbytes = 4; break; + default: /* opnd_bytes must be modified manually */ + goto err_out; } insn->immediate1.got = insn->immediate2.got = 1; + return 1; err_out: - return; + return 0; } /* Decode ptr16:16/32(Ap) */ -static void __get_immptr(struct insn *insn) +static int __get_immptr(struct insn *insn) { switch (insn->opnd_bytes) { case 2: @@ -459,14 +477,17 @@ static void __get_immptr(struct insn *insn) break; case 8: /* ptr16:64 is not exist (no segment) */ - return; + return 0; + default: /* 
opnd_bytes must be modified manually */ + goto err_out; } insn->immediate2.value = get_next(unsigned short, insn); insn->immediate2.nbytes = 2; insn->immediate1.got = insn->immediate2.got = 1; + return 1; err_out: - return; + return 0; } /** @@ -486,7 +507,8 @@ void insn_get_immediate(struct insn *insn) insn_get_displacement(insn); if (inat_has_moffset(insn->attr)) { - __get_moffset(insn); + if (!__get_moffset(insn)) + goto err_out; goto done; } @@ -514,16 +536,20 @@ void insn_get_immediate(struct insn *insn) insn->immediate2.nbytes = 4; break; case INAT_IMM_PTR: - __get_immptr(insn); + if (!__get_immptr(insn)) + goto err_out; break; case INAT_IMM_VWORD32: - __get_immv32(insn); + if (!__get_immv32(insn)) + goto err_out; break; case INAT_IMM_VWORD: - __get_immv(insn); + if (!__get_immv(insn)) + goto err_out; break; default: - break; + /* Here, insn must have an immediate, but failed */ + goto err_out; } if (inat_has_second_immediate(insn->attr)) { insn->immediate2.value = get_next(char, insn); diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c index b908a59eccf..a404b4b7553 100644 --- a/arch/x86/lib/memcpy_32.c +++ b/arch/x86/lib/memcpy_32.c @@ -4,7 +4,7 @@ #undef memcpy #undef memset -void *memcpy(void *to, const void *from, size_t n) +__visible void *memcpy(void *to, const void *from, size_t n) { #ifdef CONFIG_X86_USE_3DNOW return __memcpy3d(to, from, n); @@ -14,19 +14,19 @@ void *memcpy(void *to, const void *from, size_t n) } EXPORT_SYMBOL(memcpy); -void *memset(void *s, int c, size_t count) +__visible void *memset(void *s, int c, size_t count) { return __memset(s, c, count); } EXPORT_SYMBOL(memset); -void *memmove(void *dest, const void *src, size_t n) +__visible void *memmove(void *dest, const void *src, size_t n) { int d0,d1,d2,d3,d4,d5; char *ret = dest; __asm__ __volatile__( - /* Handle more 16bytes in loop */ + /* Handle more 16 bytes in loop */ "cmp $0x10, %0\n\t" "jb 1f\n\t" @@ -51,7 +51,7 @@ void *memmove(void *dest, const void *src, size_t 
n) "sub $0x10, %0\n\t" /* - * We gobble 16byts forward in each loop. + * We gobble 16 bytes forward in each loop. */ "3:\n\t" "sub $0x10, %0\n\t" @@ -117,7 +117,7 @@ void *memmove(void *dest, const void *src, size_t n) "sub $0x10, %0\n\t" /* - * We gobble 16byts backward in each loop. + * We gobble 16 bytes backward in each loop. */ "7:\n\t" "sub $0x10, %0\n\t" diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index efbf2a0ecde..56313a32618 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -27,9 +27,8 @@ .section .altinstr_replacement, "ax", @progbits .Lmemcpy_c: movq %rdi, %rax - - movl %edx, %ecx - shrl $3, %ecx + movq %rdx, %rcx + shrq $3, %rcx andl $7, %edx rep movsq movl %edx, %ecx @@ -48,8 +47,7 @@ .section .altinstr_replacement, "ax", @progbits .Lmemcpy_c_e: movq %rdi, %rax - - movl %edx, %ecx + movq %rdx, %rcx rep movsb ret .Lmemcpy_e_e: @@ -60,10 +58,7 @@ ENTRY(memcpy) CFI_STARTPROC movq %rdi, %rax - /* - * Use 32bit CMP here to avoid long NOP padding. - */ - cmp $0x20, %edx + cmpq $0x20, %rdx jb .Lhandle_tail /* @@ -72,7 +67,7 @@ ENTRY(memcpy) */ cmp %dil, %sil jl .Lcopy_backward - subl $0x20, %edx + subq $0x20, %rdx .Lcopy_forward_loop: subq $0x20, %rdx @@ -91,7 +86,7 @@ ENTRY(memcpy) movq %r11, 3*8(%rdi) leaq 4*8(%rdi), %rdi jae .Lcopy_forward_loop - addq $0x20, %rdx + addl $0x20, %edx jmp .Lhandle_tail .Lcopy_backward: @@ -103,7 +98,7 @@ ENTRY(memcpy) subq $0x20, %rdx /* * At most 3 ALU operations in one cycle, - * so append NOPS in the same 16bytes trunk. + * so append NOPS in the same 16 bytes trunk. */ .p2align 4 .Lcopy_backward_loop: @@ -123,11 +118,11 @@ ENTRY(memcpy) /* * Calculate copy position to head. */ - addq $0x20, %rdx + addl $0x20, %edx subq %rdx, %rsi subq %rdx, %rdi .Lhandle_tail: - cmpq $16, %rdx + cmpl $16, %edx jb .Lless_16bytes /* @@ -144,7 +139,7 @@ ENTRY(memcpy) retq .p2align 4 .Lless_16bytes: - cmpq $8, %rdx + cmpl $8, %edx jb .Lless_8bytes /* * Move data from 8 bytes to 15 bytes. 
@@ -156,7 +151,7 @@ ENTRY(memcpy) retq .p2align 4 .Lless_8bytes: - cmpq $4, %rdx + cmpl $4, %edx jb .Lless_3bytes /* @@ -169,18 +164,19 @@ ENTRY(memcpy) retq .p2align 4 .Lless_3bytes: - cmpl $0, %edx - je .Lend + subl $1, %edx + jb .Lend /* * Move data from 1 bytes to 3 bytes. */ -.Lloop_1: - movb (%rsi), %r8b - movb %r8b, (%rdi) - incq %rdi - incq %rsi - decl %edx - jnz .Lloop_1 + movzbl (%rsi), %ecx + jz .Lstore_1byte + movzbq 1(%rsi), %r8 + movzbq (%rsi, %rdx), %r9 + movb %r8b, 1(%rdi) + movb %r9b, (%rdi, %rdx) +.Lstore_1byte: + movb %cl, (%rdi) .Lend: retq diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index ee164610ec4..65268a6104f 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -27,7 +27,7 @@ ENTRY(memmove) CFI_STARTPROC - /* Handle more 32bytes in loop */ + /* Handle more 32 bytes in loop */ mov %rdi, %rax cmp $0x20, %rdx jb 1f @@ -56,7 +56,7 @@ ENTRY(memmove) 3: sub $0x20, %rdx /* - * We gobble 32byts forward in each loop. + * We gobble 32 bytes forward in each loop. */ 5: sub $0x20, %rdx @@ -122,7 +122,7 @@ ENTRY(memmove) addq %rdx, %rdi subq $0x20, %rdx /* - * We gobble 32byts backward in each loop. + * We gobble 32 bytes backward in each loop. 
*/ 8: subq $0x20, %rdx diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 79bd454b78a..2dcb3808cbd 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -19,16 +19,15 @@ .section .altinstr_replacement, "ax", @progbits .Lmemset_c: movq %rdi,%r9 - movl %edx,%r8d - andl $7,%r8d - movl %edx,%ecx - shrl $3,%ecx + movq %rdx,%rcx + andl $7,%edx + shrq $3,%rcx /* expand byte value */ movzbl %sil,%esi movabs $0x0101010101010101,%rax - mulq %rsi /* with rax, clobbers rdx */ + imulq %rsi,%rax rep stosq - movl %r8d,%ecx + movl %edx,%ecx rep stosb movq %r9,%rax ret @@ -50,7 +49,7 @@ .Lmemset_c_e: movq %rdi,%r9 movb %sil,%al - movl %edx,%ecx + movq %rdx,%rcx rep stosb movq %r9,%rax ret @@ -61,12 +60,11 @@ ENTRY(memset) ENTRY(__memset) CFI_STARTPROC movq %rdi,%r10 - movq %rdx,%r11 /* expand byte value */ movzbl %sil,%ecx movabs $0x0101010101010101,%rax - mul %rcx /* with rax, clobbers rdx */ + imulq %rcx,%rax /* align dst */ movl %edi,%r9d @@ -75,13 +73,13 @@ ENTRY(__memset) CFI_REMEMBER_STATE .Lafter_bad_alignment: - movl %r11d,%ecx - shrl $6,%ecx + movq %rdx,%rcx + shrq $6,%rcx jz .Lhandle_tail .p2align 4 .Lloop_64: - decl %ecx + decq %rcx movq %rax,(%rdi) movq %rax,8(%rdi) movq %rax,16(%rdi) @@ -97,7 +95,7 @@ ENTRY(__memset) to predict jump tables. 
*/ .p2align 4 .Lhandle_tail: - movl %r11d,%ecx + movl %edx,%ecx andl $63&(~7),%ecx jz .Lhandle_7 shrl $3,%ecx @@ -109,12 +107,11 @@ ENTRY(__memset) jnz .Lloop_8 .Lhandle_7: - movl %r11d,%ecx - andl $7,%ecx + andl $7,%edx jz .Lende .p2align 4 .Lloop_1: - decl %ecx + decl %edx movb %al,(%rdi) leaq 1(%rdi),%rdi jnz .Lloop_1 @@ -125,13 +122,13 @@ ENTRY(__memset) CFI_RESTORE_STATE .Lbad_alignment: - cmpq $7,%r11 + cmpq $7,%rdx jbe .Lhandle_7 movq %rax,(%rdi) /* unaligned store */ movq $8,%r8 subq %r9,%r8 addq %r8,%rdi - subq %r8,%r11 + subq %r8,%rdx jmp .Lafter_bad_alignment .Lfinal: CFI_ENDPROC diff --git a/arch/x86/lib/misc.c b/arch/x86/lib/misc.c new file mode 100644 index 00000000000..76b373af03f --- /dev/null +++ b/arch/x86/lib/misc.c @@ -0,0 +1,21 @@ +/* + * Count the digits of @val including a possible sign. + * + * (Typed on and submitted from hpa's mobile phone.) + */ +int num_digits(int val) +{ + int m = 10; + int d = 1; + + if (val < 0) { + d++; + val = -val; + } + + while (val >= m) { + m *= 10; + d++; + } + return d; +} diff --git a/arch/x86/lib/msr-reg-export.c b/arch/x86/lib/msr-reg-export.c index a311cc59b65..8d6ef78b5d0 100644 --- a/arch/x86/lib/msr-reg-export.c +++ b/arch/x86/lib/msr-reg-export.c @@ -1,5 +1,5 @@ #include <linux/module.h> #include <asm/msr.h> -EXPORT_SYMBOL(native_rdmsr_safe_regs); -EXPORT_SYMBOL(native_wrmsr_safe_regs); +EXPORT_SYMBOL(rdmsr_safe_regs); +EXPORT_SYMBOL(wrmsr_safe_regs); diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index 69fa10623f2..f6d13eefad1 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -6,13 +6,13 @@ #ifdef CONFIG_X86_64 /* - * int native_{rdmsr,wrmsr}_safe_regs(u32 gprs[8]); + * int {rdmsr,wrmsr}_safe_regs(u32 gprs[8]); * * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi] * */ .macro op_safe_regs op -ENTRY(native_\op\()_safe_regs) +ENTRY(\op\()_safe_regs) CFI_STARTPROC pushq_cfi %rbx pushq_cfi %rbp @@ -45,13 +45,13 @@ ENTRY(native_\op\()_safe_regs) _ASM_EXTABLE(1b, 
3b) CFI_ENDPROC -ENDPROC(native_\op\()_safe_regs) +ENDPROC(\op\()_safe_regs) .endm #else /* X86_32 */ .macro op_safe_regs op -ENTRY(native_\op\()_safe_regs) +ENTRY(\op\()_safe_regs) CFI_STARTPROC pushl_cfi %ebx pushl_cfi %ebp @@ -92,7 +92,7 @@ ENTRY(native_\op\()_safe_regs) _ASM_EXTABLE(1b, 3b) CFI_ENDPROC -ENDPROC(native_\op\()_safe_regs) +ENDPROC(\op\()_safe_regs) .endm #endif diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c index a6b1b86d225..518532e6a3f 100644 --- a/arch/x86/lib/msr-smp.c +++ b/arch/x86/lib/msr-smp.c @@ -47,6 +47,21 @@ int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) } EXPORT_SYMBOL(rdmsr_on_cpu); +int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); + *q = rv.reg.q; + + return err; +} +EXPORT_SYMBOL(rdmsrl_on_cpu); + int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { int err; @@ -63,6 +78,22 @@ int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) } EXPORT_SYMBOL(wrmsr_on_cpu); +int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + rv.reg.q = q; + + err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); + + return err; +} +EXPORT_SYMBOL(wrmsrl_on_cpu); + static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs, void (*msr_func) (void *info)) @@ -159,6 +190,37 @@ int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) } EXPORT_SYMBOL(wrmsr_safe_on_cpu); +int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + rv.reg.q = q; + + err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); + + return err ? 
err : rv.err; +} +EXPORT_SYMBOL(wrmsrl_safe_on_cpu); + +int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); + *q = rv.reg.q; + + return err ? err : rv.err; +} +EXPORT_SYMBOL(rdmsrl_safe_on_cpu); + /* * These variants are significantly slower, but allows control over * the entire 32-bit GPR set. diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 8f8eebdca7d..43623739c7c 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -8,7 +8,7 @@ struct msr *msrs_alloc(void) msrs = alloc_percpu(struct msr); if (!msrs) { - pr_warning("%s: error allocating msrs\n", __func__); + pr_warn("%s: error allocating msrs\n", __func__); return NULL; } @@ -21,3 +21,90 @@ void msrs_free(struct msr *msrs) free_percpu(msrs); } EXPORT_SYMBOL(msrs_free); + +/** + * Read an MSR with error handling + * + * @msr: MSR to read + * @m: value to read into + * + * It returns read data only on success, otherwise it doesn't change the output + * argument @m. + * + */ +int msr_read(u32 msr, struct msr *m) +{ + int err; + u64 val; + + err = rdmsrl_safe(msr, &val); + if (!err) + m->q = val; + + return err; +} + +/** + * Write an MSR with error handling + * + * @msr: MSR to write + * @m: value to write + */ +int msr_write(u32 msr, struct msr *m) +{ + return wrmsrl_safe(msr, m->q); +} + +static inline int __flip_bit(u32 msr, u8 bit, bool set) +{ + struct msr m, m1; + int err = -EINVAL; + + if (bit > 63) + return err; + + err = msr_read(msr, &m); + if (err) + return err; + + m1 = m; + if (set) + m1.q |= BIT_64(bit); + else + m1.q &= ~BIT_64(bit); + + if (m1.q == m.q) + return 0; + + err = msr_write(msr, &m1); + if (err) + return err; + + return 1; +} + +/** + * Set @bit in a MSR @msr. + * + * Retval: + * < 0: An error was encountered. + * = 0: Bit was already set. + * > 0: Hardware accepted the MSR write. 
+ */ +int msr_set_bit(u32 msr, u8 bit) +{ + return __flip_bit(msr, bit, true); +} + +/** + * Clear @bit in a MSR @msr. + * + * Retval: + * < 0: An error was encountered. + * = 0: Bit was already cleared. + * > 0: Hardware accepted the MSR write. + */ +int msr_clear_bit(u32 msr, u8 bit) +{ + return __flip_bit(msr, bit, false); +} diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 36b0d15ae6e..fc6ba17a7ee 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -15,6 +15,7 @@ #include <asm/thread_info.h> #include <asm/errno.h> #include <asm/asm.h> +#include <asm/smap.h> /* @@ -31,7 +32,8 @@ #define ENTER CFI_STARTPROC ; \ GET_THREAD_INFO(%_ASM_BX) -#define EXIT ret ; \ +#define EXIT ASM_CLAC ; \ + ret ; \ CFI_ENDPROC .text @@ -39,6 +41,7 @@ ENTRY(__put_user_1) ENTER cmp TI_addr_limit(%_ASM_BX),%_ASM_CX jae bad_put_user + ASM_STAC 1: movb %al,(%_ASM_CX) xor %eax,%eax EXIT @@ -50,6 +53,7 @@ ENTRY(__put_user_2) sub $1,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user + ASM_STAC 2: movw %ax,(%_ASM_CX) xor %eax,%eax EXIT @@ -61,6 +65,7 @@ ENTRY(__put_user_4) sub $3,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user + ASM_STAC 3: movl %eax,(%_ASM_CX) xor %eax,%eax EXIT @@ -72,6 +77,7 @@ ENTRY(__put_user_8) sub $7,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user + ASM_STAC 4: mov %_ASM_AX,(%_ASM_CX) #ifdef CONFIG_X86_32 5: movl %edx,4(%_ASM_CX) @@ -86,12 +92,10 @@ bad_put_user: EXIT END(bad_put_user) -.section __ex_table,"a" - _ASM_PTR 1b,bad_put_user - _ASM_PTR 2b,bad_put_user - _ASM_PTR 3b,bad_put_user - _ASM_PTR 4b,bad_put_user + _ASM_EXTABLE(1b,bad_put_user) + _ASM_EXTABLE(2b,bad_put_user) + _ASM_EXTABLE(3b,bad_put_user) + _ASM_EXTABLE(4b,bad_put_user) #ifdef CONFIG_X86_32 - _ASM_PTR 5b,bad_put_user + _ASM_EXTABLE(5b,bad_put_user) #endif -.previous diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c index 82004d2bf05..bd59090825d 100644 --- a/arch/x86/lib/string_32.c +++ b/arch/x86/lib/string_32.c @@ -164,15 +164,13 @@ 
EXPORT_SYMBOL(strchr); size_t strlen(const char *s) { int d0; - int res; + size_t res; asm volatile("repne\n\t" - "scasb\n\t" - "notl %0\n\t" - "decl %0" + "scasb" : "=c" (res), "=&D" (d0) : "1" (s), "a" (0), "0" (0xffffffffu) : "memory"); - return res; + return ~res - 1; } EXPORT_SYMBOL(strlen); #endif diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index 2930ae05d77..28f85c91671 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S @@ -4,8 +4,8 @@ * (inspired by Andi Kleen's thunk_64.S) * Subject to the GNU public license, v.2. No warranty of any kind. */ - #include <linux/linkage.h> + #include <asm/asm.h> #ifdef CONFIG_TRACE_IRQFLAGS /* put return address in eax (arg1) */ @@ -22,6 +22,7 @@ popl %ecx popl %eax ret + _ASM_NOKPROBE(\name) .endm thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index a63efd6bb6a..92d9feaff42 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -8,6 +8,7 @@ #include <linux/linkage.h> #include <asm/dwarf2.h> #include <asm/calling.h> +#include <asm/asm.h> /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ .macro THUNK name, func, put_ret_addr_in_rdi=0 @@ -25,6 +26,7 @@ call \func jmp restore CFI_ENDPROC + _ASM_NOKPROBE(\name) .endm #ifdef CONFIG_TRACE_IRQFLAGS @@ -43,3 +45,4 @@ restore: RESTORE_ARGS ret CFI_ENDPROC + _ASM_NOKPROBE(restore) diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index 97be9cb5448..ddf9ecb53cc 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -7,37 +7,30 @@ #include <linux/highmem.h> #include <linux/module.h> +#include <asm/word-at-a-time.h> +#include <linux/sched.h> + /* - * best effort, GUP based copy_from_user() that is NMI-safe + * We rely on the nested NMI work to allow atomic faults from the NMI path; the + * nested NMI paths are careful to preserve CR2. 
*/ unsigned long copy_from_user_nmi(void *to, const void __user *from, unsigned long n) { - unsigned long offset, addr = (unsigned long)from; - unsigned long size, len = 0; - struct page *page; - void *map; - int ret; - - do { - ret = __get_user_pages_fast(addr, 1, 0, &page); - if (!ret) - break; - - offset = addr & (PAGE_SIZE - 1); - size = min(PAGE_SIZE - offset, n - len); - - map = kmap_atomic(page); - memcpy(to, map+offset, size); - kunmap_atomic(map); - put_page(page); + unsigned long ret; - len += size; - to += size; - addr += size; + if (__range_not_ok(from, n, TASK_SIZE)) + return 0; - } while (len < n); + /* + * Even though this function is typically called from NMI/IRQ context + * disable pagefaults so that its behaviour is consistent even when + * called form other contexts. + */ + pagefault_disable(); + ret = __copy_from_user_inatomic(to, from, n); + pagefault_enable(); - return len; + return ret; } EXPORT_SYMBOL_GPL(copy_from_user_nmi); diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index e218d5df85f..e2f5e21c03b 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -13,6 +13,7 @@ #include <linux/interrupt.h> #include <asm/uaccess.h> #include <asm/mmx.h> +#include <asm/asm.h> #ifdef CONFIG_X86_INTEL_USERCOPY /* @@ -33,93 +34,6 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n)) /* - * Copy a null terminated string from userspace. 
- */ - -#define __do_strncpy_from_user(dst, src, count, res) \ -do { \ - int __d0, __d1, __d2; \ - might_fault(); \ - __asm__ __volatile__( \ - " testl %1,%1\n" \ - " jz 2f\n" \ - "0: lodsb\n" \ - " stosb\n" \ - " testb %%al,%%al\n" \ - " jz 1f\n" \ - " decl %1\n" \ - " jnz 0b\n" \ - "1: subl %1,%0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %5,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(0b,3b) \ - : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ - "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ - : "memory"); \ -} while (0) - -/** - * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * Caller must check the specified block with access_ok() before calling - * this function. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -long -__strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res; - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(__strncpy_from_user); - -/** - * strncpy_from_user: - Copy a NUL terminated string from userspace. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. 
- * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -long -strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(strncpy_from_user); - -/* * Zero Userspace */ @@ -128,10 +42,11 @@ do { \ int __d0; \ might_fault(); \ __asm__ __volatile__( \ + ASM_STAC "\n" \ "0: rep; stosl\n" \ " movl %2,%0\n" \ "1: rep; stosb\n" \ - "2:\n" \ + "2: " ASM_CLAC "\n" \ ".section .fixup,\"ax\"\n" \ "3: lea 0(%2,%0,4),%0\n" \ " jmp 2b\n" \ @@ -181,50 +96,6 @@ __clear_user(void __user *to, unsigned long n) } EXPORT_SYMBOL(__clear_user); -/** - * strnlen_user: - Get the size of a string in user space. - * @s: The string to measure. - * @n: The maximum valid length - * - * Get the size of a NUL-terminated string in user space. - * - * Returns the size of the string INCLUDING the terminating NUL. - * On exception, returns 0. - * If the string is too long, returns a value greater than @n. 
- */ -long strnlen_user(const char __user *s, long n) -{ - unsigned long mask = -__addr_ok(s); - unsigned long res, tmp; - - might_fault(); - - __asm__ __volatile__( - " testl %0, %0\n" - " jz 3f\n" - " andl %0,%%ecx\n" - "0: repne; scasb\n" - " setne %%al\n" - " subl %%ecx,%0\n" - " addl %0,%%eax\n" - "1:\n" - ".section .fixup,\"ax\"\n" - "2: xorl %%eax,%%eax\n" - " jmp 1b\n" - "3: movb $1,%%al\n" - " jmp 1b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 0b,2b\n" - ".previous" - :"=&r" (n), "=&D" (s), "=&a" (res), "=&c" (tmp) - :"0" (n), "1" (s), "2" (0), "3" (mask) - :"cc"); - return res & mask; -} -EXPORT_SYMBOL(strnlen_user); - #ifdef CONFIG_X86_INTEL_USERCOPY static unsigned long __copy_user_intel(void __user *to, const void *from, unsigned long size) @@ -286,47 +157,44 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size) "101: lea 0(%%eax,%0,4),%0\n" " jmp 100b\n" ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b,100b\n" - " .long 2b,100b\n" - " .long 3b,100b\n" - " .long 4b,100b\n" - " .long 5b,100b\n" - " .long 6b,100b\n" - " .long 7b,100b\n" - " .long 8b,100b\n" - " .long 9b,100b\n" - " .long 10b,100b\n" - " .long 11b,100b\n" - " .long 12b,100b\n" - " .long 13b,100b\n" - " .long 14b,100b\n" - " .long 15b,100b\n" - " .long 16b,100b\n" - " .long 17b,100b\n" - " .long 18b,100b\n" - " .long 19b,100b\n" - " .long 20b,100b\n" - " .long 21b,100b\n" - " .long 22b,100b\n" - " .long 23b,100b\n" - " .long 24b,100b\n" - " .long 25b,100b\n" - " .long 26b,100b\n" - " .long 27b,100b\n" - " .long 28b,100b\n" - " .long 29b,100b\n" - " .long 30b,100b\n" - " .long 31b,100b\n" - " .long 32b,100b\n" - " .long 33b,100b\n" - " .long 34b,100b\n" - " .long 35b,100b\n" - " .long 36b,100b\n" - " .long 37b,100b\n" - " .long 99b,101b\n" - ".previous" + _ASM_EXTABLE(1b,100b) + _ASM_EXTABLE(2b,100b) + _ASM_EXTABLE(3b,100b) + _ASM_EXTABLE(4b,100b) + _ASM_EXTABLE(5b,100b) + _ASM_EXTABLE(6b,100b) + 
_ASM_EXTABLE(7b,100b) + _ASM_EXTABLE(8b,100b) + _ASM_EXTABLE(9b,100b) + _ASM_EXTABLE(10b,100b) + _ASM_EXTABLE(11b,100b) + _ASM_EXTABLE(12b,100b) + _ASM_EXTABLE(13b,100b) + _ASM_EXTABLE(14b,100b) + _ASM_EXTABLE(15b,100b) + _ASM_EXTABLE(16b,100b) + _ASM_EXTABLE(17b,100b) + _ASM_EXTABLE(18b,100b) + _ASM_EXTABLE(19b,100b) + _ASM_EXTABLE(20b,100b) + _ASM_EXTABLE(21b,100b) + _ASM_EXTABLE(22b,100b) + _ASM_EXTABLE(23b,100b) + _ASM_EXTABLE(24b,100b) + _ASM_EXTABLE(25b,100b) + _ASM_EXTABLE(26b,100b) + _ASM_EXTABLE(27b,100b) + _ASM_EXTABLE(28b,100b) + _ASM_EXTABLE(29b,100b) + _ASM_EXTABLE(30b,100b) + _ASM_EXTABLE(31b,100b) + _ASM_EXTABLE(32b,100b) + _ASM_EXTABLE(33b,100b) + _ASM_EXTABLE(34b,100b) + _ASM_EXTABLE(35b,100b) + _ASM_EXTABLE(36b,100b) + _ASM_EXTABLE(37b,100b) + _ASM_EXTABLE(99b,101b) : "=&c"(size), "=&D" (d0), "=&S" (d1) : "1"(to), "2"(from), "0"(size) : "eax", "edx", "memory"); @@ -399,29 +267,26 @@ __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size) " popl %0\n" " jmp 8b\n" ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 0b,16b\n" - " .long 1b,16b\n" - " .long 2b,16b\n" - " .long 21b,16b\n" - " .long 3b,16b\n" - " .long 31b,16b\n" - " .long 4b,16b\n" - " .long 41b,16b\n" - " .long 10b,16b\n" - " .long 51b,16b\n" - " .long 11b,16b\n" - " .long 61b,16b\n" - " .long 12b,16b\n" - " .long 71b,16b\n" - " .long 13b,16b\n" - " .long 81b,16b\n" - " .long 14b,16b\n" - " .long 91b,16b\n" - " .long 6b,9b\n" - " .long 7b,16b\n" - ".previous" + _ASM_EXTABLE(0b,16b) + _ASM_EXTABLE(1b,16b) + _ASM_EXTABLE(2b,16b) + _ASM_EXTABLE(21b,16b) + _ASM_EXTABLE(3b,16b) + _ASM_EXTABLE(31b,16b) + _ASM_EXTABLE(4b,16b) + _ASM_EXTABLE(41b,16b) + _ASM_EXTABLE(10b,16b) + _ASM_EXTABLE(51b,16b) + _ASM_EXTABLE(11b,16b) + _ASM_EXTABLE(61b,16b) + _ASM_EXTABLE(12b,16b) + _ASM_EXTABLE(71b,16b) + _ASM_EXTABLE(13b,16b) + _ASM_EXTABLE(81b,16b) + _ASM_EXTABLE(14b,16b) + _ASM_EXTABLE(91b,16b) + _ASM_EXTABLE(6b,9b) + _ASM_EXTABLE(7b,16b) : "=&c"(size), 
"=&D" (d0), "=&S" (d1) : "1"(to), "2"(from), "0"(size) : "eax", "edx", "memory"); @@ -501,29 +366,26 @@ static unsigned long __copy_user_zeroing_intel_nocache(void *to, " popl %0\n" " jmp 8b\n" ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 0b,16b\n" - " .long 1b,16b\n" - " .long 2b,16b\n" - " .long 21b,16b\n" - " .long 3b,16b\n" - " .long 31b,16b\n" - " .long 4b,16b\n" - " .long 41b,16b\n" - " .long 10b,16b\n" - " .long 51b,16b\n" - " .long 11b,16b\n" - " .long 61b,16b\n" - " .long 12b,16b\n" - " .long 71b,16b\n" - " .long 13b,16b\n" - " .long 81b,16b\n" - " .long 14b,16b\n" - " .long 91b,16b\n" - " .long 6b,9b\n" - " .long 7b,16b\n" - ".previous" + _ASM_EXTABLE(0b,16b) + _ASM_EXTABLE(1b,16b) + _ASM_EXTABLE(2b,16b) + _ASM_EXTABLE(21b,16b) + _ASM_EXTABLE(3b,16b) + _ASM_EXTABLE(31b,16b) + _ASM_EXTABLE(4b,16b) + _ASM_EXTABLE(41b,16b) + _ASM_EXTABLE(10b,16b) + _ASM_EXTABLE(51b,16b) + _ASM_EXTABLE(11b,16b) + _ASM_EXTABLE(61b,16b) + _ASM_EXTABLE(12b,16b) + _ASM_EXTABLE(71b,16b) + _ASM_EXTABLE(13b,16b) + _ASM_EXTABLE(81b,16b) + _ASM_EXTABLE(14b,16b) + _ASM_EXTABLE(91b,16b) + _ASM_EXTABLE(6b,9b) + _ASM_EXTABLE(7b,16b) : "=&c"(size), "=&D" (d0), "=&S" (d1) : "1"(to), "2"(from), "0"(size) : "eax", "edx", "memory"); @@ -592,29 +454,26 @@ static unsigned long __copy_user_intel_nocache(void *to, "9: lea 0(%%eax,%0,4),%0\n" "16: jmp 8b\n" ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 0b,16b\n" - " .long 1b,16b\n" - " .long 2b,16b\n" - " .long 21b,16b\n" - " .long 3b,16b\n" - " .long 31b,16b\n" - " .long 4b,16b\n" - " .long 41b,16b\n" - " .long 10b,16b\n" - " .long 51b,16b\n" - " .long 11b,16b\n" - " .long 61b,16b\n" - " .long 12b,16b\n" - " .long 71b,16b\n" - " .long 13b,16b\n" - " .long 81b,16b\n" - " .long 14b,16b\n" - " .long 91b,16b\n" - " .long 6b,9b\n" - " .long 7b,16b\n" - ".previous" + _ASM_EXTABLE(0b,16b) + _ASM_EXTABLE(1b,16b) + _ASM_EXTABLE(2b,16b) + _ASM_EXTABLE(21b,16b) + _ASM_EXTABLE(3b,16b) + 
_ASM_EXTABLE(31b,16b) + _ASM_EXTABLE(4b,16b) + _ASM_EXTABLE(41b,16b) + _ASM_EXTABLE(10b,16b) + _ASM_EXTABLE(51b,16b) + _ASM_EXTABLE(11b,16b) + _ASM_EXTABLE(61b,16b) + _ASM_EXTABLE(12b,16b) + _ASM_EXTABLE(71b,16b) + _ASM_EXTABLE(13b,16b) + _ASM_EXTABLE(81b,16b) + _ASM_EXTABLE(14b,16b) + _ASM_EXTABLE(91b,16b) + _ASM_EXTABLE(6b,9b) + _ASM_EXTABLE(7b,16b) : "=&c"(size), "=&D" (d0), "=&S" (d1) : "1"(to), "2"(from), "0"(size) : "eax", "edx", "memory"); @@ -661,12 +520,9 @@ do { \ "3: lea 0(%3,%0,4),%0\n" \ " jmp 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 4b,5b\n" \ - " .long 0b,3b\n" \ - " .long 1b,2b\n" \ - ".previous" \ + _ASM_EXTABLE(4b,5b) \ + _ASM_EXTABLE(0b,3b) \ + _ASM_EXTABLE(1b,2b) \ : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ : "3"(size), "0"(size), "1"(to), "2"(from) \ : "memory"); \ @@ -703,12 +559,9 @@ do { \ " popl %0\n" \ " jmp 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 4b,5b\n" \ - " .long 0b,3b\n" \ - " .long 1b,6b\n" \ - ".previous" \ + _ASM_EXTABLE(4b,5b) \ + _ASM_EXTABLE(0b,3b) \ + _ASM_EXTABLE(1b,6b) \ : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ : "3"(size), "0"(size), "1"(to), "2"(from) \ : "memory"); \ @@ -717,67 +570,12 @@ do { \ unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n) { -#ifndef CONFIG_X86_WP_WORKS_OK - if (unlikely(boot_cpu_data.wp_works_ok == 0) && - ((unsigned long)to) < TASK_SIZE) { - /* - * When we are in an atomic section (see - * mm/filemap.c:file_read_actor), return the full - * length to take the slow path. - */ - if (in_atomic()) - return n; - - /* - * CPU does not honor the WP bit when writing - * from supervisory mode, and due to preemption or SMP, - * the page tables can change at any time. - * Do it manually. 
Manfred <manfred@colorfullife.com> - */ - while (n) { - unsigned long offset = ((unsigned long)to)%PAGE_SIZE; - unsigned long len = PAGE_SIZE - offset; - int retval; - struct page *pg; - void *maddr; - - if (len > n) - len = n; - -survive: - down_read(&current->mm->mmap_sem); - retval = get_user_pages(current, current->mm, - (unsigned long)to, 1, 1, 0, &pg, NULL); - - if (retval == -ENOMEM && is_global_init(current)) { - up_read(&current->mm->mmap_sem); - congestion_wait(BLK_RW_ASYNC, HZ/50); - goto survive; - } - - if (retval != 1) { - up_read(&current->mm->mmap_sem); - break; - } - - maddr = kmap_atomic(pg, KM_USER0); - memcpy(maddr + offset, from, len); - kunmap_atomic(maddr, KM_USER0); - set_page_dirty_lock(pg); - put_page(pg); - up_read(&current->mm->mmap_sem); - - from += len; - to += len; - n -= len; - } - return n; - } -#endif + stac(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel(to, from, n); + clac(); return n; } EXPORT_SYMBOL(__copy_to_user_ll); @@ -785,10 +583,12 @@ EXPORT_SYMBOL(__copy_to_user_ll); unsigned long __copy_from_user_ll(void *to, const void __user *from, unsigned long n) { + stac(); if (movsl_is_ok(to, from, n)) __copy_user_zeroing(to, from, n); else n = __copy_user_zeroing_intel(to, from, n); + clac(); return n; } EXPORT_SYMBOL(__copy_from_user_ll); @@ -796,11 +596,13 @@ EXPORT_SYMBOL(__copy_from_user_ll); unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, unsigned long n) { + stac(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel((void __user *)to, (const void *)from, n); + clac(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nozero); @@ -808,6 +610,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nozero); unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, unsigned long n) { + stac(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && cpu_has_xmm2) n = __copy_user_zeroing_intel_nocache(to, from, n); @@ -816,6 +619,7 @@ unsigned long 
__copy_from_user_ll_nocache(void *to, const void __user *from, #else __copy_user_zeroing(to, from, n); #endif + clac(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache); @@ -823,6 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n) { + stac(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && cpu_has_xmm2) n = __copy_user_intel_nocache(to, from, n); @@ -831,6 +636,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr #else __copy_user(to, from, n); #endif + clac(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); @@ -848,14 +654,13 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); * Returns number of bytes that could not be copied. * On success, this will be zero. */ -unsigned long -copy_to_user(void __user *to, const void *from, unsigned long n) +unsigned long _copy_to_user(void __user *to, const void *from, unsigned n) { if (access_ok(VERIFY_WRITE, to, n)) n = __copy_to_user(to, from, n); return n; } -EXPORT_SYMBOL(copy_to_user); +EXPORT_SYMBOL(_copy_to_user); /** * copy_from_user: - Copy a block of data from user space. @@ -873,8 +678,7 @@ EXPORT_SYMBOL(copy_to_user); * If some data could not be copied, this function will pad the copied * data to the requested size using zero bytes. 
*/ -unsigned long -_copy_from_user(void *to, const void __user *from, unsigned long n) +unsigned long _copy_from_user(void *to, const void __user *from, unsigned n) { if (access_ok(VERIFY_READ, from, n)) n = __copy_from_user(to, from, n); @@ -883,9 +687,3 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) return n; } EXPORT_SYMBOL(_copy_from_user); - -void copy_from_user_overflow(void) -{ - WARN(1, "Buffer overflow detected!\n"); -} -EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 554b7b528f0..c905e89e19f 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -9,55 +9,6 @@ #include <asm/uaccess.h> /* - * Copy a null terminated string from userspace. - */ - -#define __do_strncpy_from_user(dst,src,count,res) \ -do { \ - long __d0, __d1, __d2; \ - might_fault(); \ - __asm__ __volatile__( \ - " testq %1,%1\n" \ - " jz 2f\n" \ - "0: lodsb\n" \ - " stosb\n" \ - " testb %%al,%%al\n" \ - " jz 1f\n" \ - " decq %1\n" \ - " jnz 0b\n" \ - "1: subq %1,%0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movq %5,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(0b,3b) \ - : "=&r"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ - "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ - : "memory"); \ -} while (0) - -long -__strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res; - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(__strncpy_from_user); - -long -strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - return __strncpy_from_user(dst, src, count); - return res; -} -EXPORT_SYMBOL(strncpy_from_user); - -/* * Zero Userspace */ @@ -67,6 +18,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) might_fault(); /* no memory constraint because it doesn't change any memory gcc knows about */ + 
stac(); asm volatile( " testq %[size8],%[size8]\n" " jz 4f\n" @@ -89,6 +41,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) : [size8] "=&c"(size), [dst] "=&D" (__d0) : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), [zero] "r" (0UL), [eight] "r" (8UL)); + clac(); return size; } EXPORT_SYMBOL(__clear_user); @@ -101,54 +54,6 @@ unsigned long clear_user(void __user *to, unsigned long n) } EXPORT_SYMBOL(clear_user); -/* - * Return the size of a string (including the ending 0) - * - * Return 0 on exception, a value greater than N if too long - */ - -long __strnlen_user(const char __user *s, long n) -{ - long res = 0; - char c; - - while (1) { - if (res>n) - return n+1; - if (__get_user(c, s)) - return 0; - if (!c) - return res+1; - res++; - s++; - } -} -EXPORT_SYMBOL(__strnlen_user); - -long strnlen_user(const char __user *s, long n) -{ - if (!access_ok(VERIFY_READ, s, 1)) - return 0; - return __strnlen_user(s, n); -} -EXPORT_SYMBOL(strnlen_user); - -long strlen_user(const char __user *s) -{ - long res = 0; - char c; - - for (;;) { - if (get_user(c, s)) - return 0; - if (!c) - return res+1; - res++; - s++; - } -} -EXPORT_SYMBOL(strlen_user); - unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len) { if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) { @@ -163,7 +68,7 @@ EXPORT_SYMBOL(copy_in_user); * Since protection fault in copy_from/to_user is not a normal situation, * it is not necessary to optimize tail handling. 
*/ -unsigned long +__visible unsigned long copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) { char c; @@ -179,5 +84,6 @@ copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) for (c = 0, zero_len = len; zerorest && zero_len; --zero_len) if (__put_user_nocheck(c, to++, sizeof(char))) break; + clac(); return len; } diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index a793da5e560..1a2be7c6895 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -1,5 +1,9 @@ # x86 Opcode Maps # +# This is (mostly) based on following documentations. +# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C +# (#326018-047US, June 2013) +# #<Opcode maps> # Table: table-name # Referrer: escaped-name @@ -15,11 +19,15 @@ # EndTable # # AVX Superscripts -# (VEX): this opcode can accept VEX prefix. -# (oVEX): this opcode requires VEX prefix. -# (o128): this opcode only supports 128bit VEX. -# (o256): this opcode only supports 256bit VEX. +# (v): this opcode requires VEX prefix. +# (v1): this opcode only supports 128bit VEX. # +# Last Prefix Superscripts +# - (66): the last prefix is 0x66 +# - (F3): the last prefix is 0xF3 +# - (F2): the last prefix is 0xF2 +# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) +# - (66&F2): Both 0x66 and 0xF2 prefixes are specified. 
Table: one byte opcode Referrer: @@ -199,8 +207,8 @@ a0: MOV AL,Ob a1: MOV rAX,Ov a2: MOV Ob,AL a3: MOV Ov,rAX -a4: MOVS/B Xb,Yb -a5: MOVS/W/D/Q Xv,Yv +a4: MOVS/B Yb,Xb +a5: MOVS/W/D/Q Yv,Xv a6: CMPS/B Xb,Yb a7: CMPS/W/D Xv,Yv a8: TEST AL,Ib @@ -210,7 +218,9 @@ ab: STOS/W/D/Q Yv,rAX ac: LODS/B AL,Xb ad: LODS/W/D/Q rAX,Xv ae: SCAS/B AL,Yb -af: SCAS/W/D/Q rAX,Xv +# Note: The May 2011 Intel manual shows Xv for the second parameter of the +# next instruction but Yv is correct +af: SCAS/W/D/Q rAX,Yv # 0xb0 - 0xbf b0: MOV AL/R8L,Ib b1: MOV CL/R9L,Ib @@ -233,10 +243,10 @@ c0: Grp2 Eb,Ib (1A) c1: Grp2 Ev,Ib (1A) c2: RETN Iw (f64) c3: RETN -c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) -c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) -c6: Grp11 Eb,Ib (1A) -c7: Grp11 Ev,Iz (1A) +c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) +c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) +c6: Grp11A Eb,Ib (1A) +c7: Grp11B Ev,Iz (1A) c8: ENTER Iw,Ib c9: LEAVE (d64) ca: RETF Iw @@ -282,8 +292,8 @@ ef: OUT DX,eAX # 0xf0 - 0xff f0: LOCK (Prefix) f1: -f2: REPNE (Prefix) -f3: REP/REPE (Prefix) +f2: REPNE (Prefix) | XACQUIRE (Prefix) +f3: REP/REPE (Prefix) | XRELEASE (Prefix) f4: HLT f5: CMC f6: Grp3_1 Eb (1A) @@ -315,23 +325,29 @@ AVXcode: 1 0a: 0b: UD2 (1B) 0c: -0d: NOP Ev | GrpP +# AMD's prefetch group. Intel supports prefetchw(/1) only. +0d: GrpP 0e: FEMMS # 3DNow! uses the last imm byte as opcode extension. 0f: 3DNow! 
Pq,Qq,Ib # 0x0f 0x10-0x1f -10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) -11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) -12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) -13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) -14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) -15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) -16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) -17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) +# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands +# but it actually has operands. And also, vmovss and vmovsd only accept 128bit. +# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form. 
+# Many AVX instructions lack v1 superscript, according to Intel AVX-Programming +# Reference A.1 +10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1) +11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1) +12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2) +13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1) +14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66) +15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66) +16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3) +17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) 18: Grp16 (1A) 19: -1a: -1b: +1a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv +1b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,GV,Gv 1c: 1d: 1e: @@ -345,14 +361,14 @@ AVXcode: 1 25: 26: 27: -28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) -29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) -2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) -2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) -2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) -2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) -2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) -2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) +28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66) +29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66) +2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1) +2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66) +2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1) +2d: cvtps2pi Ppi,Wps | 
cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1) +2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1) +2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1) # 0x0f 0x30-0x3f 30: WRMSR 31: RDTSC @@ -388,65 +404,66 @@ AVXcode: 1 4e: CMOVLE/NG Gv,Ev 4f: CMOVNLE/G Gv,Ev # 0x0f 0x50-0x5f -50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) -51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) -52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) -53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) -54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) -55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) -56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) -57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) -58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) -59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) -5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) -5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) -5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) -5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) -5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) -5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) +50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66) +51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1) +52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1) +53: vrcpps 
Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1) +54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66) +55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66) +56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66) +57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66) +58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) +59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) +5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) +5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) +5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) +5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) +5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) +5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1) # 0x0f 0x60-0x6f -60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) -61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) -62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) -63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) -64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) -65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) -66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) -67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) -68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) -69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) -6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) -6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) -6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) -6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) -6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) -6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu 
Vdq,Wdq (F3),(VEX) +60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1) +61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1) +62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1) +63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1) +64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1) +65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1) +66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1) +67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1) +68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1) +69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1) +6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1) +6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1) +6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) +6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) +6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) +6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) # 0x0f 0x70-0x7f -70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) +70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) 71: Grp12 (1A) 72: Grp13 (1A) 73: Grp14 (1A) -74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) -75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) -76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) -77: emms/vzeroupper/vzeroall (VEX) -78: VMREAD Ed/q,Gd/q -79: VMWRITE Gd/q,Ed/q +74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1) +75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1) +76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) +# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. 
+77: emms | vzeroupper | vzeroall +78: VMREAD Ey,Gy +79: VMWRITE Gy,Ey 7a: 7b: -7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) -7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) -7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) -7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) +7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) +7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) +7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) +7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) # 0x0f 0x80-0x8f 80: JO Jz (f64) 81: JNO Jz (f64) -82: JB/JNAE/JC Jz (f64) -83: JNB/JAE/JNC Jz (f64) -84: JZ/JE Jz (f64) -85: JNZ/JNE Jz (f64) +82: JB/JC/JNAE Jz (f64) +83: JAE/JNB/JNC Jz (f64) +84: JE/JZ Jz (f64) +85: JNE/JNZ Jz (f64) 86: JBE/JNA Jz (f64) -87: JNBE/JA Jz (f64) +87: JA/JNBE Jz (f64) 88: JS Jz (f64) 89: JNS Jz (f64) 8a: JP/JPE Jz (f64) @@ -498,22 +515,22 @@ b4: LFS Gv,Mp b5: LGS Gv,Mp b6: MOVZX Gv,Eb b7: MOVZX Gv,Ew -b8: JMPE | POPCNT Gv,Ev (F3) +b8: JMPE (!F3) | POPCNT Gv,Ev (F3) b9: Grp10 (1A) ba: Grp8 Ev,Ib (1A) bb: BTC Ev,Gv -bc: BSF Gv,Ev -bd: BSR Gv,Ev +bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3) +bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3) be: MOVSX Gv,Eb bf: MOVSX Gv,Ew # 0x0f 0xc0-0xcf c0: XADD Eb,Gb c1: XADD Ev,Gv -c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) -c3: movnti Md/q,Gd/q -c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) -c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) -c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) +c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1) +c3: movnti My,Gy +c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1) +c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1) +c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66) c7: Grp9 (1A) c8: 
BSWAP RAX/EAX/R8/R8D c9: BSWAP RCX/ECX/R9/R9D @@ -524,55 +541,55 @@ cd: BSWAP RBP/EBP/R13/R13D ce: BSWAP RSI/ESI/R14/R14D cf: BSWAP RDI/EDI/R15/R15D # 0x0f 0xd0-0xdf -d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) -d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) -d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) -d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) -d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) -d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) -d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) -d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) -d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) -d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) -da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) -db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) -dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) -dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) -de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) -df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) +d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2) +d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1) +d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1) +d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1) +d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1) +d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1) +d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) +d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) +d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) +da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) +db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) +dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) +dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) +de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) +df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) # 0x0f 0xe0-0xef -e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) -e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) -e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) 
-e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) -e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) -e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) -e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) -e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) -e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) -e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) -ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) -eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) -ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) -ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) -ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) -ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) +e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) +e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) +e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) +e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) +e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) +e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) +e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) +e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) +e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) +e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) +ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) +eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) +ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) +ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) +ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) +ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) # 0x0f 0xf0-0xff -f0: lddqu Vdq,Mdq (F2),(VEX) -f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) -f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) -f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) -f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) -f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) -f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) -f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) -f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) -f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) 
-fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) -fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) -fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) -fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) -fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) +f0: vlddqu Vx,Mx (F2) +f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) +f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1) +f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1) +f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1) +f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1) +f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1) +f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1) +f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1) +f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1) +fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1) +fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) +fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) +fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) +fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) ff: EndTable @@ -580,155 +597,193 @@ Table: 3-byte opcode 1 (0x0f 0x38) Referrer: 3-byte escape 1 AVXcode: 2 # 0x0f 0x38 0x00-0x0f -00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) -01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) -02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) -03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) -04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) -05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) -06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) -07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) -08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) -09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) -0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) -0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) -0c: Vpermilps /r (66),(oVEX) -0d: Vpermilpd /r (66),(oVEX) -0e: vtestps /r (66),(oVEX) -0f: vtestpd /r (66),(oVEX) +00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1) +01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1) +02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1) +03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx 
(66),(v1) +04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1) +05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1) +06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1) +07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1) +08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1) +09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1) +0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1) +0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1) +0c: vpermilps Vx,Hx,Wx (66),(v) +0d: vpermilpd Vx,Hx,Wx (66),(v) +0e: vtestps Vx,Wx (66),(v) +0f: vtestpd Vx,Wx (66),(v) # 0x0f 0x38 0x10-0x1f 10: pblendvb Vdq,Wdq (66) 11: 12: -13: +13: vcvtph2ps Vx,Wx,Ib (66),(v) 14: blendvps Vdq,Wdq (66) 15: blendvpd Vdq,Wdq (66) -16: -17: ptest Vdq,Wdq (66),(VEX) -18: vbroadcastss /r (66),(oVEX) -19: vbroadcastsd /r (66),(oVEX),(o256) -1a: vbroadcastf128 /r (66),(oVEX),(o256) +16: vpermps Vqq,Hqq,Wqq (66),(v) +17: vptest Vx,Wx (66) +18: vbroadcastss Vx,Wd (66),(v) +19: vbroadcastsd Vqq,Wq (66),(v) +1a: vbroadcastf128 Vqq,Mdq (66),(v) 1b: -1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) -1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) -1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) +1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) +1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) +1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) 1f: # 0x0f 0x38 0x20-0x2f -20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) -21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) -22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) -23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) -24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) -25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) +20: vpmovsxbw Vx,Ux/Mq (66),(v1) +21: vpmovsxbd Vx,Ux/Md (66),(v1) +22: vpmovsxbq Vx,Ux/Mw (66),(v1) +23: vpmovsxwd Vx,Ux/Mq (66),(v1) +24: vpmovsxwq Vx,Ux/Md (66),(v1) +25: vpmovsxdq Vx,Ux/Mq (66),(v1) 26: 27: -28: pmuldq Vdq,Wdq (66),(VEX),(o128) -29: pcmpeqq Vdq,Wdq (66),(VEX),(o128) -2a: movntdqa Vdq,Mdq (66),(VEX),(o128) -2b: packusdw Vdq,Wdq (66),(VEX),(o128) -2c: vmaskmovps(ld) /r (66),(oVEX) -2d: vmaskmovpd(ld) /r (66),(oVEX) -2e: 
vmaskmovps(st) /r (66),(oVEX) -2f: vmaskmovpd(st) /r (66),(oVEX) +28: vpmuldq Vx,Hx,Wx (66),(v1) +29: vpcmpeqq Vx,Hx,Wx (66),(v1) +2a: vmovntdqa Vx,Mx (66),(v1) +2b: vpackusdw Vx,Hx,Wx (66),(v1) +2c: vmaskmovps Vx,Hx,Mx (66),(v) +2d: vmaskmovpd Vx,Hx,Mx (66),(v) +2e: vmaskmovps Mx,Hx,Vx (66),(v) +2f: vmaskmovpd Mx,Hx,Vx (66),(v) # 0x0f 0x38 0x30-0x3f -30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) -31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) -32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) -33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) -34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) -35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) -36: -37: pcmpgtq Vdq,Wdq (66),(VEX),(o128) -38: pminsb Vdq,Wdq (66),(VEX),(o128) -39: pminsd Vdq,Wdq (66),(VEX),(o128) -3a: pminuw Vdq,Wdq (66),(VEX),(o128) -3b: pminud Vdq,Wdq (66),(VEX),(o128) -3c: pmaxsb Vdq,Wdq (66),(VEX),(o128) -3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) -3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) -3f: pmaxud Vdq,Wdq (66),(VEX),(o128) +30: vpmovzxbw Vx,Ux/Mq (66),(v1) +31: vpmovzxbd Vx,Ux/Md (66),(v1) +32: vpmovzxbq Vx,Ux/Mw (66),(v1) +33: vpmovzxwd Vx,Ux/Mq (66),(v1) +34: vpmovzxwq Vx,Ux/Md (66),(v1) +35: vpmovzxdq Vx,Ux/Mq (66),(v1) +36: vpermd Vqq,Hqq,Wqq (66),(v) +37: vpcmpgtq Vx,Hx,Wx (66),(v1) +38: vpminsb Vx,Hx,Wx (66),(v1) +39: vpminsd Vx,Hx,Wx (66),(v1) +3a: vpminuw Vx,Hx,Wx (66),(v1) +3b: vpminud Vx,Hx,Wx (66),(v1) +3c: vpmaxsb Vx,Hx,Wx (66),(v1) +3d: vpmaxsd Vx,Hx,Wx (66),(v1) +3e: vpmaxuw Vx,Hx,Wx (66),(v1) +3f: vpmaxud Vx,Hx,Wx (66),(v1) # 0x0f 0x38 0x40-0x8f -40: pmulld Vdq,Wdq (66),(VEX),(o128) -41: phminposuw Vdq,Wdq (66),(VEX),(o128) -80: INVEPT Gd/q,Mdq (66) -81: INVPID Gd/q,Mdq (66) +40: vpmulld Vx,Hx,Wx (66),(v1) +41: vphminposuw Vdq,Wdq (66),(v1) +42: +43: +44: +45: vpsrlvd/q Vx,Hx,Wx (66),(v) +46: vpsravd Vx,Hx,Wx (66),(v) +47: vpsllvd/q Vx,Hx,Wx (66),(v) +# Skip 0x48-0x57 +58: vpbroadcastd Vx,Wx (66),(v) +59: vpbroadcastq Vx,Wx (66),(v) +5a: vbroadcasti128 Vqq,Mdq (66),(v) +# Skip 0x5b-0x77 +78: vpbroadcastb Vx,Wx (66),(v) +79: 
vpbroadcastw Vx,Wx (66),(v) +# Skip 0x7a-0x7f +80: INVEPT Gy,Mdq (66) +81: INVPID Gy,Mdq (66) +82: INVPCID Gy,Mdq (66) +8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) +8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) # 0x0f 0x38 0x90-0xbf (FMA) -96: vfmaddsub132pd/ps /r (66),(VEX) -97: vfmsubadd132pd/ps /r (66),(VEX) -98: vfmadd132pd/ps /r (66),(VEX) -99: vfmadd132sd/ss /r (66),(VEX),(o128) -9a: vfmsub132pd/ps /r (66),(VEX) -9b: vfmsub132sd/ss /r (66),(VEX),(o128) -9c: vfnmadd132pd/ps /r (66),(VEX) -9d: vfnmadd132sd/ss /r (66),(VEX),(o128) -9e: vfnmsub132pd/ps /r (66),(VEX) -9f: vfnmsub132sd/ss /r (66),(VEX),(o128) -a6: vfmaddsub213pd/ps /r (66),(VEX) -a7: vfmsubadd213pd/ps /r (66),(VEX) -a8: vfmadd213pd/ps /r (66),(VEX) -a9: vfmadd213sd/ss /r (66),(VEX),(o128) -aa: vfmsub213pd/ps /r (66),(VEX) -ab: vfmsub213sd/ss /r (66),(VEX),(o128) -ac: vfnmadd213pd/ps /r (66),(VEX) -ad: vfnmadd213sd/ss /r (66),(VEX),(o128) -ae: vfnmsub213pd/ps /r (66),(VEX) -af: vfnmsub213sd/ss /r (66),(VEX),(o128) -b6: vfmaddsub231pd/ps /r (66),(VEX) -b7: vfmsubadd231pd/ps /r (66),(VEX) -b8: vfmadd231pd/ps /r (66),(VEX) -b9: vfmadd231sd/ss /r (66),(VEX),(o128) -ba: vfmsub231pd/ps /r (66),(VEX) -bb: vfmsub231sd/ss /r (66),(VEX),(o128) -bc: vfnmadd231pd/ps /r (66),(VEX) -bd: vfnmadd231sd/ss /r (66),(VEX),(o128) -be: vfnmsub231pd/ps /r (66),(VEX) -bf: vfnmsub231sd/ss /r (66),(VEX),(o128) +90: vgatherdd/q Vx,Hx,Wx (66),(v) +91: vgatherqd/q Vx,Hx,Wx (66),(v) +92: vgatherdps/d Vx,Hx,Wx (66),(v) +93: vgatherqps/d Vx,Hx,Wx (66),(v) +94: +95: +96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v) +97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) +98: vfmadd132ps/d Vx,Hx,Wx (66),(v) +99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) +9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) +9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) +9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) +a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) +a8: 
vfmadd213ps/d Vx,Hx,Wx (66),(v) +a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) +ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) +ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) +af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) +b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) +b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) +b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +ba: vfmsub231ps/d Vx,Hx,Wx (66),(v) +bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1) +bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v) +bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) +bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) # 0x0f 0x38 0xc0-0xff -db: aesimc Vdq,Wdq (66),(VEX),(o128) -dc: aesenc Vdq,Wdq (66),(VEX),(o128) -dd: aesenclast Vdq,Wdq (66),(VEX),(o128) -de: aesdec Vdq,Wdq (66),(VEX),(o128) -df: aesdeclast Vdq,Wdq (66),(VEX),(o128) -f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) -f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) +db: VAESIMC Vdq,Wdq (66),(v1) +dc: VAESENC Vdq,Hdq,Wdq (66),(v1) +dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) +de: VAESDEC Vdq,Hdq,Wdq (66),(v1) +df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) +f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) +f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) +f2: ANDN Gy,By,Ey (v) +f3: Grp17 (1A) +f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) +f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) +f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) EndTable Table: 3-byte opcode 2 (0x0f 0x3a) Referrer: 3-byte escape 2 AVXcode: 3 # 0x0f 0x3a 0x00-0xff -04: vpermilps /r,Ib (66),(oVEX) -05: vpermilpd /r,Ib (66),(oVEX) -06: vperm2f128 /r,Ib (66),(oVEX),(o256) -08: roundps Vdq,Wdq,Ib (66),(VEX) -09: roundpd Vdq,Wdq,Ib (66),(VEX) -0a: roundss Vss,Wss,Ib (66),(VEX),(o128) -0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) -0c: blendps 
Vdq,Wdq,Ib (66),(VEX) -0d: blendpd Vdq,Wdq,Ib (66),(VEX) -0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) -0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) -14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) -15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) -16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) -17: extractps Ed,Vdq,Ib (66),(VEX),(o128) -18: vinsertf128 /r,Ib (66),(oVEX),(o256) -19: vextractf128 /r,Ib (66),(oVEX),(o256) -20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) -21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) -22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) -40: dpps Vdq,Wdq,Ib (66),(VEX) -41: dppd Vdq,Wdq,Ib (66),(VEX),(o128) -42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) -44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) -4a: vblendvps /r,Ib (66),(oVEX) -4b: vblendvpd /r,Ib (66),(oVEX) -4c: vpblendvb /r,Ib (66),(oVEX),(o128) -60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) -61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) -62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) -63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) -df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) +00: vpermq Vqq,Wqq,Ib (66),(v) +01: vpermpd Vqq,Wqq,Ib (66),(v) +02: vpblendd Vx,Hx,Wx,Ib (66),(v) +03: +04: vpermilps Vx,Wx,Ib (66),(v) +05: vpermilpd Vx,Wx,Ib (66),(v) +06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) +07: +08: vroundps Vx,Wx,Ib (66) +09: vroundpd Vx,Wx,Ib (66) +0a: vroundss Vss,Wss,Ib (66),(v1) +0b: vroundsd Vsd,Wsd,Ib (66),(v1) +0c: vblendps Vx,Hx,Wx,Ib (66) +0d: vblendpd Vx,Hx,Wx,Ib (66) +0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) +0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1) +14: vpextrb Rd/Mb,Vdq,Ib (66),(v1) +15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) +16: vpextrd/q Ey,Vdq,Ib (66),(v1) +17: vextractps Ed,Vdq,Ib (66),(v1) +18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) +19: vextractf128 Wdq,Vqq,Ib (66),(v) +1d: vcvtps2ph Wx,Vx,Ib (66),(v) +20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) +21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) +22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) +38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) +39: vextracti128 Wdq,Vqq,Ib 
(66),(v) +40: vdpps Vx,Hx,Wx,Ib (66) +41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) +42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) +44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) +46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) +4a: vblendvps Vx,Hx,Wx,Lx (66),(v) +4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) +4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) +60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) +61: vpcmpestri Vdq,Wdq,Ib (66),(v1) +62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) +63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) +f0: RORX Gy,Ey,Ib (F2),(v) EndTable GrpTable: Grp1 @@ -790,7 +845,7 @@ GrpTable: Grp5 2: CALLN Ev (f64) 3: CALLF Ep 4: JMPN Ev (f64) -5: JMPF Ep +5: JMPF Mp 6: PUSH Ev (d64) 7: EndTable @@ -806,8 +861,8 @@ EndTable GrpTable: Grp7 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) -1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) -2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) 3: LIDT Ms 4: SMSW Mw/Rv 5: @@ -824,44 +879,51 @@ EndTable GrpTable: Grp9 1: CMPXCHG8B/16B Mq/Mdq -6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) -7: VMPTRST Mq +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) +7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) EndTable GrpTable: Grp10 EndTable -GrpTable: Grp11 -0: MOV +# Grp11A and Grp11B are expressed as Grp11 in Intel SDM +GrpTable: Grp11A +0: MOV Eb,Ib +7: XABORT Ib (000),(11B) +EndTable + +GrpTable: Grp11B +0: MOV Eb,Iz +7: XBEGIN Jz (000),(11B) EndTable GrpTable: Grp12 -2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) -4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) -6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) +2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1) +4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1) +6: psllw Nq,Ib (11B) | vpsllw 
Hx,Ux,Ib (66),(11B),(v1) EndTable GrpTable: Grp13 -2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) -4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) -6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) +2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) +4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) +6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) EndTable GrpTable: Grp14 -2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) -3: psrldq Udq,Ib (66),(11B),(VEX),(o128) -6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) -7: pslldq Udq,Ib (66),(11B),(VEX),(o128) +2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1) +3: vpsrldq Hx,Ux,Ib (66),(11B),(v1) +6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1) +7: vpslldq Hx,Ux,Ib (66),(11B),(v1) EndTable GrpTable: Grp15 -0: fxsave -1: fxstor -2: ldmxcsr (VEX) -3: stmxcsr (VEX) +0: fxsave | RDFSBASE Ry (F3),(11B) +1: fxstor | RDGSBASE Ry (F3),(11B) +2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) +3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 4: XSAVE 5: XRSTOR | lfence (11B) -6: mfence (11B) +6: XSAVEOPT | mfence (11B) 7: clflush | sfence (11B) EndTable @@ -872,6 +934,12 @@ GrpTable: Grp16 3: prefetch T2 EndTable +GrpTable: Grp17 +1: BLSR By,Ey (v) +2: BLSMSK By,Ey (v) +3: BLSI By,Ey (v) +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH |
