aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFenghua Yu <fenghua.yu@intel.com>2011-05-17 15:29:15 -0700
committerH. Peter Anvin <hpa@linux.intel.com>2011-05-17 15:40:28 -0700
commit4307bec9344aed83f8107c3eb4285bd9d218fc10 (patch)
treed18d0b7e6c5d0a93fc84504d546add5f1ca09c5a
parente365c9df2f2f001450decf9512412d2d5bd1cdef (diff)
x86, mem: copy_user_64.S: Support copy_to/from_user by enhanced REP MOVSB/STOSB
Support copy_to_user/copy_from_user() by enhanced REP MOVSB/STOSB. On processors supporting enhanced REP MOVSB/STOSB, the alternative copy_user_enhanced_fast_string function using enhanced rep movsb overrides the original function and the fast string function. Signed-off-by: Fenghua Yu <fenghua.yu@intel.com> Link: http://lkml.kernel.org/r/1305671358-14478-7-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
-rw-r--r--arch/x86/lib/copy_user_64.S65
1 files changed, 55 insertions, 10 deletions
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 99e48261519..d17a1170bbf 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -15,23 +15,30 @@
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
- .macro ALTERNATIVE_JUMP feature,orig,alt
+/*
+ * By placing feature2 after feature1 in altinstructions section, we logically
+ * implement:
+ * If CPU has feature2, jmp to alt2 is used
+ * else if CPU has feature1, jmp to alt1 is used
+ * else jmp to orig is used.
+ */
+ .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
0:
.byte 0xe9 /* 32bit jump */
.long \orig-1f /* by default jump to orig */
1:
.section .altinstr_replacement,"ax"
2: .byte 0xe9 /* near jump with 32bit immediate */
- .long \alt-1b /* offset */ /* or alternatively to alt */
+ .long \alt1-1b /* offset */ /* or alternatively to alt1 */
+3: .byte 0xe9 /* near jump with 32bit immediate */
+ .long \alt2-1b /* offset */ /* or alternatively to alt2 */
.previous
+
.section .altinstructions,"a"
- .align 8
- .quad 0b
- .quad 2b
- .word \feature /* when feature is set */
- .byte 5
- .byte 5
+ altinstruction_entry 0b,2b,\feature1,5,5
+ altinstruction_entry 0b,3b,\feature2,5,5
.previous
.endm
@@ -73,7 +80,9 @@ ENTRY(_copy_to_user)
jc bad_to_user
cmpq TI_addr_limit(%rax),%rcx
jae bad_to_user
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
+ copy_user_generic_unrolled,copy_user_generic_string, \
+ copy_user_enhanced_fast_string
CFI_ENDPROC
ENDPROC(_copy_to_user)
@@ -86,7 +95,9 @@ ENTRY(_copy_from_user)
jc bad_from_user
cmpq TI_addr_limit(%rax),%rcx
jae bad_from_user
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
+ copy_user_generic_unrolled,copy_user_generic_string, \
+ copy_user_enhanced_fast_string
CFI_ENDPROC
ENDPROC(_copy_from_user)
@@ -255,3 +266,37 @@ ENTRY(copy_user_generic_string)
.previous
CFI_ENDPROC
ENDPROC(copy_user_generic_string)
+
+/*
+ * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
+ * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count
+ *
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
+ENTRY(copy_user_enhanced_fast_string)
+ CFI_STARTPROC
+ andl %edx,%edx
+ jz 2f
+ movl %edx,%ecx
+1: rep
+ movsb
+2: xorl %eax,%eax
+ ret
+
+ .section .fixup,"ax"
+12: movl %ecx,%edx /* ecx is zerorest also */
+ jmp copy_user_handle_tail
+ .previous
+
+ .section __ex_table,"a"
+ .align 8
+ .quad 1b,12b
+ .previous
+ CFI_ENDPROC
+ENDPROC(copy_user_enhanced_fast_string)