From 51115d4d45700fc7c08306f7ba6e68551f526ae5 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:10 -0400 Subject: x86, fpu: Merge tolerant_fwait() Commit e2e75c91 merged the math exception handler, allowing both 32-bit and 64-bit to handle math exceptions from kernel mode. Switch to using the 64-bit version of tolerant_fwait() without fnclex, which simply ignores the exception if one is still pending from userspace. Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-4-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index a73a8d5a5e6..5d8f9a79fa5 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -77,15 +77,6 @@ static inline void sanitize_i387_state(struct task_struct *tsk) } #ifdef CONFIG_X86_64 - -/* Ignore delayed exceptions from user space */ -static inline void tolerant_fwait(void) -{ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); -} - static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { int err; @@ -220,11 +211,6 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft); static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} #endif -static inline void tolerant_fwait(void) -{ - asm volatile("fnclex ; fwait"); -} - /* perform fxrstor iff the processor has extended states, otherwise frstor */ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { @@ -344,7 +330,10 @@ static inline void __unlazy_fpu(struct task_struct *tsk) static inline void __clear_fpu(struct task_struct *tsk) { if (task_thread_info(tsk)->status & TS_USEDFPU) { - tolerant_fwait(); + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); task_thread_info(tsk)->status &= ~TS_USEDFPU; stts(); } -- cgit v1.2.3-18-g5258 From bfd946cb891800d408decaae268a3480775178a3 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:11 -0400 Subject: x86, fpu: Merge __save_init_fpu() __save_init_fpu() is identical for 32-bit and 64-bit. Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-5-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 5d8f9a79fa5..88065e3a03c 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -197,12 +197,6 @@ static inline void fpu_save_init(struct fpu *fpu) fpu_clear(fpu); } -static inline void __save_init_fpu(struct task_struct *tsk) -{ - fpu_save_init(&tsk->thread.fpu); - task_thread_info(tsk)->status &= ~TS_USEDFPU; -} - #else /* CONFIG_X86_32 */ #ifdef CONFIG_MATH_EMULATION @@ -285,15 +279,14 @@ end: ; } +#endif /* CONFIG_X86_64 */ + static inline void __save_init_fpu(struct task_struct *tsk) { fpu_save_init(&tsk->thread.fpu); task_thread_info(tsk)->status &= ~TS_USEDFPU; } - -#endif /* CONFIG_X86_64 */ - static inline int fpu_fxrstor_checking(struct fpu *fpu) { return fxrstor_checking(&fpu->state->fxsave); -- cgit v1.2.3-18-g5258 From a4d4fbc7735bba6654b20f859135f9d3f8fe7f76 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:12 -0400 Subject: x86-64, fpu: Disable preemption when using TS_USEDFPU Consolidates code and fixes the below race for 64-bit. commit 9fa2f37bfeb798728241cc4a19578ce6e4258f25 Author: torvalds Date: Tue Sep 2 07:37:25 2003 +0000 Be a lot more careful about TS_USEDFPU and preemption We had some races where we testecd (or set) TS_USEDFPU together with sequences that depended on the setting (like clearing or setting the TS flag in %cr0) and we could be preempted in between, which screws up the FPU state, since preemption will itself change USEDFPU and the TS flag. This makes it a lot more explicit: the "internal" low-level FPU functions ("__xxxx_fpu()") all require preemption to be disabled, and the exported "real" functions will make sure that is the case. One case - in __switch_to() - was switched to the non-preempt-safe internal version, since the scheduler itself has already disabled preemption. BKrev: 3f5448b5WRiQuyzAlbajs3qoQjSobw Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-6-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 88065e3a03c..8b40a8379cc 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -387,19 +387,6 @@ static inline void irq_ts_restore(int TS_state) stts(); } -#ifdef CONFIG_X86_64 - -static inline void save_init_fpu(struct task_struct *tsk) -{ - __save_init_fpu(tsk); - stts(); -} - -#define unlazy_fpu __unlazy_fpu -#define clear_fpu __clear_fpu - -#else /* CONFIG_X86_32 */ - /* * These disable preemption on their own and are safe */ @@ -425,8 +412,6 @@ static inline void clear_fpu(struct task_struct *tsk) preempt_enable(); } -#endif /* CONFIG_X86_64 */ - /* * i387 state interaction */ -- cgit v1.2.3-18-g5258 From 820241356d6aa9a895fc10def15794a5a5bfcd98 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:14 -0400 Subject: x86-64, fpu: Simplify constraints for fxsave/fxtstor Use the "R" constraint (legacy register) instead of listing all the possible registers. Clean up the comments as well. Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-8-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 44 +++++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 27 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 8b40a8379cc..768fcb25900 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -81,6 +81,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { int err; + /* See comment in fxsave() below. */ asm volatile("1: rex64/fxrstor (%[fx])\n\t" "2:\n" ".section .fixup,\"ax\"\n" @@ -89,11 +90,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) ".previous\n" _ASM_EXTABLE(1b, 3b) : [err] "=r" (err) -#if 0 /* See comment in fxsave() below. */ - : [fx] "r" (fx), "m" (*fx), "0" (0)); -#else - : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0)); -#endif + : [fx] "R" (fx), "m" (*fx), "0" (0)); return err; } @@ -140,6 +137,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) if (unlikely(err)) return -EFAULT; + /* See comment in fxsave() below. */ asm volatile("1: rex64/fxsave (%[fx])\n\t" "2:\n" ".section .fixup,\"ax\"\n" @@ -148,11 +146,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) ".previous\n" _ASM_EXTABLE(1b, 3b) : [err] "=r" (err), "=m" (*fx) -#if 0 /* See comment in fxsave() below. */ - : [fx] "r" (fx), "0" (0)); -#else - : [fx] "cdaSDb" (fx), "0" (0)); -#endif + : [fx] "R" (fx), "0" (0)); if (unlikely(err) && __clear_user(fx, sizeof(struct i387_fxsave_struct))) err = -EFAULT; @@ -165,26 +159,22 @@ static inline void fpu_fxsave(struct fpu *fpu) /* Using "rex64; fxsave %0" is broken because, if the memory operand uses any extended registers for addressing, a second REX prefix will be generated (to the assembler, rex64 followed by semicolon - is a separate instruction), and hence the 64-bitness is lost. */ -#if 0 - /* Using "fxsaveq %0" would be the ideal choice, but is only supported - starting with gas 2.16. */ - __asm__ __volatile__("fxsaveq %0" - : "=m" (fpu->state->fxsave)); -#elif 0 - /* Using, as a workaround, the properly prefixed form below isn't + is a separate instruction), and hence the 64-bitness is lost. + Using "fxsaveq %0" would be the ideal choice, but is only supported + starting with gas 2.16. + asm volatile("fxsaveq %0" + : "=m" (fpu->state->fxsave)); + Using, as a workaround, the properly prefixed form below isn't accepted by any binutils version so far released, complaining that the same type of prefix is used twice if an extended register is - needed for addressing (fix submitted to mainline 2005-11-21). */ - __asm__ __volatile__("rex64/fxsave %0" - : "=m" (fpu->state->fxsave)); -#else - /* This, however, we can work around by forcing the compiler to select + needed for addressing (fix submitted to mainline 2005-11-21). + asm volatile("rex64/fxsave %0" + : "=m" (fpu->state->fxsave)); + This, however, we can work around by forcing the compiler to select an addressing mode that doesn't require extended registers. */ - __asm__ __volatile__("rex64/fxsave (%1)" - : "=m" (fpu->state->fxsave) - : "cdaSDb" (&fpu->state->fxsave)); -#endif + asm volatile("rex64/fxsave (%[fx])" + : "=m" (fpu->state->fxsave) + : [fx] "R" (&fpu->state->fxsave)); } static inline void fpu_save_init(struct fpu *fpu) -- cgit v1.2.3-18-g5258 From 8eb91a577d7763d21628f6761045328784b1911c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:16 -0400 Subject: x86, fpu: Remove unnecessary ifdefs from i387 code. Remove ifdefs for code that the compiler can optimize away on 64-bit. Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-10-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 768fcb25900..42b507e16d1 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -55,6 +55,12 @@ extern int save_i387_xstate_ia32(void __user *buf); extern int restore_i387_xstate_ia32(void __user *buf); #endif +#ifdef CONFIG_MATH_EMULATION +extern void finit_soft_fpu(struct i387_soft_struct *soft); +#else +static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} +#endif + #define X87_FSW_ES (1 << 7) /* Exception Summary */ static __always_inline __pure bool use_xsaveopt(void) @@ -189,12 +195,6 @@ static inline void fpu_save_init(struct fpu *fpu) #else /* CONFIG_X86_32 */ -#ifdef CONFIG_MATH_EMULATION -extern void finit_soft_fpu(struct i387_soft_struct *soft); -#else -static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} -#endif - /* perform fxrstor iff the processor has extended states, otherwise frstor */ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { -- cgit v1.2.3-18-g5258 From eec73f813ab0954253e5e2168119c4555f83f07d Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:17 -0400 Subject: x86, fpu: Remove PSHUFB_XMM5_* macros The PSHUFB_XMM5_* macros are no longer used. Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-11-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 42b507e16d1..907967e4fa8 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -465,7 +465,4 @@ extern void fpu_finit(struct fpu *fpu); #endif /* __ASSEMBLY__ */ -#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 -#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5 - #endif /* _ASM_X86_I387_H */ -- cgit v1.2.3-18-g5258 From 58a992b9cbaf449aeebd3575c3695a9eb5d95b5e Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:18 -0400 Subject: x86-32, fpu: Rewrite fpu_save_init() Rewrite fpu_save_init() to prepare for merging with 64-bit. Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-12-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 47 +++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 25 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 907967e4fa8..b45abefb89f 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -73,6 +73,11 @@ static __always_inline __pure bool use_xsave(void) return static_cpu_has(X86_FEATURE_XSAVE); } +static __always_inline __pure bool use_fxsr(void) +{ + return static_cpu_has(X86_FEATURE_FXSR); +} + extern void __sanitize_i387_state(struct task_struct *); static inline void sanitize_i387_state(struct task_struct *tsk) @@ -211,6 +216,12 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) return 0; } +static inline void fpu_fxsave(struct fpu *fpu) +{ + asm volatile("fxsave %[fx]" + : [fx] "=m" (fpu->state->fxsave)); +} + /* We need a safe address that is cheap to find and that is already in L1 during context switch. The best choices are unfortunately different for UP and SMP */ @@ -226,36 +237,24 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) static inline void fpu_save_init(struct fpu *fpu) { if (use_xsave()) { - struct xsave_struct *xstate = &fpu->state->xsave; - struct i387_fxsave_struct *fx = &fpu->state->fxsave; - fpu_xsave(fpu); /* * xsave header may indicate the init state of the FP. */ - if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) - goto end; - - if (unlikely(fx->swd & X87_FSW_ES)) - asm volatile("fnclex"); - - /* - * we can do a simple return here or be paranoid :) - */ - goto clear_state; + if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) + return; + } else if (use_fxsr()) { + fpu_fxsave(fpu); + } else { + asm volatile("fsave %[fx]; fwait" + : [fx] "=m" (fpu->state->fsave)); + return; } - /* Use more nops than strictly needed in case the compiler - varies code */ - alternative_input( - "fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4, - "fxsave %[fx]\n" - "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", - X86_FEATURE_FXSR, - [fx] "m" (fpu->state->fxsave), - [fsw] "m" (fpu->state->fxsave.swd) : "memory"); -clear_state: + if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) + asm volatile("fnclex"); + /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is pending. Clear the x87 state here by setting it to fixed values. safe_address is a random variable that should be in L1 */ @@ -265,8 +264,6 @@ clear_state: "fildl %[addr]", /* set F?P to defined value */ X86_FEATURE_FXSAVE_LEAK, [addr] "m" (safe_address)); -end: - ; } #endif /* CONFIG_X86_64 */ -- cgit v1.2.3-18-g5258 From b2b57fe053c9cf8b8af5a0e826a465996afed0ff Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:19 -0400 Subject: x86, fpu: Merge fpu_save_init() Make 64-bit use the 32-bit version of fpu_save_init(). Remove unused clear_fpu_state(). Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-13-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 48 ++++----------------------------------------- 1 file changed, 4 insertions(+), 44 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index b45abefb89f..70626ed96cb 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -105,36 +105,6 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) return err; } -/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception - is pending. Clear the x87 state here by setting it to fixed - values. The kernel data segment can be sometimes 0 and sometimes - new user value. Both should be ok. - Use the PDA as safe address because it should be already in L1. */ -static inline void fpu_clear(struct fpu *fpu) -{ - struct xsave_struct *xstate = &fpu->state->xsave; - struct i387_fxsave_struct *fx = &fpu->state->fxsave; - - /* - * xsave header may indicate the init state of the FP. - */ - if (use_xsave() && - !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) - return; - - if (unlikely(fx->swd & X87_FSW_ES)) - asm volatile("fnclex"); - alternative_input(ASM_NOP8 ASM_NOP2, - " emms\n" /* clear stack tags */ - " fildl %%gs:0", /* load to clear state */ - X86_FEATURE_FXSAVE_LEAK); -} - -static inline void clear_fpu_state(struct task_struct *tsk) -{ - fpu_clear(&tsk->thread.fpu); -} - static inline int fxsave_user(struct i387_fxsave_struct __user *fx) { int err; @@ -188,16 +158,6 @@ static inline void fpu_fxsave(struct fpu *fpu) : [fx] "R" (&fpu->state->fxsave)); } -static inline void fpu_save_init(struct fpu *fpu) -{ - if (use_xsave()) - fpu_xsave(fpu); - else - fpu_fxsave(fpu); - - fpu_clear(fpu); -} - #else /* CONFIG_X86_32 */ /* perform fxrstor iff the processor has extended states, otherwise frstor */ @@ -222,6 +182,8 @@ static inline void fpu_fxsave(struct fpu *fpu) : [fx] "=m" (fpu->state->fxsave)); } +#endif /* CONFIG_X86_64 */ + /* We need a safe address that is cheap to find and that is already in L1 during context switch. The best choices are unfortunately different for UP and SMP */ @@ -259,15 +221,13 @@ static inline void fpu_save_init(struct fpu *fpu) is pending. Clear the x87 state here by setting it to fixed values. safe_address is a random variable that should be in L1 */ alternative_input( - GENERIC_NOP8 GENERIC_NOP2, + ASM_NOP8 ASM_NOP2, "emms\n\t" /* clear stack tags */ - "fildl %[addr]", /* set F?P to defined value */ + "fildl %P[addr]", /* set F?P to defined value */ X86_FEATURE_FXSAVE_LEAK, [addr] "m" (safe_address)); } -#endif /* CONFIG_X86_64 */ - static inline void __save_init_fpu(struct task_struct *tsk) { fpu_save_init(&tsk->thread.fpu); -- cgit v1.2.3-18-g5258 From d7acb92fea932ad2e7846480aeacddc2c03c8485 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 13 Oct 2010 16:00:29 -0700 Subject: x86-64, asm: If the assembler supports fxsave64, use it Kbuild allows for us to probe for the existence of specific constructs in the assembler, use them to find out if we can use fxsave64 and permit the compiler to generate better code. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index a73a8d5a5e6..70f105b352e 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -175,7 +175,7 @@ static inline void fpu_fxsave(struct fpu *fpu) uses any extended registers for addressing, a second REX prefix will be generated (to the assembler, rex64 followed by semicolon is a separate instruction), and hence the 64-bitness is lost. */ -#if 0 +#ifdef CONFIG_AS_FXSAVEQ /* Using "fxsaveq %0" would be the ideal choice, but is only supported starting with gas 2.16. */ __asm__ __volatile__("fxsaveq %0" -- cgit v1.2.3-18-g5258