From 8c06585d6431addadd94903843dfbcd315b42d4e Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 17 Jul 2010 09:03:26 -0400 Subject: x86: Remove redundant K6 MSRs MSR_K6_EFER is unused, and MSR_K6_STAR is redundant with MSR_STAR. Signed-off-by: Brian Gerst LKML-Reference: <1279371808-24804-1-git-send-email-brgerst@gmail.com> Reviewed-by: Pekka Enberg Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/msr-index.h | 2 -- arch/x86/kvm/svm.c | 6 +++--- arch/x86/kvm/vmx.c | 8 ++++---- arch/x86/kvm/x86.c | 2 +- 4 files changed, 8 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 8c7ae431862..6068e0e06e0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -159,8 +159,6 @@ #define MSR_K7_FID_VID_STATUS 0xc0010042 /* K6 MSRs */ -#define MSR_K6_EFER 0xc0000080 -#define MSR_K6_STAR 0xc0000081 #define MSR_K6_WHCR 0xc0000082 #define MSR_K6_UWCCR 0xc0000085 #define MSR_K6_EPMR 0xc0000086 diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ce438e0fdd2..24a22069629 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -130,7 +130,7 @@ static struct svm_direct_access_msrs { u32 index; /* Index of the MSR */ bool always; /* True if intercept is always on */ } direct_access_msrs[] = { - { .index = MSR_K6_STAR, .always = true }, + { .index = MSR_STAR, .always = true }, { .index = MSR_IA32_SYSENTER_CS, .always = true }, #ifdef CONFIG_X86_64 { .index = MSR_GS_BASE, .always = true }, @@ -2431,7 +2431,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) *data = tsc_offset + native_read_tsc(); break; } - case MSR_K6_STAR: + case MSR_STAR: *data = svm->vmcb->save.star; break; #ifdef CONFIG_X86_64 @@ -2555,7 +2555,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) break; } - case MSR_K6_STAR: + case MSR_STAR: svm->vmcb->save.star = data; break; #ifdef CONFIG_X86_64 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ee03679efe7..b42ad25d564 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -231,14 +231,14 @@ static u64 host_efer; static void ept_save_pdptrs(struct kvm_vcpu *vcpu); /* - * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it + * Keep MSR_STAR at the end, as setup_msrs() will try to optimize it * away by decrementing the array size. */ static const u32 vmx_msr_index[] = { #ifdef CONFIG_X86_64 MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, #endif - MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR, + MSR_EFER, MSR_TSC_AUX, MSR_STAR, }; #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) @@ -1057,10 +1057,10 @@ static void setup_msrs(struct vcpu_vmx *vmx) if (index >= 0 && vmx->rdtscp_enabled) move_msr_up(vmx, index, save_nmsrs++); /* - * MSR_K6_STAR is only needed on long mode guests, and only + * MSR_STAR is only needed on long mode guests, and only * if efer.sce is enabled. 
*/ - index = __find_msr_index(vmx, MSR_K6_STAR); + index = __find_msr_index(vmx, MSR_STAR); if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE)) move_msr_up(vmx, index, save_nmsrs++); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 05d571f6f19..6127468ebbd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -671,7 +671,7 @@ static u32 msrs_to_save[] = { HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, - MSR_K6_STAR, + MSR_STAR, #ifdef CONFIG_X86_64 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif -- cgit v1.2.3-18-g5258 From cfaa71ee9794472598d3966c3315cd6bd8f953d3 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 17 Jul 2010 09:03:27 -0400 Subject: x86: Use symbolic MSR names Use symbolic MSR names instead of hardcoding the MSR index. Signed-off-by: Brian Gerst LKML-Reference: <1279371808-24804-2-git-send-email-brgerst@gmail.com> Reviewed-by: Pekka Enberg Signed-off-by: H. Peter Anvin --- arch/x86/kernel/acpi/realmode/wakeup.S | 2 +- arch/x86/kernel/verify_cpu_64.S | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index 580b4e29601..28595d6df47 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S @@ -104,7 +104,7 @@ _start: movl %eax, %ecx orl %edx, %ecx jz 1f - movl $0xc0000080, %ecx + movl $MSR_EFER, %ecx wrmsr 1: diff --git a/arch/x86/kernel/verify_cpu_64.S b/arch/x86/kernel/verify_cpu_64.S index 45b6f8a975a..56a8c2a867d 100644 --- a/arch/x86/kernel/verify_cpu_64.S +++ b/arch/x86/kernel/verify_cpu_64.S @@ -31,6 +31,7 @@ */ #include +#include verify_cpu: pushfl # Save caller passed flags @@ -88,7 +89,7 @@ verify_cpu_sse_test: je verify_cpu_sse_ok test %di,%di jz verify_cpu_no_longmode # only try to force SSE on AMD - movl $0xc0010015,%ecx # HWCR + movl $MSR_K7_HWCR,%ecx rdmsr btr $15,%eax # enable SSE wrmsr -- cgit v1.2.3-18-g5258 From 650fb4393dff543bc980d361555c489fbdeed088 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 17 Jul 2010 09:03:28 -0400 Subject: x86-64: Simplify loading initial_gs Load initial_gs as two 32-bit values instead of splitting a 64-bit value. Signed-off-by: Brian Gerst LKML-Reference: <1279371808-24804-3-git-send-email-brgerst@gmail.com> Reviewed-by: Pekka Enberg Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head_64.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 3d1e6f16b7a..239046bd447 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -234,9 +234,8 @@ ENTRY(secondary_startup_64) * init data section till per cpu areas are set up. */ movl $MSR_GS_BASE,%ecx - movq initial_gs(%rip),%rax - movq %rax,%rdx - shrq $32,%rdx + movl initial_gs(%rip),%eax + movl initial_gs+4(%rip),%edx wrmsr /* esi is pointer to real mode structure with interesting info. -- cgit v1.2.3-18-g5258 From 113fc5a6e8c2288619ff7e8187a6f556b7e0d372 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 27 Jul 2010 17:01:49 -0700 Subject: x86: Add memory modify constraints to xchg() and cmpxchg() xchg() and cmpxchg() modify their memory operands, not merely read them. For some versions of gcc the "memory" clobber has apparently dealt with the situation, but not for all. Originally-by: Linus Torvalds Signed-off-by: H. 
Peter Anvin Cc: Glauber Costa Cc: Avi Kivity Cc: Peter Palfrader Cc: Greg KH Cc: Alan Cox Cc: Zachary Amsden Cc: Marcelo Tosatti Cc: LKML-Reference: <4C4F7277.8050306@zytor.com> --- arch/x86/include/asm/cmpxchg_32.h | 68 +++++++++++++++++++-------------------- arch/x86/include/asm/cmpxchg_64.h | 40 +++++++++++------------ 2 files changed, 54 insertions(+), 54 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 8859e12dd3c..c1cf59d72f0 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -27,20 +27,20 @@ struct __xchg_dummy { switch (size) { \ case 1: \ asm volatile("xchgb %b0,%1" \ - : "=q" (__x) \ - : "m" (*__xg(ptr)), "0" (__x) \ + : "=q" (__x), "+m" (*__xg(ptr)) \ + : "0" (__x) \ : "memory"); \ break; \ case 2: \ asm volatile("xchgw %w0,%1" \ - : "=r" (__x) \ - : "m" (*__xg(ptr)), "0" (__x) \ + : "=r" (__x), "+m" (*__xg(ptr)) \ + : "0" (__x) \ : "memory"); \ break; \ case 4: \ asm volatile("xchgl %0,%1" \ - : "=r" (__x) \ - : "m" (*__xg(ptr)), "0" (__x) \ + : "=r" (__x), "+m" (*__xg(ptr)) \ + : "0" (__x) \ : "memory"); \ break; \ default: \ @@ -70,14 +70,14 @@ static inline void __set_64bit(unsigned long long *ptr, unsigned int low, unsigned int high) { asm volatile("\n1:\t" - "movl (%0), %%eax\n\t" - "movl 4(%0), %%edx\n\t" - LOCK_PREFIX "cmpxchg8b (%0)\n\t" + "movl (%1), %%eax\n\t" + "movl 4(%1), %%edx\n\t" + LOCK_PREFIX "cmpxchg8b (%1)\n\t" "jnz 1b" - : /* no outputs */ - : "D"(ptr), - "b"(low), - "c"(high) + : "=m" (*ptr) + : "D" (ptr), + "b" (low), + "c" (high) : "ax", "dx", "memory"); } @@ -121,21 +121,21 @@ extern void __cmpxchg_wrong_size(void); __typeof__(*(ptr)) __new = (new); \ switch (size) { \ case 1: \ - asm volatile(lock "cmpxchgb %b1,%2" \ - : "=a"(__ret) \ - : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ + asm volatile(lock "cmpxchgb %b2,%1" \ + : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "q" (__new), "0" (__old) \ : "memory"); \ break; \ case 2: \ - asm volatile(lock "cmpxchgw %w1,%2" \ - : "=a"(__ret) \ - : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + asm volatile(lock "cmpxchgw %w2,%1" \ + : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "r" (__new), "0" (__old) \ : "memory"); \ break; \ case 4: \ - asm volatile(lock "cmpxchgl %1,%2" \ - : "=a"(__ret) \ - : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + asm volatile(lock "cmpxchgl %2,%1" \ + : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "r" (__new), "0" (__old) \ : "memory"); \ break; \ default: \ @@ -180,12 +180,12 @@ static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long new) { unsigned long long prev; - asm volatile(LOCK_PREFIX "cmpxchg8b %3" - : "=A"(prev) - : "b"((unsigned long)new), - "c"((unsigned long)(new >> 32)), - "m"(*__xg(ptr)), - "0"(old) + asm volatile(LOCK_PREFIX "cmpxchg8b %1" + : "=A" (prev), + "+m" (*__xg(ptr)) + : "b" ((unsigned long)new), + "c" ((unsigned long)(new >> 32)), + "0" (old) : "memory"); return prev; } @@ -195,12 +195,12 @@ static inline unsigned long long __cmpxchg64_local(volatile void *ptr, unsigned long long new) { unsigned long long prev; - asm volatile("cmpxchg8b %3" - : "=A"(prev) - : "b"((unsigned long)new), - "c"((unsigned long)(new >> 32)), - "m"(*__xg(ptr)), - "0"(old) + asm volatile("cmpxchg8b %1" + : "=A" (prev), + "+m" (*__xg(ptr)) + : "b" ((unsigned long)new), + "c" ((unsigned long)(new >> 32)), + "0" (old) : "memory"); return prev; } diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 485ae415fae..b92f147339f 100644 --- 
a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -26,26 +26,26 @@ extern void __cmpxchg_wrong_size(void); switch (size) { \ case 1: \ asm volatile("xchgb %b0,%1" \ - : "=q" (__x) \ - : "m" (*__xg(ptr)), "0" (__x) \ + : "=q" (__x), "+m" (*__xg(ptr)) \ + : "0" (__x) \ : "memory"); \ break; \ case 2: \ asm volatile("xchgw %w0,%1" \ - : "=r" (__x) \ - : "m" (*__xg(ptr)), "0" (__x) \ + : "=r" (__x), "+m" (*__xg(ptr)) \ + : "0" (__x) \ : "memory"); \ break; \ case 4: \ asm volatile("xchgl %k0,%1" \ - : "=r" (__x) \ - : "m" (*__xg(ptr)), "0" (__x) \ + : "=r" (__x), "+m" (*__xg(ptr)) \ + : "0" (__x) \ : "memory"); \ break; \ case 8: \ asm volatile("xchgq %0,%1" \ - : "=r" (__x) \ - : "m" (*__xg(ptr)), "0" (__x) \ + : "=r" (__x), "+m" (*__xg(ptr)) \ + : "0" (__x) \ : "memory"); \ break; \ default: \ @@ -71,27 +71,27 @@ extern void __cmpxchg_wrong_size(void); __typeof__(*(ptr)) __new = (new); \ switch (size) { \ case 1: \ - asm volatile(lock "cmpxchgb %b1,%2" \ - : "=a"(__ret) \ - : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ + asm volatile(lock "cmpxchgb %b2,%1" \ + : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "q" (__new), "0" (__old) \ : "memory"); \ break; \ case 2: \ - asm volatile(lock "cmpxchgw %w1,%2" \ - : "=a"(__ret) \ - : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + asm volatile(lock "cmpxchgw %w2,%1" \ + : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "r" (__new), "0" (__old) \ : "memory"); \ break; \ case 4: \ - asm volatile(lock "cmpxchgl %k1,%2" \ - : "=a"(__ret) \ - : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + asm volatile(lock "cmpxchgl %k2,%1" \ + : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "r" (__new), "0" (__old) \ : "memory"); \ break; \ case 8: \ - asm volatile(lock "cmpxchgq %1,%2" \ - : "=a"(__ret) \ - : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + asm volatile(lock "cmpxchgq %2,%1" \ + : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "r" (__new), "0" (__old) \ : "memory"); \ break; \ default: \ -- cgit v1.2.3-18-g5258 From 69309a05907546fb686b251d4ab041c26afe1e1d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 27 Jul 2010 23:29:52 -0700 Subject: x86, asm: Clean up and simplify set_64bit() Clean up and simplify set_64bit(). This code is quite old (1.3.11) and contains a fair bit of auxilliary machinery that current versions of gcc handle just fine automatically. Worse, the auxilliary machinery can actually cause an unnecessary spill to memory. Furthermore, the loading of the old value inside the loop in the 32-bit case is unnecessary: if the value doesn't match, the CMPXCHG8B instruction will already have loaded the "new previous" value for us. Clean up the comment, too, and remove page references to obsolete versions of the Intel SDM. Signed-off-by: H. Peter Anvin LKML-Reference: --- arch/x86/include/asm/cmpxchg_32.h | 67 ++++++++++++--------------------------- arch/x86/include/asm/cmpxchg_64.h | 4 +-- 2 files changed, 21 insertions(+), 50 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index c1cf59d72f0..20955ea7bc1 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -53,60 +53,33 @@ struct __xchg_dummy { __xchg((v), (ptr), sizeof(*ptr)) /* - * The semantics of XCHGCMP8B are a bit strange, this is why - * there is a loop and the loading of %%eax and %%edx has to - * be inside. This inlines well in most cases, the cached - * cost is around ~38 cycles. 
(in the future we might want - * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that - * might have an implicit FPU-save as a cost, so it's not - * clear which path to go.) + * CMPXCHG8B only writes to the target if we had the previous + * value in registers, otherwise it acts as a read and gives us the + * "new previous" value. That is why there is a loop. Preloading + * EDX:EAX is a performance optimization: in the common case it means + * we need only one locked operation. * - * cmpxchg8b must be used with the lock prefix here to allow - * the instruction to be executed atomically, see page 3-102 - * of the instruction set reference 24319102.pdf. We need - * the reader side to see the coherent 64bit value. + * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very + * least an FPU save and/or %cr0.ts manipulation. + * + * cmpxchg8b must be used with the lock prefix here to allow the + * instruction to be executed atomically. We need to have the reader + * side to see the coherent 64bit value. */ -static inline void __set_64bit(unsigned long long *ptr, - unsigned int low, unsigned int high) +static inline void set_64bit(volatile u64 *ptr, u64 value) { + u32 low = value; + u32 high = value >> 32; + u64 prev = *ptr; + asm volatile("\n1:\t" - "movl (%1), %%eax\n\t" - "movl 4(%1), %%edx\n\t" - LOCK_PREFIX "cmpxchg8b (%1)\n\t" + LOCK_PREFIX "cmpxchg8b %0\n\t" "jnz 1b" - : "=m" (*ptr) - : "D" (ptr), - "b" (low), - "c" (high) - : "ax", "dx", "memory"); -} - -static inline void __set_64bit_constant(unsigned long long *ptr, - unsigned long long value) -{ - __set_64bit(ptr, (unsigned int)value, (unsigned int)(value >> 32)); -} - -#define ll_low(x) *(((unsigned int *)&(x)) + 0) -#define ll_high(x) *(((unsigned int *)&(x)) + 1) - -static inline void __set_64bit_var(unsigned long long *ptr, - unsigned long long value) -{ - __set_64bit(ptr, ll_low(value), ll_high(value)); + : "=m" (*ptr), "+A" (prev) + : "b" (low), "c" (high) + : "memory"); } -#define set_64bit(ptr, value) \ - (__builtin_constant_p((value)) \ - ? __set_64bit_constant((ptr), (value)) \ - : __set_64bit_var((ptr), (value))) - -#define _set_64bit(ptr, value) \ - (__builtin_constant_p(value) \ - ? __set_64bit(ptr, (unsigned int)(value), \ - (unsigned int)((value) >> 32)) \ - : __set_64bit(ptr, ll_low((value)), ll_high((value)))) - extern void __cmpxchg_wrong_size(void); /* diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index b92f147339f..9596e7c6196 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -5,13 +5,11 @@ #define __xg(x) ((volatile long *)(x)) -static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) +static inline void set_64bit(volatile u64 *ptr, u64 val) { *ptr = val; } -#define _set_64bit set_64bit - extern void __xchg_wrong_size(void); extern void __cmpxchg_wrong_size(void); -- cgit v1.2.3-18-g5258 From 4532b305e8f0c238dd73048068ff8a6dd1380291 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 28 Jul 2010 15:18:35 -0700 Subject: x86, asm: Clean up and simplify Remove the __xg() hack to create a memory barrier near xchg and cmpxchg; it has been there since 1.3.11 but should not be necessary with "asm volatile" and a "memory" clobber, neither of which were there in the original implementation. However, we *should* make this a volatile reference. Signed-off-by: H. 
Peter Anvin LKML-Reference: --- arch/x86/include/asm/cmpxchg_32.h | 75 ++++++++++++++++++++++----------------- arch/x86/include/asm/cmpxchg_64.h | 61 +++++++++++++++++++++---------- 2 files changed, 84 insertions(+), 52 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 20955ea7bc1..f5bd1fd388f 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -11,38 +11,42 @@ extern void __xchg_wrong_size(void); /* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - * but generally the primitive is invalid, *ptr is output argument. --ANK + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. + * Since this is generally used to protect other memory information, we + * use "asm volatile" and "memory" clobbers to prevent gcc from moving + * information around. */ - -struct __xchg_dummy { - unsigned long a[100]; -}; -#define __xg(x) ((struct __xchg_dummy *)(x)) - #define __xchg(x, ptr, size) \ ({ \ __typeof(*(ptr)) __x = (x); \ switch (size) { \ case 1: \ - asm volatile("xchgb %b0,%1" \ - : "=q" (__x), "+m" (*__xg(ptr)) \ + { \ + volatile u8 *__ptr = (volatile u8 *)(ptr); \ + asm volatile("xchgb %0,%1" \ + : "=q" (__x), "+m" (*__ptr) \ : "0" (__x) \ : "memory"); \ break; \ + } \ case 2: \ - asm volatile("xchgw %w0,%1" \ - : "=r" (__x), "+m" (*__xg(ptr)) \ + { \ + volatile u16 *__ptr = (volatile u16 *)(ptr); \ + asm volatile("xchgw %0,%1" \ + : "=r" (__x), "+m" (*__ptr) \ : "0" (__x) \ : "memory"); \ break; \ + } \ case 4: \ + { \ + volatile u32 *__ptr = (volatile u32 *)(ptr); \ asm volatile("xchgl %0,%1" \ - : "=r" (__x), "+m" (*__xg(ptr)) \ + : "=r" (__x), "+m" (*__ptr) \ : "0" (__x) \ : "memory"); \ break; \ + } \ default: \ __xchg_wrong_size(); \ } \ @@ -94,23 +98,32 @@ extern void __cmpxchg_wrong_size(void); __typeof__(*(ptr)) __new = (new); \ switch (size) { \ case 1: \ - asm volatile(lock "cmpxchgb %b2,%1" \ - : "=a" (__ret), "+m" (*__xg(ptr)) \ + { \ + volatile u8 *__ptr = (volatile u8 *)(ptr); \ + asm volatile(lock "cmpxchgb %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ : "q" (__new), "0" (__old) \ : "memory"); \ break; \ + } \ case 2: \ - asm volatile(lock "cmpxchgw %w2,%1" \ - : "=a" (__ret), "+m" (*__xg(ptr)) \ + { \ + volatile u16 *__ptr = (volatile u16 *)(ptr); \ + asm volatile(lock "cmpxchgw %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ break; \ + } \ case 4: \ + { \ + volatile u32 *__ptr = (volatile u32 *)(ptr); \ asm volatile(lock "cmpxchgl %2,%1" \ - : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ break; \ + } \ default: \ __cmpxchg_wrong_size(); \ } \ @@ -148,31 +161,27 @@ extern void __cmpxchg_wrong_size(void); (unsigned long long)(n))) #endif -static inline unsigned long long __cmpxchg64(volatile void *ptr, - unsigned long long old, - unsigned long long new) +static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new) { - unsigned long long prev; + u64 prev; asm volatile(LOCK_PREFIX "cmpxchg8b %1" : "=A" (prev), - "+m" (*__xg(ptr)) - : "b" ((unsigned long)new), - "c" ((unsigned long)(new >> 32)), + "+m" (*ptr) + : "b" ((u32)new), + "c" ((u32)(new >> 32)), "0" (old) : "memory"); return prev; } -static inline unsigned long long __cmpxchg64_local(volatile void *ptr, - unsigned long long old, - unsigned long long new) +static inline u64 
__cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) { - unsigned long long prev; + u64 prev; asm volatile("cmpxchg8b %1" : "=A" (prev), - "+m" (*__xg(ptr)) - : "b" ((unsigned long)new), - "c" ((unsigned long)(new >> 32)), + "+m" (*ptr) + : "b" ((u32)new), + "c" ((u32)(new >> 32)), "0" (old) : "memory"); return prev; diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 9596e7c6196..423ae58aa02 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -3,8 +3,6 @@ #include /* Provides LOCK_PREFIX */ -#define __xg(x) ((volatile long *)(x)) - static inline void set_64bit(volatile u64 *ptr, u64 val) { *ptr = val; @@ -14,38 +12,51 @@ extern void __xchg_wrong_size(void); extern void __cmpxchg_wrong_size(void); /* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - * but generally the primitive is invalid, *ptr is output argument. --ANK + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. + * Since this is generally used to protect other memory information, we + * use "asm volatile" and "memory" clobbers to prevent gcc from moving + * information around. */ #define __xchg(x, ptr, size) \ ({ \ __typeof(*(ptr)) __x = (x); \ switch (size) { \ case 1: \ - asm volatile("xchgb %b0,%1" \ - : "=q" (__x), "+m" (*__xg(ptr)) \ + { \ + volatile u8 *__ptr = (volatile u8 *)(ptr); \ + asm volatile("xchgb %0,%1" \ + : "=q" (__x), "+m" (*__ptr) \ : "0" (__x) \ : "memory"); \ break; \ + } \ case 2: \ - asm volatile("xchgw %w0,%1" \ - : "=r" (__x), "+m" (*__xg(ptr)) \ + { \ + volatile u16 *__ptr = (volatile u16 *)(ptr); \ + asm volatile("xchgw %0,%1" \ + : "=r" (__x), "+m" (*__ptr) \ : "0" (__x) \ : "memory"); \ break; \ + } \ case 4: \ - asm volatile("xchgl %k0,%1" \ - : "=r" (__x), "+m" (*__xg(ptr)) \ + { \ + volatile u32 *__ptr = (volatile u32 *)(ptr); \ + asm volatile("xchgl %0,%1" \ + : "=r" (__x), "+m" (*__ptr) \ : "0" (__x) \ : "memory"); \ break; \ + } \ case 8: \ + { \ + volatile u64 *__ptr = (volatile u64 *)(ptr); \ asm volatile("xchgq %0,%1" \ - : "=r" (__x), "+m" (*__xg(ptr)) \ + : "=r" (__x), "+m" (*__ptr) \ : "0" (__x) \ : "memory"); \ break; \ + } \ default: \ __xchg_wrong_size(); \ } \ @@ -69,29 +80,41 @@ extern void __cmpxchg_wrong_size(void); __typeof__(*(ptr)) __new = (new); \ switch (size) { \ case 1: \ - asm volatile(lock "cmpxchgb %b2,%1" \ - : "=a" (__ret), "+m" (*__xg(ptr)) \ + { \ + volatile u8 *__ptr = (volatile u8 *)(ptr); \ + asm volatile(lock "cmpxchgb %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ : "q" (__new), "0" (__old) \ : "memory"); \ break; \ + } \ case 2: \ - asm volatile(lock "cmpxchgw %w2,%1" \ - : "=a" (__ret), "+m" (*__xg(ptr)) \ + { \ + volatile u16 *__ptr = (volatile u16 *)(ptr); \ + asm volatile(lock "cmpxchgw %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ break; \ + } \ case 4: \ - asm volatile(lock "cmpxchgl %k2,%1" \ - : "=a" (__ret), "+m" (*__xg(ptr)) \ + { \ + volatile u32 *__ptr = (volatile u32 *)(ptr); \ + asm volatile(lock "cmpxchgl %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ break; \ + } \ case 8: \ + { \ + volatile u64 *__ptr = (volatile u64 *)(ptr); \ asm volatile(lock "cmpxchgq %2,%1" \ - : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ break; \ + } \ default: \ __cmpxchg_wrong_size(); \ } \ -- cgit v1.2.3-18-g5258 From 
90c8f92f5c807807ca74d5f2f313794925174e6b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 28 Jul 2010 16:53:49 -0700 Subject: x86, asm: Move cmpxchg emulation code to arch/x86/lib Move cmpxchg emulation code from arch/x86/kernel/cpu (which is otherwise CPU identification) to arch/x86/lib, where other emulation code lives already. Signed-off-by: H. Peter Anvin LKML-Reference: --- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/cmpxchg.c | 72 ------------------------------------------- arch/x86/lib/Makefile | 1 + arch/x86/lib/cmpxchg.c | 72 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 74 insertions(+), 73 deletions(-) delete mode 100644 arch/x86/kernel/cpu/cmpxchg.c create mode 100644 arch/x86/lib/cmpxchg.c (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 3a785da34b6..c47c43914ba 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -16,7 +16,7 @@ obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o capflags.o powerflags.o common.o obj-y += vmware.o hypervisor.o sched.o mshyperv.o -obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o +obj-$(CONFIG_X86_32) += bugs.o obj-$(CONFIG_X86_64) += bugs_64.o obj-$(CONFIG_CPU_SUP_INTEL) += intel.o diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c deleted file mode 100644 index 2056ccf572c..00000000000 --- a/arch/x86/kernel/cpu/cmpxchg.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * cmpxchg*() fallbacks for CPU not supporting these instructions - */ - -#include -#include -#include - -#ifndef CONFIG_X86_CMPXCHG -unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) -{ - u8 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u8 *)ptr; - if (prev == old) - *(u8 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u8); - -unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) -{ - u16 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u16 *)ptr; - if (prev == old) - *(u16 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u16); - -unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) -{ - u32 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u32 *)ptr; - if (prev == old) - *(u32 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u32); -#endif - -#ifndef CONFIG_X86_CMPXCHG64 -unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) -{ - u64 prev; - unsigned long flags; - - /* Poor man's cmpxchg8b for 386 and 486. 
Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u64 *)ptr; - if (prev == old) - *(u64 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_486_u64); -#endif - diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index f871e04b696..e10cf070ede 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -30,6 +30,7 @@ ifeq ($(CONFIG_X86_32),y) lib-y += checksum_32.o lib-y += strstr_32.o lib-y += semaphore_32.o string_32.o + lib-y += cmpxchg.o ifneq ($(CONFIG_X86_CMPXCHG64),y) lib-y += cmpxchg8b_emu.o atomic64_386_32.o endif diff --git a/arch/x86/lib/cmpxchg.c b/arch/x86/lib/cmpxchg.c new file mode 100644 index 00000000000..2056ccf572c --- /dev/null +++ b/arch/x86/lib/cmpxchg.c @@ -0,0 +1,72 @@ +/* + * cmpxchg*() fallbacks for CPU not supporting these instructions + */ + +#include +#include +#include + +#ifndef CONFIG_X86_CMPXCHG +unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) +{ + u8 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u8 *)ptr; + if (prev == old) + *(u8 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u8); + +unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) +{ + u16 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u16 *)ptr; + if (prev == old) + *(u16 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u16); + +unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) +{ + u32 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u32 *)ptr; + if (prev == old) + *(u32 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u32); +#endif + +#ifndef CONFIG_X86_CMPXCHG64 +unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) +{ + u64 prev; + unsigned long flags; + + /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_486_u64); +#endif + -- cgit v1.2.3-18-g5258 From a378d9338e8dde78314b3a6ae003de351936c729 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 28 Jul 2010 17:05:11 -0700 Subject: x86, asm: Merge cmpxchg_486_u64() and cmpxchg8b_emu() We have two functions for doing exactly the same thing -- emulating cmpxchg8b on 486 and older hardware -- with different calling conventions, and yet doing the same thing. Drop the C version and use the assembly version, via alternatives, for both the local and non-local versions of cmpxchg8b. Signed-off-by: H. Peter Anvin LKML-Reference: --- arch/x86/include/asm/cmpxchg_32.h | 30 ++++++++++++++---------------- arch/x86/lib/cmpxchg.c | 18 ------------------ 2 files changed, 14 insertions(+), 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index f5bd1fd388f..284a6e8f7ce 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -246,8 +246,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, * to simulate the cmpxchg8b on the 80386 and 80486 CPU. 
*/ -extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); - #define cmpxchg64(ptr, o, n) \ ({ \ __typeof__(*(ptr)) __ret; \ @@ -265,20 +263,20 @@ extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); __ret; }) - -#define cmpxchg64_local(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - if (likely(boot_cpu_data.x86 > 4)) \ - __ret = (__typeof__(*(ptr)))__cmpxchg64_local((ptr), \ - (unsigned long long)(o), \ - (unsigned long long)(n)); \ - else \ - __ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr), \ - (unsigned long long)(o), \ - (unsigned long long)(n)); \ - __ret; \ -}) +#define cmpxchg64_local(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (o); \ + __typeof__(*(ptr)) __new = (n); \ + alternative_io("call cmpxchg8b_emu", \ + "cmpxchg8b (%%esi)" , \ + X86_FEATURE_CX8, \ + "=A" (__ret), \ + "S" ((ptr)), "0" (__old), \ + "b" ((unsigned int)__new), \ + "c" ((unsigned int)(__new>>32)) \ + : "memory"); \ + __ret; }) #endif diff --git a/arch/x86/lib/cmpxchg.c b/arch/x86/lib/cmpxchg.c index 2056ccf572c..5d619f6df3e 100644 --- a/arch/x86/lib/cmpxchg.c +++ b/arch/x86/lib/cmpxchg.c @@ -52,21 +52,3 @@ unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) } EXPORT_SYMBOL(cmpxchg_386_u32); #endif - -#ifndef CONFIG_X86_CMPXCHG64 -unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) -{ - u64 prev; - unsigned long flags; - - /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u64 *)ptr; - if (prev == old) - *(u64 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_486_u64); -#endif - -- cgit v1.2.3-18-g5258 From c15a5958a0b6dbf06b3c05972694f04a0c50a4cf Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 31 Jul 2010 12:48:22 -0400 Subject: x86-64, asm: Directly access per-cpu IST Use a direct per-cpu reference for the IST instead of using a scratch register. Signed-off-by: Brian Gerst LKML-Reference: <1280594903-6341-1-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_64.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 4db7c4d12ff..59af275b37a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1065,6 +1065,7 @@ ENTRY(\sym) END(\sym) .endm +#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) .macro paranoidzeroentry_ist sym do_sym ist ENTRY(\sym) INTR_FRAME @@ -1076,10 +1077,9 @@ ENTRY(\sym) TRACE_IRQS_OFF movq %rsp,%rdi /* pt_regs pointer */ xorl %esi,%esi /* no error code */ - PER_CPU(init_tss, %r12) - subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) + subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) call \do_sym - addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) + addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC END(\sym) -- cgit v1.2.3-18-g5258 From 72c511dd596cff88d6523f231a0fbb8f73006d51 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 31 Jul 2010 12:48:23 -0400 Subject: x86-32, asm: Directly access per-cpu GDT Use a direct per-cpu reference for the GDT instead of using a scratch register. Signed-off-by: Brian Gerst LKML-Reference: <1280594903-6341-2-git-send-email-brgerst@gmail.com> Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/entry_32.S | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index cd49141cf15..233c5829e7a 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -611,14 +611,14 @@ ldt_ss: * compensating for the offset by changing to the ESPFIX segment with * a base address that matches for the difference. */ +#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8) mov %esp, %edx /* load kernel esp */ mov PT_OLDESP(%esp), %eax /* load userspace esp */ mov %dx, %ax /* eax: new kernel esp */ sub %eax, %edx /* offset (low word is 0) */ - PER_CPU(gdt_page, %ebx) shr $16, %edx - mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */ - mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */ + mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ + mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ pushl $__ESPFIX_SS CFI_ADJUST_CFA_OFFSET 4 push %eax /* new kernel esp */ @@ -791,9 +791,8 @@ ptregs_clone: * normal stack and adjusts ESP with the matching offset. */ /* fixup the stack */ - PER_CPU(gdt_page, %ebx) - mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */ - mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */ + mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ + mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ shl $16, %eax addl %esp, %eax /* the adjusted stack pointer */ pushl $__KERNEL_DS -- cgit v1.2.3-18-g5258
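
To see the asm-constraint change these patches converge on in isolation, here is a minimal user-space sketch (not kernel code) of the xchg()/cmpxchg() pattern: the memory location is listed as a read/write operand ("+m" (*ptr)) instead of a plain input ("m" (*__xg(ptr))), and the register operand is tied to its input with a matching "0" constraint. The helper names (xchg_u32, cmpxchg_u32) and the demo program are illustrative only and assume an x86 target with GCC-style inline asm; the real kernel macros additionally dispatch on operand size and use LOCK_PREFIX so the lock prefix can be patched out on uniprocessor builds.

/*
 * Minimal, standalone illustration (not kernel code) of the constraint
 * pattern used above: the memory location is a read/write operand
 * ("+m") instead of a plain input ("m"), so gcc knows xchg/cmpxchg
 * modify it, and the register operand is tied to its input with a
 * matching "0" constraint.
 * Assumes an x86 target and GCC-style inline asm; build with gcc -O2.
 */
#include <stdint.h>
#include <stdio.h>

static inline uint32_t xchg_u32(volatile uint32_t *ptr, uint32_t val)
{
	/* xchg with a memory operand always implies lock on x86. */
	asm volatile("xchgl %0,%1"
		     : "=r" (val), "+m" (*ptr)
		     : "0" (val)
		     : "memory");
	return val;
}

static inline uint32_t cmpxchg_u32(volatile uint32_t *ptr,
				   uint32_t old, uint32_t new)
{
	uint32_t ret;

	/* On success *ptr = new; either way the old value lands in %eax. */
	asm volatile("lock cmpxchgl %2,%1"
		     : "=a" (ret), "+m" (*ptr)
		     : "r" (new), "0" (old)
		     : "memory");
	return ret;
}

int main(void)
{
	volatile uint32_t v = 1;
	uint32_t prev;

	prev = xchg_u32(&v, 2);
	printf("xchg:    prev=%u now=%u\n", (unsigned)prev, (unsigned)v);

	prev = cmpxchg_u32(&v, 2, 3);
	printf("cmpxchg: prev=%u now=%u\n", (unsigned)prev, (unsigned)v);
	return 0;
}

Keeping the "memory" clobber alongside "+m" matches what the patches do: the "+m" operand makes the write to *ptr explicit to the compiler, while the clobber still prevents gcc from caching other memory values across the operation, which is the point of these primitives when they protect other data.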