From 8e221b6db4477643fefc885a97ea9889ac733140 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 22 Jun 2010 16:23:37 -0700 Subject: x86: Avoid unnecessary __clear_user() and xrstor in signal handling fxsave/xsave doesn't touch all the bytes in the memory layout used by these instructions. Specifically SW reserved (bytes 464..511) fields in the fxsave frame and the reserved fields in the xsave header. To present a clean context for the signal handling, just clear these fields instead of clearing the complete fxsave/xsave memory layout, when we dump these registers directly to the user signal frame. Also avoid the call to second xrstor (which inits the state not passed in the signal frame) in restore_user_xstate() if all the state has already been restored by the first xrstor. These changes improve the performance of signal handling(by ~3-5% as measured by the lat_sig). Signed-off-by: Suresh Siddha LKML-Reference: <1277249017.2847.85.camel@sbs-t61.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 9 +++++++++ arch/x86/include/asm/xsave.h | 10 ++++++++++ arch/x86/kernel/xsave.c | 12 ++---------- 3 files changed, 21 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index c991b3a7b90..0f1cf5d53dd 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -127,6 +127,15 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) { int err; + /* + * Clear the bytes not touched by the fxsave and reserved + * for the SW usage. + */ + err = __clear_user(&fx->sw_reserved, + sizeof(struct _fpx_sw_bytes)); + if (unlikely(err)) + return -EFAULT; + asm volatile("1: rex64/fxsave (%[fx])\n\t" "2:\n" ".section .fixup,\"ax\"\n" diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 2c4390cae22..30dfc81804d 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -59,6 +59,16 @@ static inline int fpu_xrstor_checking(struct fpu *fpu) static inline int xsave_user(struct xsave_struct __user *buf) { int err; + + /* + * Clear the xsave header first, so that reserved fields are + * initialized to zero. + */ + err = __clear_user(&buf->xsave_hdr, + sizeof(struct xsave_hdr_struct)); + if (unlikely(err)) + return -EFAULT; + __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" "2:\n" ".section .fixup,\"ax\"\n" diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 37e68fc5e24..6e73db1b7b4 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -91,14 +91,6 @@ int save_i387_xstate(void __user *buf) return 0; if (task_thread_info(tsk)->status & TS_USEDFPU) { - /* - * Start with clearing the user buffer. This will present a - * clean context for the bytes not touched by the fxsave/xsave. - */ - err = __clear_user(buf, sig_xstate_size); - if (err) - return err; - if (use_xsave()) err = xsave_user(buf); else @@ -184,8 +176,8 @@ static int restore_user_xstate(void __user *buf) * init the state skipped by the user. */ mask = pcntxt_mask & ~mask; - - xrstor_state(init_xstate_buf, mask); + if (unlikely(mask)) + xrstor_state(init_xstate_buf, mask); return 0; -- cgit v1.2.3-18-g5258 From 5bbd4a336c81d32df71642abf310cf3d0c98dc9b Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 7 Jul 2010 19:51:59 -0400 Subject: x86/apic/es7000_32: Remove unused variable In today's linux-next I got this compile warning: arch/x86/kernel/apic/es7000_32.c:132: warning: 'base' defined but not used Current patch solves the issue removing the unused variable. Signed-off-by: Javier Martinez Canillas Cc: Rakib Mullick Cc: Eric W. Biederman Cc: Cyrill Gorcunov Cc: Tejun Heo LKML-Reference: <1278546719.9020.4.camel@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/es7000_32.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 425e53a87fe..8593582d802 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -129,7 +129,6 @@ int es7000_plat; * GSI override for ES7000 platforms. */ -static unsigned int base; static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) { -- cgit v1.2.3-18-g5258 From b2691085d1f3ccce641dcfdd02722ba5d34db6ba Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 28 Jun 2010 16:46:48 -0700 Subject: x86: Clean up arch/x86/kernel/cpu/mtrr/cleanup.c: use ";" not "," to terminate statements Also needed if pr_ becomes a bit more space efficient. Signed-off-by: Joe Perches Acked-by: Thomas Gleixner Cc: "H. Peter Anvin" LKML-Reference: <1277768808.29157.280.camel@Joe-Laptop.home> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/cleanup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 06130b52f01..c5f59d07142 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -632,9 +632,9 @@ static void __init mtrr_print_out_one_result(int i) unsigned long gran_base, chunk_base, lose_base; char gran_factor, chunk_factor, lose_factor; - gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), - chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), - lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), + gran_base = to_size_factor(result[i].gran_sizek, &gran_factor); + chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor); + lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor); pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t", result[i].bad ? "*BAD*" : " ", -- cgit v1.2.3-18-g5258 From 68f202e4e87cfab4439568bf397fcc5c7cf8d729 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 30 Jul 2010 11:46:42 -0700 Subject: x86, mtrr: Use stop machine context to rendezvous all the cpu's Use the stop machine context rather than IPI's to rendezvous all the cpus for MTRR initialization that happens during cpu bringup or for MTRR modifications during runtime. This avoids deadlock scenario (reported by Prarit) like: cpu A holds a read_lock (tasklist_lock for example) with irqs enabled cpu B waits for the same lock with irqs disabled using write_lock_irq cpu C doing set_mtrr() (during AP bringup for example), which will try to rendezvous all the cpus using IPI's This will result in C and A come to the rendezvous point and waiting for B. B is stuck forever waiting for the lock and thus not reaching the rendezvous point. Using stop cpu (run in the process context of per cpu based keventd) to do this rendezvous, avoids this deadlock scenario. Also make sure all the cpu's are in the rendezvous handler before we proceed with the local_irq_save() on each cpu. This lock step disabling irqs on all the cpus will avoid other deadlock scenarios (for example involving with the blocking smp_call_function's etc). [ This problem is very old. Marking -stable only for 2.6.35 as the stop_one_cpu_nowait() API is present only in 2.6.35. Any older kernel interested in this fix need to do some more work in backporting this patch. ] Reported-by: Prarit Bhargava Signed-off-by: Suresh Siddha LKML-Reference: <1280515602.2682.10.camel@sbsiddha-MOBL3.sc.intel.com> Acked-by: Prarit Bhargava Cc: stable@kernel.org [2.6.35] Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 56 +++++++++++++++++++++++++++++++---------- arch/x86/kernel/smpboot.c | 7 ++++++ 2 files changed, 50 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 79556bd9b60..01c0f3ee6cc 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -35,6 +35,7 @@ #include /* FIXME: kvm_para.h needs this */ +#include #include #include #include @@ -143,22 +144,28 @@ struct set_mtrr_data { mtrr_type smp_type; }; +static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work); + /** - * ipi_handler - Synchronisation handler. Executed by "other" CPUs. + * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs. * @info: pointer to mtrr configuration data * * Returns nothing. */ -static void ipi_handler(void *info) +static int mtrr_work_handler(void *info) { #ifdef CONFIG_SMP struct set_mtrr_data *data = info; unsigned long flags; + atomic_dec(&data->count); + while (!atomic_read(&data->gate)) + cpu_relax(); + local_irq_save(flags); atomic_dec(&data->count); - while (!atomic_read(&data->gate)) + while (atomic_read(&data->gate)) cpu_relax(); /* The master has cleared me to execute */ @@ -173,12 +180,13 @@ static void ipi_handler(void *info) } atomic_dec(&data->count); - while (atomic_read(&data->gate)) + while (!atomic_read(&data->gate)) cpu_relax(); atomic_dec(&data->count); local_irq_restore(flags); #endif + return 0; } static inline int types_compatible(mtrr_type type1, mtrr_type type2) @@ -198,7 +206,7 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) * * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: * - * 1. Send IPI to do the following: + * 1. Queue work to do the following on all processors: * 2. Disable Interrupts * 3. Wait for all procs to do so * 4. Enter no-fill cache mode @@ -215,14 +223,17 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) * 15. Enable interrupts. * * What does that mean for us? Well, first we set data.count to the number - * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait - * until it hits 0 and proceed. We set the data.gate flag and reset data.count. - * Meanwhile, they are waiting for that flag to be set. Once it's set, each + * of CPUs. As each CPU announces that it started the rendezvous handler by + * decrementing the count, We reset data.count and set the data.gate flag + * allowing all the cpu's to proceed with the work. As each cpu disables + * interrupts, it'll decrement data.count once. We wait until it hits 0 and + * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they + * are waiting for that flag to be cleared. Once it's cleared, each * CPU goes through the transition of updating MTRRs. * The CPU vendors may each do it differently, * so we call mtrr_if->set() callback and let them take care of it. * When they're done, they again decrement data->count and wait for data.gate - * to be reset. + * to be set. * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag * Everyone then enables interrupts and we all continue on. * @@ -234,6 +245,9 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ { struct set_mtrr_data data; unsigned long flags; + int cpu; + + preempt_disable(); data.smp_reg = reg; data.smp_base = base; @@ -246,10 +260,15 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ atomic_set(&data.gate, 0); /* Start the ball rolling on other CPUs */ - if (smp_call_function(ipi_handler, &data, 0) != 0) - panic("mtrr: timed out waiting for other CPUs\n"); + for_each_online_cpu(cpu) { + struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu); + + if (cpu == smp_processor_id()) + continue; + + stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work); + } - local_irq_save(flags); while (atomic_read(&data.count)) cpu_relax(); @@ -259,6 +278,16 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ smp_wmb(); atomic_set(&data.gate, 1); + local_irq_save(flags); + + while (atomic_read(&data.count)) + cpu_relax(); + + /* Ok, reset count and toggle gate */ + atomic_set(&data.count, num_booting_cpus() - 1); + smp_wmb(); + atomic_set(&data.gate, 0); + /* Do our MTRR business */ /* @@ -279,7 +308,7 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ atomic_set(&data.count, num_booting_cpus() - 1); smp_wmb(); - atomic_set(&data.gate, 0); + atomic_set(&data.gate, 1); /* * Wait here for everyone to have seen the gate change @@ -289,6 +318,7 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ cpu_relax(); local_irq_restore(flags); + preempt_enable(); } /** diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c4f33b2e77d..11015fd1abb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -816,6 +816,13 @@ do_rest: if (cpumask_test_cpu(cpu, cpu_callin_mask)) break; /* It has booted */ udelay(100); + /* + * Allow other tasks to run while we wait for the + * AP to come online. This also gives a chance + * for the MTRR work(triggered by the AP coming online) + * to be completed in the stop machine context. + */ + schedule(); } if (cpumask_test_cpu(cpu, cpu_callin_mask)) -- cgit v1.2.3-18-g5258 From be783a47214afc5a0aea9dafcbd9f1535ba05e94 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 2 Aug 2010 08:49:34 +0800 Subject: x86, vdso: Unmap vdso pages We mapped vdso pages but never unmapped them and the virtual address is lost after exiting from the function, so unmap vdso pages here. Signed-off-by: Shaohua Li LKML-Reference: <20100802004934.GA2505@sli10-desk.sh.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/vdso/vma.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index ac74869b814..80f23ed483e 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -67,6 +67,7 @@ static int __init init_vdso_vars(void) *(typeof(__ ## x) **) var_ref(VDSO64_SYMBOL(vbase, x), #x) = &__ ## x; #include "vextern.h" #undef VEXTERN + vunmap(vbase); return 0; oom: -- cgit v1.2.3-18-g5258 From 9f242dc10e0c3c1eb32d8c83c18650a35fd7f80d Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Mon, 2 Aug 2010 16:10:37 -0700 Subject: x86, vmware: Preset lpj values when on VMware. When running on VMware's platform, we have seen situations where the AP's try to calibrate the lpj values and fail to get good calibration runs becasue of timing issues. As a result delays don't work correctly on all cpus. The solutions is to set preset_lpj value based on the current tsc frequency value. This is similar to what KVM does as well. Signed-off-by: Alok N Kataria LKML-Reference: <1280790637.14933.29.camel@ank32.eng.vmware.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/vmware.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index b9d1ff58844..227b0448960 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -51,7 +51,7 @@ static inline int __vmware_platform(void) static unsigned long vmware_get_tsc_khz(void) { - uint64_t tsc_hz; + uint64_t tsc_hz, lpj; uint32_t eax, ebx, ecx, edx; VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); @@ -62,6 +62,13 @@ static unsigned long vmware_get_tsc_khz(void) printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n", (unsigned long) tsc_hz / 1000, (unsigned long) tsc_hz % 1000); + + if (!preset_lpj) { + lpj = ((u64)tsc_hz * 1000); + do_div(lpj, HZ); + preset_lpj = lpj; + } + return tsc_hz; } -- cgit v1.2.3-18-g5258