aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu/common.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/common.c')
-rw-r--r--arch/x86/kernel/cpu/common.c257
1 files changed, 139 insertions, 118 deletions
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 83492b1f93b..826d5c87627 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -21,14 +21,14 @@
#include <asm/asm.h>
#include <asm/numa.h>
#include <asm/smp.h>
-#ifdef CONFIG_X86_LOCAL_APIC
-#include <asm/mpspec.h>
+#include <asm/cpu.h>
+#include <asm/cpumask.h>
#include <asm/apic.h>
-#include <mach_apic.h>
-#include <asm/genapic.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/uv/uv.h>
#endif
-#include <asm/pda.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/desc.h>
@@ -37,6 +37,7 @@
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/hypervisor.h>
+#include <asm/stackprotector.h>
#include "cpu.h"
@@ -50,6 +51,15 @@ cpumask_var_t cpu_initialized_mask;
/* representing cpus for which sibling maps can be computed */
cpumask_var_t cpu_sibling_setup_mask;
+/* correctly size the local cpu masks */
+void __init setup_cpu_local_masks(void)
+{
+ alloc_bootmem_cpumask_var(&cpu_initialized_mask);
+ alloc_bootmem_cpumask_var(&cpu_callin_mask);
+ alloc_bootmem_cpumask_var(&cpu_callout_mask);
+ alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
+}
+
#else /* CONFIG_X86_32 */
cpumask_t cpu_callin_map;
@@ -62,23 +72,23 @@ cpumask_t cpu_sibling_setup_map;
static struct cpu_dev *this_cpu __cpuinitdata;
+DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
#ifdef CONFIG_X86_64
-/* We need valid kernel segments for data and code in long mode too
- * IRET will check the segment types kkeil 2000/10/28
- * Also sysret mandates a special GDT layout
- */
-/* The TLS descriptors are currently at a different place compared to i386.
- Hopefully nobody expects them at a fixed place (Wine?) */
-DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
+ /*
+ * We need valid kernel segments for data and code in long mode too
+ * IRET will check the segment types kkeil 2000/10/28
+ * Also sysret mandates a special GDT layout
+ *
+ * The TLS descriptors are currently at a different place compared to i386.
+ * Hopefully nobody expects them at a fixed place (Wine?)
+ */
[GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
[GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
[GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
-} };
#else
-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
@@ -110,9 +120,10 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
- [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
-} };
+ [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
+ GDT_STACK_CANARY_INIT
#endif
+} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
#ifdef CONFIG_X86_32
@@ -213,6 +224,49 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
#endif
/*
+ * Some CPU features depend on higher CPUID levels, which may not always
+ * be available due to CPUID level capping or broken virtualization
+ * software. Add those features to this table to auto-disable them.
+ */
+struct cpuid_dependent_feature {
+ u32 feature;
+ u32 level;
+};
+static const struct cpuid_dependent_feature __cpuinitconst
+cpuid_dependent_features[] = {
+ { X86_FEATURE_MWAIT, 0x00000005 },
+ { X86_FEATURE_DCA, 0x00000009 },
+ { X86_FEATURE_XSAVE, 0x0000000d },
+ { 0, 0 }
+};
+
+static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
+{
+ const struct cpuid_dependent_feature *df;
+ for (df = cpuid_dependent_features; df->feature; df++) {
+ /*
+ * Note: cpuid_level is set to -1 if unavailable, but
+ * extended_extended_level is set to 0 if unavailable
+ * and the legitimate extended levels are all negative
+ * when signed; hence the weird messing around with
+ * signs here...
+ */
+ if (cpu_has(c, df->feature) &&
+ ((s32)df->level < 0 ?
+ (u32)df->level > (u32)c->extended_cpuid_level :
+ (s32)df->level > (s32)c->cpuid_level)) {
+ clear_cpu_cap(c, df->feature);
+ if (warn)
+ printk(KERN_WARNING
+ "CPU: CPU feature %s disabled "
+ "due to lack of CPUID level 0x%x\n",
+ x86_cap_flags[df->feature],
+ df->level);
+ }
+ }
+}
+
+/*
* Naming convention should be: <Name> [(<Codename>)]
* This table only is used unless init_<vendor>() below doesn't set it;
* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
@@ -242,18 +296,29 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+void load_percpu_segment(int cpu)
+{
+#ifdef CONFIG_X86_32
+ loadsegment(fs, __KERNEL_PERCPU);
+#else
+ loadsegment(gs, 0);
+ wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
+#endif
+ load_stack_canary_segment();
+}
+
/* Current gdt points %fs at the "master" per-cpu area: after this,
* it's on the real one. */
-void switch_to_new_gdt(void)
+void switch_to_new_gdt(int cpu)
{
struct desc_ptr gdt_descr;
- gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
+ gdt_descr.address = (long)get_cpu_gdt_table(cpu);
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
-#ifdef CONFIG_X86_32
- asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
-#endif
+ /* Reload the per-cpu base */
+
+ load_percpu_segment(cpu);
}
static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
@@ -383,11 +448,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
}
index_msb = get_count_order(smp_num_siblings);
-#ifdef CONFIG_X86_64
- c->phys_proc_id = phys_pkg_id(index_msb);
-#else
- c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
-#endif
+ c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
smp_num_siblings = smp_num_siblings / c->x86_max_cores;
@@ -395,13 +456,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
core_bits = get_count_order(c->x86_max_cores);
-#ifdef CONFIG_X86_64
- c->cpu_core_id = phys_pkg_id(index_msb) &
+ c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
((1 << core_bits) - 1);
-#else
- c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
- ((1 << core_bits) - 1);
-#endif
}
out:
@@ -570,11 +626,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
- validate_pat_support(c);
-
#ifdef CONFIG_SMP
c->cpu_index = boot_cpu_id;
#endif
+ filter_cpuid_features(c, false);
}
void __init early_cpu_init(void)
@@ -637,7 +692,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
#ifdef CONFIG_X86_32
# ifdef CONFIG_X86_HT
- c->apicid = phys_pkg_id(c->initial_apicid, 0);
+ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
# else
c->apicid = c->initial_apicid;
# endif
@@ -684,7 +739,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
this_cpu->c_identify(c);
#ifdef CONFIG_X86_64
- c->apicid = phys_pkg_id(0);
+ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
#endif
/*
@@ -708,6 +763,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
* we do "generic changes."
*/
+ /* Filter out anything that depends on CPUID levels we don't have */
+ filter_cpuid_features(c, true);
+
/* If the model name is still unset, do table lookup. */
if (!c->x86_model_id[0]) {
char *p;
@@ -877,54 +935,22 @@ static __init int setup_disablecpuid(char *arg)
__setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
-struct x8664_pda **_cpu_pda __read_mostly;
-EXPORT_SYMBOL(_cpu_pda);
-
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
-static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
+DEFINE_PER_CPU_FIRST(union irq_stack_union,
+ irq_stack_union) __aligned(PAGE_SIZE);
+DEFINE_PER_CPU(char *, irq_stack_ptr) =
+ init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
-void __cpuinit pda_init(int cpu)
-{
- struct x8664_pda *pda = cpu_pda(cpu);
+DEFINE_PER_CPU(unsigned long, kernel_stack) =
+ (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(kernel_stack);
- /* Setup up data that may be needed in __get_free_pages early */
- loadsegment(fs, 0);
- loadsegment(gs, 0);
- /* Memory clobbers used to order PDA accessed */
- mb();
- wrmsrl(MSR_GS_BASE, pda);
- mb();
-
- pda->cpunumber = cpu;
- pda->irqcount = -1;
- pda->kernelstack = (unsigned long)stack_thread_info() -
- PDA_STACKOFFSET + THREAD_SIZE;
- pda->active_mm = &init_mm;
- pda->mmu_state = 0;
-
- if (cpu == 0) {
- /* others are initialized in smpboot.c */
- pda->pcurrent = &init_task;
- pda->irqstackptr = boot_cpu_stack;
- pda->irqstackptr += IRQSTACKSIZE - 64;
- } else {
- if (!pda->irqstackptr) {
- pda->irqstackptr = (char *)
- __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
- if (!pda->irqstackptr)
- panic("cannot allocate irqstack for cpu %d",
- cpu);
- pda->irqstackptr += IRQSTACKSIZE - 64;
- }
+DEFINE_PER_CPU(unsigned int, irq_count) = -1;
- if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
- pda->nodenumber = cpu_to_node(cpu);
- }
-}
-
-static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
- DEBUG_STKSZ] __page_aligned_bss;
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
+ __aligned(PAGE_SIZE);
extern asmlinkage void ignore_sysret(void);
@@ -957,16 +983,21 @@ unsigned long kernel_eflags;
*/
DEFINE_PER_CPU(struct orig_ist, orig_ist);
-#else
+#else /* x86_64 */
-/* Make sure %fs is initialized properly in idle threads */
+#ifdef CONFIG_CC_STACKPROTECTOR
+DEFINE_PER_CPU(unsigned long, stack_canary);
+#endif
+
+/* Make sure %fs and %gs are initialized properly in idle threads */
struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
{
memset(regs, 0, sizeof(struct pt_regs));
regs->fs = __KERNEL_PERCPU;
+ regs->gs = __KERNEL_STACK_CANARY;
return regs;
}
-#endif
+#endif /* x86_64 */
/*
* cpu_init() initializes state that is per-CPU. Some data is already
@@ -982,15 +1013,14 @@ void __cpuinit cpu_init(void)
struct tss_struct *t = &per_cpu(init_tss, cpu);
struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
unsigned long v;
- char *estacks = NULL;
struct task_struct *me;
int i;
- /* CPU 0 is initialised in head64.c */
- if (cpu != 0)
- pda_init(cpu);
- else
- estacks = boot_exception_stacks;
+#ifdef CONFIG_NUMA
+ if (cpu != 0 && percpu_read(node_number) == 0 &&
+ cpu_to_node(cpu) != NUMA_NO_NODE)
+ percpu_write(node_number, cpu_to_node(cpu));
+#endif
me = current;
@@ -1006,7 +1036,9 @@ void __cpuinit cpu_init(void)
* and set up the GDT descriptor:
*/
- switch_to_new_gdt();
+ switch_to_new_gdt(cpu);
+ loadsegment(fs, 0);
+
load_idt((const struct desc_ptr *)&idt_descr);
memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
@@ -1017,25 +1049,20 @@ void __cpuinit cpu_init(void)
barrier();
check_efer();
- if (cpu != 0 && x2apic)
+ if (cpu != 0)
enable_x2apic();
/*
* set up and load the per-CPU TSS
*/
if (!orig_ist->ist[0]) {
- static const unsigned int order[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+ static const unsigned int sizes[N_EXCEPTION_STACKS] = {
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
+ [DEBUG_STACK - 1] = DEBUG_STKSZ
};
+ char *estacks = per_cpu(exception_stacks, cpu);
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
- if (cpu) {
- estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
- if (!estacks)
- panic("Cannot allocate exception "
- "stack %ld %d\n", v, cpu);
- }
- estacks += PAGE_SIZE << order[v];
+ estacks += sizes[v];
orig_ist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks;
}
@@ -1069,22 +1096,19 @@ void __cpuinit cpu_init(void)
*/
if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
arch_kgdb_ops.correct_hw_break();
- else {
+ else
#endif
- /*
- * Clear all 6 debug registers:
- */
-
- set_debugreg(0UL, 0);
- set_debugreg(0UL, 1);
- set_debugreg(0UL, 2);
- set_debugreg(0UL, 3);
- set_debugreg(0UL, 6);
- set_debugreg(0UL, 7);
-#ifdef CONFIG_KGDB
- /* If the kgdb is connected no debug regs should be altered. */
+ {
+ /*
+ * Clear all 6 debug registers:
+ */
+ set_debugreg(0UL, 0);
+ set_debugreg(0UL, 1);
+ set_debugreg(0UL, 2);
+ set_debugreg(0UL, 3);
+ set_debugreg(0UL, 6);
+ set_debugreg(0UL, 7);
}
-#endif
fpu_init();
@@ -1114,7 +1138,7 @@ void __cpuinit cpu_init(void)
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
load_idt(&idt_descr);
- switch_to_new_gdt();
+ switch_to_new_gdt(cpu);
/*
* Set up and load the per-CPU TSS and LDT
@@ -1135,9 +1159,6 @@ void __cpuinit cpu_init(void)
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
#endif
- /* Clear %gs. */
- asm volatile ("mov %0, %%gs" : : "r" (0));
-
/* Clear all 6 debug registers: */
set_debugreg(0, 0);
set_debugreg(0, 1);